json2token static method
Convert a JSON object to a Donut token sequence.
This converts structured JSON into the special token format used by Donut for sequence-to-sequence training/inference.
Rules:
- dict → `<s_{key}>{value}</s_{key}>` for each key-value pair
- list → elements joined by `<sep/>`
- string/number → literal text
Example:
json2token({'menu': {'nm': 'Latte', 'price': '5.0'}})
// → '<s_menu><s_nm>Latte</s_nm><s_price>5.0</s_price></s_menu>'
Implementation
/// Converts a JSON-like value into a Donut token sequence.
///
/// Conversion rules:
/// * A [Map] yields `<s_KEY>VALUE</s_KEY>` for every entry, in the map's
///   iteration order, where `VALUE` is the recursively converted entry value.
///   Any map is accepted, not only `Map<String, dynamic>`; keys are rendered
///   with their `toString()` text.
/// * A [List] yields its recursively converted elements joined by `<sep/>`.
/// * Any other value yields its `toString()` text.
///
/// When [updateSpecialTokens] is true and [tokenizer] is non-null, each
/// generated `<s_KEY>` / `</s_KEY>` pair, and `<sep/>` for each list
/// encountered, is passed to `tokenizer.addSpecialTokens`. Both arguments are
/// forwarded unchanged to every recursive call.
static String json2token(dynamic obj,
    {bool updateSpecialTokens = true, DonutTokenizer? tokenizer}) {
  // Test against the raw `Map` type: Dart generics are reified, so a
  // `Map<dynamic, dynamic>` does not match `Map<String, dynamic>` and would
  // otherwise fall through to `toString()` and come out as `{key: value}`.
  if (obj is Map) {
    final buf = StringBuffer();
    for (final entry in obj.entries) {
      final openTag = '<s_${entry.key}>';
      final closeTag = '</s_${entry.key}>';
      buf
        ..write(openTag)
        ..write(json2token(
          entry.value,
          updateSpecialTokens: updateSpecialTokens,
          tokenizer: tokenizer,
        ))
        ..write(closeTag);
      // Registered after the nested value has been converted, so tags of
      // nested keys reach the tokenizer before the tags of this key.
      if (updateSpecialTokens && tokenizer != null) {
        tokenizer.addSpecialTokens([openTag, closeTag]);
      }
    }
    return buf.toString();
  }

  if (obj is List) {
    // The separator is registered before any element is converted, so it
    // reaches the tokenizer ahead of tags produced by the elements.
    if (updateSpecialTokens && tokenizer != null) {
      tokenizer.addSpecialTokens(['<sep/>']);
    }
    return obj
        .map((e) => json2token(
              e,
              updateSpecialTokens: updateSpecialTokens,
              tokenizer: tokenizer,
            ))
        .join('<sep/>');
  }

  // Scalars (strings, numbers, booleans, ...) are emitted as literal text.
  return obj.toString();
}