json2token static method

String json2token(
  dynamic obj, {
  bool updateSpecialTokens = true,
  DonutTokenizer? tokenizer,
})

Convert a JSON object to a Donut token sequence.

This converts structured JSON into the special token format used by Donut for sequence-to-sequence training/inference.

Rules:

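  • map → <s_{key}>{value}</s_{key}> for each key-value pair, with the value converted recursively
  • list → recursively converted elements joined by <sep/>
  • any other value (string, number, …) → its literal text, as produced by toString()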

Example:

json2token({'menu': {'nm': 'Latte', 'price': '5.0'}})
// → '<s_menu><s_nm>Latte</s_nm><s_price>5.0</s_price></s_menu>'

Implementation

/// Converts a JSON-like object into a Donut token sequence.
///
/// Conversion rules:
///
/// * A [Map] becomes `<s_key>value</s_key>` for every entry, in the map's
///   iteration order, with the value converted recursively.
/// * A [List] becomes its recursively converted elements joined by `<sep/>`.
/// * Any other value is rendered with `toString()`.
///
/// Maps are matched regardless of their type arguments, so a
/// `Map<dynamic, dynamic>` is converted exactly like a
/// `Map<String, dynamic>`; each key is interpolated into the tag name.
///
/// When [updateSpecialTokens] is true and [tokenizer] is non-null, every
/// generated `<s_key>` / `</s_key>` pair and the `<sep/>` separator are passed
/// to `tokenizer.addSpecialTokens`.
///
/// Returns the concatenated token string; an empty map or list yields `''`.
static String json2token(dynamic obj,
    {bool updateSpecialTokens = true, DonutTokenizer? tokenizer}) {
  // `is Map` (not `is Map<String, dynamic>`) so that maps with other runtime
  // type arguments are not silently stringified as `{key: value}`.
  if (obj is Map) {
    final buf = StringBuffer();
    for (final entry in obj.entries) {
      final openTag = '<s_${entry.key}>';
      final closeTag = '</s_${entry.key}>';

      buf
        ..write(openTag)
        ..write(json2token(
          entry.value,
          updateSpecialTokens: updateSpecialTokens,
          tokenizer: tokenizer,
        ))
        ..write(closeTag);

      // Registered after the recursive call, so tags of nested keys reach the
      // tokenizer before the tags of the enclosing key.
      if (updateSpecialTokens && tokenizer != null) {
        tokenizer.addSpecialTokens([openTag, closeTag]);
      }
    }
    return buf.toString();
  }

  if (obj is List) {
    // The separator is registered before any element is converted: `map` is
    // lazy, so the recursive calls only run while `join` iterates.
    if (updateSpecialTokens && tokenizer != null) {
      tokenizer.addSpecialTokens(['<sep/>']);
    }
    return obj
        .map((element) => json2token(
              element,
              updateSpecialTokens: updateSpecialTokens,
              tokenizer: tokenizer,
            ))
        .join('<sep/>');
  }

  // Scalars (and anything else, including null) become their literal text.
  return obj.toString();
}