encodeNative method

_Tuple2<Uint32List, int> encodeNative(
  String text,
  Set<String> allowedSpecial
)

Implementation

/// Encodes [text] into token ids, honouring only the special tokens listed
/// in [allowedSpecial].
///
/// The text is scanned for matches of `specialRegex`. A match whose text is
/// in [allowedSpecial] ends the current ordinary-text segment and is emitted
/// as its id from `specialTokensEncoder`. A match that is not allowed is
/// skipped by the search and therefore stays inside the ordinary-text
/// segment, where it is tokenized like any other text.
///
/// Each ordinary-text segment is split into pieces with `regex`; a piece is
/// emitted as a single id when its UTF-8 bytes are a key of `encoder`, and is
/// otherwise passed to `util.bytePairEncode`.
///
/// Returns a `_Tuple2` holding the token ids as a [Uint32List] and the number
/// of tokens produced by the last ordinary piece. That count is `0` when the
/// last emitted token was a special token or when no piece was encoded.
///
/// The null assertion on the `specialTokensEncoder` lookup fails if an
/// allowed special token has no entry there.
_Tuple2<Uint32List, int> encodeNative(
  String text,
  Set<String> allowedSpecial,
) {
  final tokens = <int>[];

  // `start` and `startFind` are absolute indices into `text`.
  var start = 0;
  var lastPieceTokenLen = 0;

  while (true) {
    // Find the next *allowed* special token at or after `start`, if any.
    Match? nextSpecial;
    var startFind = start;
    while (true) {
      // Searching from `startFind` inside the full string (rather than in a
      // substring) keeps the match offsets absolute. Offsets relative to
      // `startFind` must not be added to `start`: the two differ as soon as
      // one disallowed special token has been skipped.
      final candidates = specialRegex.allMatches(text, startFind).iterator;
      nextSpecial = candidates.moveNext() ? candidates.current : null;
      if (nextSpecial == null ||
          allowedSpecial.contains(nextSpecial.group(0)!)) {
        break;
      }
      // Disallowed special token: resume the search right after it.
      startFind = nextSpecial.end;
    }

    // Ordinary text runs up to the allowed special token, or to the end.
    final end = nextSpecial?.start ?? text.length;

    for (final mat in regex.allMatches(text.substring(start, end))) {
      final piece = ByteArray.fromList(utf8.encode(mat.group(0)!));
      final rank = encoder[piece];
      if (rank != null) {
        // The whole piece is a known token.
        lastPieceTokenLen = 1;
        tokens.add(rank);
        continue;
      }

      final encoded = util.bytePairEncode(piece, encoder);
      lastPieceTokenLen = encoded.length;
      tokens.addAll(encoded);
    }

    if (nextSpecial == null) {
      break;
    }

    // Emit the special token and continue right after it.
    tokens.add(specialTokensEncoder[nextSpecial.group(0)!]!);
    start = nextSpecial.end;
    lastPieceTokenLen = 0;
  }

  return _Tuple2(Uint32List.fromList(tokens), lastPieceTokenLen);
}