encodeNative method
Implementation
/// Encodes [text] into token ids, treating only the special tokens listed in
/// [allowedSpecial] as special.
///
/// The text is scanned with `specialRegex`. A match whose text is in
/// [allowedSpecial] is emitted as the id found in `specialTokensEncoder`;
/// any other match is skipped and stays part of the ordinary text. Ordinary
/// text is split with `regex`, and each piece is either looked up directly in
/// `encoder` or passed to `util.bytePairEncode`.
///
/// Returns the token ids together with the number of tokens produced by the
/// last ordinary piece. That count is 0 when no ordinary piece was encoded
/// or when the last thing encoded was a special token.
///
/// NOTE(review): assumes every allowed special token that `specialRegex` can
/// match has an entry in `specialTokensEncoder`; otherwise the null
/// assertion below throws.
_Tuple2<Uint32List, int> encodeNative(
  String text,
  Set<String> allowedSpecial,
) {
  final tokens = <int>[];
  var start = 0;
  var lastPieceTokenLen = 0;
  while (true) {
    Match? nextSpecial;
    // Absolute index in [text] at which the searched substring begins. Match
    // offsets are relative to this index, not to [start], so it has to be
    // tracked separately once a disallowed special token has been skipped.
    var searchBase = start;
    while (true) {
      nextSpecial = specialRegex.firstMatch(text.substring(searchBase));
      if (nextSpecial == null) {
        break;
      }
      if (allowedSpecial.contains(nextSpecial.group(0)!)) {
        break;
      }
      // Disallowed special token: resume the search right after it. It stays
      // inside the ordinary-text segment encoded below.
      searchBase += nextSpecial.end;
    }

    // Ordinary text runs up to the next allowed special token, or to the end
    // of the input when there is none.
    final end =
        nextSpecial == null ? text.length : searchBase + nextSpecial.start;
    for (final mat in regex.allMatches(text.substring(start, end))) {
      final piece = ByteArray.fromList(utf8.encode(mat.group(0)!));
      final direct = encoder[piece];
      if (direct != null) {
        // The whole piece is a single known token.
        lastPieceTokenLen = 1;
        tokens.add(direct);
        continue;
      }
      final encoded = util.bytePairEncode(piece, encoder);
      lastPieceTokenLen = encoded.length;
      tokens.addAll(encoded);
    }

    if (nextSpecial == null) {
      break;
    }
    tokens.add(specialTokensEncoder[nextSpecial.group(0)!]!);
    // Continue after the special token, using its absolute end position.
    start = searchBase + nextSpecial.end;
    lastPieceTokenLen = 0;
  }
  return _Tuple2(Uint32List.fromList(tokens), lastPieceTokenLen);
}