encode method
Encode text to a list of token IDs.
text: input string to tokenize
addSpecialTokens: if true, prepends the BOS token and appends the EOS token
maxLength: optional maximum length of the returned sequence; longer sequences are truncated to this length
Implementation
/// Encodes [text] into a list of token IDs.
///
/// The text is split with `tokenize` and each token is looked up in `vocab`;
/// tokens that are not present map to `unkTokenId`.
///
/// When [addSpecialTokens] is true, the sequence is wrapped as
/// `[bosTokenId, ...ids, eosTokenId]`.
///
/// When [maxLength] is given, the returned list never has more than
/// [maxLength] elements. With [addSpecialTokens] enabled, the content tokens
/// are truncated (not the wrapper), so a truncated sequence still ends with
/// `eosTokenId` whenever [maxLength] is at least 2.
///
/// Throws a [RangeError] if [maxLength] is negative.
List<int> encode(
  String text, {
  bool addSpecialTokens = true,
  int? maxLength,
}) {
  final limit = maxLength;
  if (limit != null && limit < 0) {
    throw RangeError.range(limit, 0, null, 'maxLength');
  }

  // Unknown tokens fall back to the unknown-token ID.
  var ids = <int>[
    for (final token in tokenize(text)) vocab[token] ?? unkTokenId,
  ];

  if (addSpecialTokens) {
    if (limit != null) {
      // Reserve two slots for BOS/EOS so truncation never cuts off EOS.
      final contentBudget = limit > 2 ? limit - 2 : 0;
      if (ids.length > contentBudget) {
        ids = ids.sublist(0, contentBudget);
      }
    }
    ids = [bosTokenId, ...ids, eosTokenId];
  }

  // Hard cap: without special tokens this is the ordinary truncation; with
  // them it only triggers when the limit is too small (< 2) to hold both.
  if (limit != null && ids.length > limit) {
    ids = ids.sublist(0, limit);
  }
  return ids;
}