tokenize method
Tokenize text into a list of token strings using BPE.
Implementation
/// Tokenizes [text] into a list of token strings.
///
/// Returns an empty list when [text] is empty. Otherwise [text] is first cut
/// into segments by `_splitOnSpecialTokens`, and each segment is handled in
/// order:
///
/// * a segment found in `specialTokens`, or present as a key of `vocab`, is
///   emitted unchanged as a single token;
/// * any other segment is passed to `_applyBpe`, and every token it yields
///   is appended to the output.
List<String> tokenize(String text) {
  if (text.isEmpty) return [];

  return [
    for (final segment in _splitOnSpecialTokens(text))
      // Known tokens pass straight through; only unknown text needs BPE.
      if (specialTokens.contains(segment) || vocab.containsKey(segment))
        segment
      else
        ..._applyBpe(segment),
  ];
}