tokenize method

List<String> tokenize(
  1. String text
)

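Tokenizes text into a list of token strings using byte-pair encoding (BPE). The text is first split on special tokens; any resulting piece that is a special token or already a vocabulary entry is emitted unchanged, and every other piece is broken down with BPE. Empty input yields an empty list.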

Implementation

/// Converts [text] into its sequence of token strings.
///
/// The input is first split by `_splitOnSpecialTokens`. Each resulting
/// piece is then handled in order:
///
/// * a piece found in `specialTokens`, or present as a key in `vocab`,
///   is emitted as a single token, unchanged;
/// * any other piece is passed to `_applyBpe` and all of the tokens it
///   returns are emitted in its place.
///
/// Returns an empty list when [text] is empty.
List<String> tokenize(String text) {
  if (text.isEmpty) {
    return [];
  }

  return [
    for (final segment in _splitOnSpecialTokens(text))
      if (specialTokens.contains(segment) || vocab.containsKey(segment))
        // Already a complete token: keep it as-is.
        segment
      else
        // Ordinary text: expand into its BPE tokens.
        ..._applyBpe(segment),
  ];
}