encodeWithUnstable method

(List<int>, Set<List<int>>) encodeWithUnstable(
  String text, {
  SpecialTokensSet allowedSpecial = const SpecialTokensSet.empty(),
  SpecialTokensSet disallowedSpecial = const SpecialTokensSet.all(),
})

Encodes a string into stable tokens and possible completion sequences.

Note that the stable tokens will only represent a substring of text.

See encode for more details on allowedSpecial and disallowedSpecial.

This API should itself be considered unstable.

final enc = Tiktoken.getEncoderForModel(OpenAiModel.gpt_4);
enc.encodeWithUnstable("hello fanta");

final text = "hello";
final (stableTokens, completions) = enc.encodeWithUnstable(text);
// Invariants (pseudocode carried over from the Python original; the exact
// Dart byte-decoding call should be confirmed against the encoder's API):
//   bytes(text) starts with decodeBytes(stableTokens)
//   for every seq in completions:
//     decodeBytes(stableTokens + seq) starts with bytes(text)

Implementation

/// Encodes [text] into stable tokens and possible completion sequences.
///
/// The first element of the returned record holds the stable tokens, which
/// only represent a substring of [text]; the second element holds the
/// candidate completion sequences.
///
/// [allowedSpecial] and [disallowedSpecial] select which special tokens may
/// appear in [text]. When either one reports `isAll`, it is resolved against
/// this encoder's `specialTokensSet` rather than its own `set`.
///
/// This API should itself be considered unstable.
(List<int>, Set<List<int>>) encodeWithUnstable(
  String text, {
  SpecialTokensSet allowedSpecial = const SpecialTokensSet.empty(),
  SpecialTokensSet disallowedSpecial = const SpecialTokensSet.all(),
}) {
  // "All allowed" resolves to the encoder's full special-token set.
  final permitted =
      allowedSpecial.isAll ? specialTokensSet : allowedSpecial.set;

  // "All disallowed" resolves to every special token not explicitly
  // permitted above.
  final forbidden = disallowedSpecial.isAll
      ? specialTokensSet.difference(permitted)
      : disallowedSpecial.set;

  // NOTE(review): presumably rejects text containing a forbidden special
  // token — confirm against _verifyDisallowed.
  _verifyDisallowed(text, forbidden);

  // Repackage the native result's two fields as a Dart record.
  final encoded = _coreBPE.encodeUnstableNative(text, permitted);
  return (encoded.i1, encoded.i2);
}