splitByTokens method
Splits text into chunks of approximately chunkSize tokens.
If overlap is provided, each chunk overlaps with the previous one by
approximately that many tokens (estimated at four characters per token).
Implementation
/// Splits [text] into consecutive chunks whose token count, as reported by
/// `_encoder.count`, does not exceed [chunkSize].
///
/// Each chunk is found by starting with the whole remaining text and
/// shrinking it proportionally until it fits. A chunk always contains at
/// least one character, so a single character that alone exceeds [chunkSize]
/// is still emitted as its own chunk.
///
/// When [overlap] is greater than zero, every chunk after the first starts
/// before the end of the previous chunk. The overlap is approximated as
/// `overlap * 4` characters rather than measured in tokens, and it is capped
/// so that each chunk starts at least one character after the previous
/// chunk's start (this guarantees termination). Non-positive [overlap]
/// values disable overlapping.
///
/// Returns an empty list when [text] is empty.
///
/// NOTE(review): boundaries are `substring` offsets (UTF-16 code units), so a
/// chunk may begin or end inside a surrogate pair — confirm whether callers
/// need grapheme-safe boundaries.
List<String> splitByTokens(String text, int chunkSize, {int overlap = 0}) {
  if (text.isEmpty) return [];

  final chunks = <String>[];
  var start = 0;

  while (start < text.length) {
    // Begin with everything that is left and shrink until it fits.
    var end = text.length;
    var tokens = _encoder.count(text.substring(start, end));
    while (tokens > chunkSize && end > start + 1) {
      // Scale the candidate length by the ratio of allowed to actual tokens.
      // Because tokens > chunkSize the ratio is below 1, so `end` strictly
      // decreases on every pass.
      end = start + ((end - start) * chunkSize / tokens).floor();
      // Never produce an empty chunk.
      if (end <= start) end = start + 1;
      tokens = _encoder.count(text.substring(start, end));
    }

    chunks.add(text.substring(start, end));
    if (end >= text.length) break;

    if (overlap > 0) {
      // Rough heuristic: one token is about four characters.
      final wanted = overlap * 4;
      // Cap the step back so the next chunk starts strictly after this
      // chunk's start; otherwise `start` would never advance and the loop
      // would not terminate.
      final maxBack = end - start - 1;
      final overlapChars = wanted < maxBack ? wanted : maxBack;
      start = end - overlapChars;
    } else {
      start = end;
    }
  }

  return chunks;
}