mergeSplits method
Merges smaller pieces into medium-sized chunks to send to the LLM.
Implementation
/// Merges the small pieces in [splits] into larger chunks joined by
/// [separator].
///
/// Pieces are accumulated in a sliding window. Sizes are measured with
/// `lengthFunction`, and one separator is counted between each pair of
/// adjacent pieces. When appending the next piece would push the window past
/// `chunkSize`, the window is joined with `_joinDocs` and emitted (unless the
/// join yields `null`). Leading pieces are then dropped until the window is
/// no larger than `chunkOverlap` and the next piece fits, or until the window
/// is empty.
///
/// A single piece that is itself larger than `chunkSize` is not split any
/// further by this method.
///
/// Returns the emitted chunks in input order.
@protected
@visibleForTesting
List<String> mergeSplits(
  final List<String> splits,
  final String separator,
) {
  final sepLength = lengthFunction(separator);
  final merged = <String>[];
  var window = <String>[];
  var windowLength = 0;

  // Joins the current window and records the result when it is non-null.
  void emitWindow() {
    final joined = _joinDocs(window, separator: separator);
    if (joined != null) {
      merged.add(joined);
    }
  }

  for (final piece in splits) {
    final pieceLength = lengthFunction(piece);

    // Whether appending `piece` (plus a separator when the window already
    // holds something) would exceed `chunkSize`.
    bool wouldOverflow() =>
        windowLength + pieceLength + (window.isEmpty ? 0 : sepLength) >
        chunkSize;

    if (wouldOverflow()) {
      // TODO Log warning when windowLength > chunkSize:
      // 'Created a chunk of size $total,
      // which is longer than the specified $chunkSize'
      if (window.isNotEmpty) {
        emitWindow();

        // Drop pieces from the front while either:
        // - the window is still larger than the allowed overlap, or
        // - the window is non-empty (by length) and the new piece would
        //   still not fit.
        while (windowLength > chunkOverlap ||
            (wouldOverflow() && windowLength > 0)) {
          // The head piece takes one separator with it unless it is the
          // only piece left.
          final dropped =
              lengthFunction(window[0]) + (window.length > 1 ? sepLength : 0);
          windowLength -= dropped;
          window = window.sublist(1);
        }
      }
    }

    window.add(piece);
    // A separator is only counted once the window holds more than one piece.
    windowLength += pieceLength + (window.length > 1 ? sepLength : 0);
  }

  // Emit whatever is left in the window after the last piece.
  emitWindow();
  return merged;
}