mergeSplits method

@protected
@visibleForTesting
List<String> mergeSplits(
  List<String> splits,
  String separator
)

Merges smaller pieces into medium-sized chunks to send to the LLM.

Implementation

/// Merges smaller pieces into medium-sized chunks to send to the LLM.
///
/// Walks [splits] in order and accumulates them in a sliding window whose
/// size is measured with `lengthFunction`, counting one [separator] between
/// adjacent pieces. When appending the next piece would push the window past
/// `chunkSize`, the window is joined with `_joinDocs` and emitted (unless the
/// join yields `null`). Leading pieces are then dropped until the retained
/// size is at most `chunkOverlap` and the next piece fits, or until the
/// window is empty.
///
/// A piece that on its own exceeds `chunkSize` is not split any further
/// here; it is simply placed in a window by itself.
///
/// Returns the emitted chunks in the order they were produced.
@protected
@visibleForTesting
List<String> mergeSplits(
  final List<String> splits,
  final String separator,
) {
  final sepLength = lengthFunction(separator);

  final chunks = <String>[];
  final window = <String>[];
  var windowLength = 0;

  // Joins the current window and records it as a chunk when the join
  // produces a value.
  void flush() {
    final joined = _joinDocs(window, separator: separator);
    if (joined != null) {
      chunks.add(joined);
    }
  }

  for (final piece in splits) {
    final pieceLength = lengthFunction(piece);

    // Whether appending `piece` (plus a separator when the window already
    // holds something) would exceed `chunkSize`.
    bool overflows() =>
        windowLength + pieceLength + (window.isEmpty ? 0 : sepLength) >
        chunkSize;

    if (overflows()) {
      if (windowLength > chunkSize) {
        // TODO Log warning:
        // 'Created a chunk of size $total,
        // which is longer than the specified $chunkSize'
      }
      if (window.isNotEmpty) {
        flush();
        // Drop pieces from the front while either:
        // - the window is still larger than the allowed overlap, or
        // - the window is non-empty and the next piece still does not fit.
        while (windowLength > chunkOverlap ||
            (windowLength > 0 && overflows())) {
          final dropped = window.removeAt(0);
          // A separator is only released when another piece remains behind
          // the one that was just removed.
          windowLength -=
              lengthFunction(dropped) + (window.isNotEmpty ? sepLength : 0);
        }
      }
    }

    window.add(piece);
    windowLength += pieceLength + (window.length > 1 ? sepLength : 0);
  }

  // Emit whatever is left in the window after the last piece.
  flush();

  return chunks;
}