compressWithSimilarity static method
Future<CompressedContext> compressWithSimilarity({
  required List<ChunkSearchResult> chunks,
  required List<double> queryEmbedding,
  CompressionLevel level = CompressionLevel.balanced,
  int maxSentences = 15,
  double minSimilarity = 0.2,
  String language = 'ko',
})
Compress with Phase 2 similarity-based selection.
Combines Phase 1 (rule-based compression) with Phase 2 (similarity-based sentence selection):
- Split the combined chunk text into sentences
- Score each sentence against the query embedding (see the sketch below)
- Select the top-K highest-scoring sentences, preserving their original order
- Apply Phase 1 compression to the selected text (duplicate removal; stopword removal stays disabled to avoid damaging context)
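The scoring step is delegated to a scoreSentences helper whose internals are not shown on this page. As a rough conceptual sketch only, assuming each sentence already has an embedding and that relevance is measured with plain cosine similarity (both assumptions, not guaranteed to match the real helper), top-K selection could look like this:

import 'dart:math' as math;

/// Illustrative result shape; mirrors the index/sentence fields used in the
/// implementation below, with an added score field.
class ScoredSentence {
  const ScoredSentence(this.index, this.sentence, this.score);
  final int index;
  final String sentence;
  final double score;
}

double cosineSimilarity(List<double> a, List<double> b) {
  var dot = 0.0, normA = 0.0, normB = 0.0;
  for (var i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  if (normA == 0 || normB == 0) return 0.0;
  return dot / (math.sqrt(normA) * math.sqrt(normB));
}

/// Conceptual sketch of similarity-based selection. sentenceEmbeddings is a
/// hypothetical input aligned index-for-index with sentences.
List<ScoredSentence> selectTopK({
  required List<String> sentences,
  required List<List<double>> sentenceEmbeddings,
  required List<double> queryEmbedding,
  required int topK,
  required double minSimilarity,
}) {
  final scored = <ScoredSentence>[];
  for (var i = 0; i < sentences.length; i++) {
    final score = cosineSimilarity(sentenceEmbeddings[i], queryEmbedding);
    if (score >= minSimilarity) {
      scored.add(ScoredSentence(i, sentences[i], score));
    }
  }
  scored.sort((a, b) => b.score.compareTo(a.score)); // best match first
  return scored.take(topK).toList();
}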
chunks - Search result chunks to compress.
queryEmbedding - Pre-computed query embedding.
level - Compression level for Phase 1 post-processing.
maxSentences - Maximum sentences to keep after selection.
minSimilarity - Minimum similarity threshold for a sentence to be selected.
language - Language code used for Phase 1 compression (defaults to 'ko').
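A minimal call-site sketch; the ContextCompressor class name and the searchChunks / embedQuery helpers are assumptions used only for illustration:

// Hypothetical call site. ContextCompressor, searchChunks, and embedQuery
// are placeholder names, not part of this API.
final chunks = await searchChunks('how does billing work?');
final queryEmbedding = await embedQuery('how does billing work?');

final compressed = await ContextCompressor.compressWithSimilarity(
  chunks: chunks,
  queryEmbedding: queryEmbedding,
  maxSentences: 10,
  minSimilarity: 0.3,
  language: 'en',
);

print('Kept ${compressed.compressedChars} of ${compressed.originalChars} chars '
    '(ratio ${compressed.ratio}, ~${compressed.estimatedTokensSaved} tokens saved)');
// compressed.text is the context string to place into the prompt.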
Implementation
static Future<CompressedContext> compressWithSimilarity({
  required List<ChunkSearchResult> chunks,
  required List<double> queryEmbedding,
  CompressionLevel level = CompressionLevel.balanced,
  int maxSentences = 15,
  double minSimilarity = 0.2,
  String language = 'ko',
}) async {
  if (chunks.isEmpty || queryEmbedding.isEmpty) {
    return const CompressedContext(
      text: '',
      originalChars: 0,
      compressedChars: 0,
      ratio: 1.0,
      estimatedTokensSaved: 0,
      includedChunks: [],
    );
  }

  // Combine all chunk content
  final originalText = chunks.map((c) => c.content).join('\n\n');
  final originalChars = originalText.length;

  // Step 1: Split into sentences
  final sentences = await rust.splitSentences(text: originalText);

  // Step 2: Score and select relevant sentences
  final scored = await scoreSentences(
    sentences: sentences,
    queryEmbedding: queryEmbedding,
    topK: maxSentences,
    minSimilarity: minSimilarity,
  );

  // Step 3: Reconstruct text from selected sentences (preserve original order)
  final selectedSentences = scored.toList()
    ..sort((a, b) => a.index.compareTo(b.index));
  final selectedText = selectedSentences.map((s) => s.sentence).join(' ');

  // Step 4: Apply Phase 1 compression on selected text
  final options = rust.CompressionOptions(
    removeStopwords: false, // Disabled - damages context
    removeDuplicates: true,
    language: language,
    level: level.index,
  );
  final result = await rust.compressText(
    text: selectedText,
    maxChars: 0, // No char limit after similarity selection
    options: options,
  );

  // Calculate token savings
  final originalTokens = (originalChars / 4).ceil();
  final compressedTokens = (result.compressedChars / 4).ceil();
  final tokensSaved = originalTokens - compressedTokens;

  return CompressedContext(
    text: result.text,
    originalChars: originalChars,
    compressedChars: result.compressedChars,
    ratio: result.ratio,
    estimatedTokensSaved: tokensSaved,
    includedChunks: chunks,
  );
}
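As an illustrative example of the 4-characters-per-token heuristic used above (the numbers are made up): if the combined chunks total 8,000 characters and the compressed output is 2,400 characters, the estimate is ceil(8000 / 4) - ceil(2400 / 4) = 2000 - 600 = 1,400 tokens saved.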