regenerateAllEmbeddings method
Regenerate embeddings for all existing chunks using the current model. This is needed when the embedding model or tokenizer has been updated.
Implementation
/// Re-embeds every chunk in this collection, then rebuilds the index.
///
/// The collection is walked source by source, and each source's chunks are
/// fetched in fixed-size index windows so only one window is held at a time.
/// Every fetched chunk's content is passed to `EmbeddingService.embed` and the
/// result is written back through `rust_rag.updateChunkEmbedding`. Once all
/// sources have been visited, `rebuildIndex(force: true)` is awaited.
///
/// [onProgress], when supplied, is invoked after every window with the number
/// of chunks attempted so far and the collection-wide chunk count reported by
/// `rust_rag.getSourceStatsInCollection`. "Attempted" includes chunks whose
/// fetch or re-embedding failed, so the count keeps advancing past failures.
///
/// Fetch and per-chunk failures are handled best-effort: they are logged with
/// `debugPrint` and skipped, leaving the affected chunks with their previous
/// embedding. Errors from the initial stats/source listing, the per-source
/// chunk count, or the final index rebuild are not caught here and propagate
/// to the caller.
Future<void> regenerateAllEmbeddings({
  void Function(int done, int total)? onProgress,
}) async {
  // Number of chunk indices requested per fetch.
  const batchSize = 50;

  // 1. Collection-wide total, used only as the denominator for progress.
  final stats = await rust_rag.getSourceStatsInCollection(
    collectionId: collectionId,
  );
  final totalChunks = stats.chunkCount.toInt();
  debugPrint(
    '[regenerateAllEmbeddings] Found $totalChunks chunks to re-embed (Safe Batch Mode)',
  );

  // 2. Process one source at a time instead of loading every chunk up front.
  final sources = await rust_rag.listSourcesInCollection(
    collectionId: collectionId,
  );
  var processedCount = 0;
  var failedChunks = 0;

  for (final source in sources) {
    final sourceId = source.id.toInt();
    final chunkCount = await rust_rag.getSourceChunkCount(
      sourceId: source.id,
    );

    for (var offset = 0; offset < chunkCount; offset += batchSize) {
      // Request the inclusive index window [offset, offset + batchSize - 1].
      // NOTE(review): this assumes chunk indices within a source run
      // contiguously from 0 to chunkCount - 1 — confirm against the store.
      List<ChunkSearchResult>? batch;
      try {
        batch = await rust_rag.getAdjacentChunks(
          sourceId: source.id,
          minIndex: offset,
          maxIndex: offset + batchSize - 1, // Inclusive
        );
      } catch (e) {
        // Deliberately broad: one unreadable window must not abort the run.
        debugPrint(
          '[regenerateAllEmbeddings] Failed to fetch batch for source $sourceId: $e',
        );
      }

      if (batch == null) {
        // The window could not be read. Count it as attempted anyway so the
        // progress callback still advances toward the total instead of
        // stalling at the point of failure.
        final remaining = chunkCount - offset;
        final skipped = remaining < batchSize ? remaining : batchSize;
        failedChunks += skipped;
        processedCount += skipped;
      } else {
        for (final chunk in batch) {
          try {
            final embedding = await EmbeddingService.embed(chunk.content);
            await rust_rag.updateChunkEmbedding(
              chunkId: chunk.chunkId,
              embedding: Float32List.fromList(embedding),
            );
          } catch (e) {
            // Keep going; this chunk retains its previous embedding.
            failedChunks++;
            debugPrint(
              '[regenerateAllEmbeddings] Failed to update chunk ${chunk.chunkId}: $e',
            );
          }
        }
        processedCount += batch.length;
      }

      onProgress?.call(processedCount, totalChunks);

      // Yield to the event loop between windows to prevent UI jank.
      await Future.delayed(Duration.zero);
    }
  }

  if (failedChunks > 0) {
    // Surface partial failure in one place: these chunks still carry
    // embeddings produced before this run.
    debugPrint(
      '[regenerateAllEmbeddings] $failedChunks of $processedCount chunks could not be re-embedded',
    );
  }

  debugPrint('[regenerateAllEmbeddings] Completed. Rebuilding HNSW index...');

  // 3. Rebuild the HNSW index over the refreshed embeddings.
  await rebuildIndex(force: true);
  debugPrint('[regenerateAllEmbeddings] Done!');
}