regenerateAllEmbeddings method

Future<void> regenerateAllEmbeddings({
  void onProgress(int done, int total)?,
})

Regenerates embeddings for all existing chunks using the current model, then rebuilds the HNSW index. This is needed when the embedding model or tokenizer has been updated.

Implementation

/// Re-embeds every stored chunk and then rebuilds the HNSW index.
///
/// Loads all chunk IDs and contents via [getAllChunkIdsAndContents], passes
/// each chunk's content to [EmbeddingService.embed], and writes the result
/// back with [updateChunkEmbedding]. Chunks are handled one at a time, in
/// list order. Intended for use after the embedding model or tokenizer has
/// changed.
///
/// [onProgress], when provided, is invoked after each chunk has been written,
/// with the number of chunks finished so far and the total chunk count.
Future<void> regenerateAllEmbeddings({
  void Function(int done, int total)? onProgress,
}) async {
  // Load every chunk that needs a fresh embedding.
  final chunks = await getAllChunkIdsAndContents(dbPath: dbPath);
  final total = chunks.length;
  print('[regenerateAllEmbeddings] Found $total chunks to re-embed');

  for (var index = 0; index < total; index++) {
    final current = chunks[index];
    // 1-based count of chunks finished once this iteration completes.
    final done = index + 1;

    // Compute the new vector, then persist it before moving on.
    final vector = await EmbeddingService.embed(current.content);
    await updateChunkEmbedding(
      dbPath: dbPath,
      chunkId: current.chunkId,
      embedding: Float32List.fromList(vector),
    );

    if (onProgress != null) {
      onProgress(done, total);
    }

    // Emit a console line on every 50th chunk.
    if (done % 50 == 0) {
      print('[regenerateAllEmbeddings] Progress: $done/$total');
    }
  }

  print('[regenerateAllEmbeddings] Completed. Rebuilding HNSW index...');

  // The stored vectors have changed, so the index is rebuilt last.
  await rebuildIndex();

  print('[regenerateAllEmbeddings] Done!');
}