regenerateAllEmbeddings method

Future<void> regenerateAllEmbeddings({
  void Function(int done, int total)? onProgress,
})

Regenerate embeddings for all existing chunks using the current model. This is needed when the embedding model or tokenizer has been updated.

Implementation

/// Re-embeds every chunk in this collection, then rebuilds the HNSW index.
///
/// Work proceeds source by source, in index windows of 50 chunks, rather
/// than loading all chunks at once. After each window that was fetched
/// successfully, [onProgress] is invoked with the number of chunks handled
/// so far and the collection-wide chunk count.
///
/// Failures are best-effort: a window that cannot be fetched is logged and
/// skipped (no progress callback is made for it), and a chunk whose
/// embedding or update throws is logged but still counted as handled.
Future<void> regenerateAllEmbeddings({
  void Function(int done, int total)? onProgress,
}) async {
  // Number of chunk indices requested per fetch.
  const windowSize = 50;

  // Collection-wide chunk count; used only as the progress denominator.
  final collectionStats = await rust_rag.getSourceStatsInCollection(
    collectionId: collectionId,
  );
  final totalChunks = collectionStats.chunkCount.toInt();

  debugPrint(
    '[regenerateAllEmbeddings] Found $totalChunks chunks to re-embed (Safe Batch Mode)',
  );

  // Computes a fresh embedding for one chunk and stores it. Any failure is
  // logged and swallowed so the remaining chunks are still attempted.
  Future<void> reembed(ChunkSearchResult chunk) async {
    try {
      final embedding = await EmbeddingService.embed(chunk.content);
      await rust_rag.updateChunkEmbedding(
        chunkId: chunk.chunkId,
        embedding: Float32List.fromList(embedding),
      );
    } catch (e) {
      debugPrint(
        '[regenerateAllEmbeddings] Failed to update chunk ${chunk.chunkId}: $e',
      );
    }
  }

  final allSources = await rust_rag.listSourcesInCollection(
    collectionId: collectionId,
  );
  var handled = 0;

  for (final source in allSources) {
    final sourceId = source.id.toInt();
    final chunksInSource = await rust_rag.getSourceChunkCount(
      sourceId: source.id,
    );

    // Fetches the chunks whose index lies in
    // [first, first + windowSize - 1] (upper bound inclusive), or returns
    // null after logging when the fetch throws.
    // NOTE(review): assumes chunk indices of a source start at 0 and are
    // contiguous - confirm against getAdjacentChunks.
    Future<List<ChunkSearchResult>?> fetchWindow(int first) async {
      try {
        return await rust_rag.getAdjacentChunks(
          sourceId: source.id,
          minIndex: first,
          maxIndex: first + windowSize - 1,
        );
      } catch (e) {
        debugPrint(
          '[regenerateAllEmbeddings] Failed to fetch batch for source $sourceId: $e',
        );
        return null;
      }
    }

    for (var first = 0; first < chunksInSource; first += windowSize) {
      final window = await fetchWindow(first);
      if (window == null) {
        // Fetch failed: move on to the next window without reporting progress.
        continue;
      }

      // Chunks are re-embedded one at a time, in the order returned.
      for (final chunk in window) {
        await reembed(chunk);
      }

      handled += window.length;
      onProgress?.call(handled, totalChunks);

      // Hand control back to the event loop between windows so the UI
      // stays responsive.
      await Future.delayed(Duration.zero);
    }
  }

  debugPrint('[regenerateAllEmbeddings] Completed. Rebuilding HNSW index...');

  // The stored vectors have changed, so force a full index rebuild.
  await rebuildIndex(force: true);

  debugPrint('[regenerateAllEmbeddings] Done!');
}