addSourceWithChunking method

Future<SourceAddResult> addSourceWithChunking(
  String content, {
  String? metadata,
  void onProgress(int done, int total)?,
})

Add a source document with automatic chunking and embedding.

The document is processed in three steps:

  1. It is split into chunks based on chunkConfig.
  2. Each chunk is embedded.
  3. The source and its chunks are stored in the DB.

Implementation

/// Adds [content] as a source document, then chunks, embeds, and stores it.
///
/// The source is registered first through `addSource`. When that call reports
/// a duplicate, the result is returned immediately with `isDuplicate: true`
/// and a chunk count of 0; no chunking or embedding takes place.
///
/// Otherwise the text is split with `semanticChunkWithOverlap` (bounded by
/// `maxChunkChars` and `overlapChars`), every chunk is embedded one after
/// another via `EmbeddingService.embed`, and all chunks are written with a
/// single `addChunks` call.
///
/// [metadata] is passed through to `addSource` unchanged.
///
/// [onProgress] is invoked with `(done, total)` before each chunk is embedded
/// (so `done` starts at 0) and one final time with `done == total` once all
/// embeddings exist, before the chunks are stored.
///
/// Returns a [SourceAddResult] carrying the source id, the duplicate flag, the
/// number of chunks, and a message.
Future<SourceAddResult> addSourceWithChunking(
  String content, {
  String? metadata,
  void Function(int done, int total)? onProgress,
}) async {
  // Register the source before anything else so duplicates exit early.
  // NOTE(review): the source is written before any embedding is computed; if
  // `embed` or `addChunks` throws, the source presumably stays behind without
  // chunks, and a retry would then hit the duplicate branch. Confirm whether
  // callers need cleanup for that case.
  final registered = await addSource(
    dbPath: dbPath,
    content: content,
    metadata: metadata,
  );

  if (registered.isDuplicate) {
    return SourceAddResult(
      sourceId: registered.sourceId.toInt(),
      isDuplicate: true,
      chunkCount: 0,
      message: registered.message,
    );
  }

  // Builds the result for a source that was newly added (not a duplicate).
  SourceAddResult freshResult(int count, String note) {
    return SourceAddResult(
      sourceId: registered.sourceId.toInt(),
      isDuplicate: false,
      chunkCount: count,
      message: note,
    );
  }

  // Semantic chunking is done by the Rust side (Unicode sentence/word
  // boundaries, per the original implementation note).
  final chunks = semanticChunkWithOverlap(
    text: content,
    maxChars: maxChunkChars,
    overlapChars: overlapChars,
  );

  if (chunks.isEmpty) {
    return freshResult(0, 'No chunks created');
  }

  // Embed sequentially, reporting progress before each chunk is processed.
  final total = chunks.length;
  final embedded = <ChunkData>[];
  var done = 0;

  for (final piece in chunks) {
    if (onProgress != null) {
      onProgress(done, total);
    }

    final vector = await EmbeddingService.embed(piece.content);
    final record = ChunkData(
      content: piece.content,
      chunkIndex: piece.index,
      startPos: piece.startPos,
      endPos: piece.endPos,
      chunkType: piece.chunkType,
      embedding: Float32List.fromList(vector),
    );
    embedded.add(record);

    done++;
  }

  // Final tick: every chunk has an embedding; storage has not happened yet.
  if (onProgress != null) {
    onProgress(total, total);
  }

  // Persist all chunks for this source in one call.
  await addChunks(
    dbPath: dbPath,
    sourceId: registered.sourceId,
    chunks: embedded,
  );

  return freshResult(chunks.length, 'Added ${chunks.length} chunks');
}