addSourceWithChunking method

Future<SourceAddResult> addSourceWithChunking(
  String content, {
  String? metadata,
  String? filePath,
  ChunkingStrategy? strategy,
  void Function(int done, int total)? onProgress,
})

Add a source document with automatic chunking and embedding.

The document is processed as follows:

  1. The content is split into chunks based on file type (auto-detected from filePath)
  2. Each chunk is embedded
  3. The source and its chunks are stored in the DB

If strategy is not given, the chunking strategy is auto-detected from filePath:

  • .md, .markdown → Markdown-aware chunking (preserves headers, code blocks)
  • Other files → Default recursive chunking

Implementation

/// Stores [content] as a source document, then chunks, embeds and stores it.
///
/// The source is written first through `addSource`. If that call reports a
/// duplicate, the method returns right away with `isDuplicate: true` and a
/// `chunkCount` of 0; nothing is chunked or embedded.
///
/// Otherwise the chunking strategy is [strategy] when supplied, or the result
/// of `detectChunkingStrategy` for [filePath] when it is not. Markdown content
/// goes through `markdownChunk`; every other strategy goes through
/// `semanticChunkWithOverlap`. Chunks are embedded one at a time, in order.
///
/// [onProgress] is invoked with `(i, total)` before the chunk at index `i` is
/// embedded and once more with `(total, total)` after the last one. When the
/// chunker yields nothing it is therefore invoked exactly once, with `(0, 0)`.
///
/// Returns a [SourceAddResult] carrying the source id, the number of chunks
/// written, and a human-readable message. If no chunks were produced the
/// source stays stored and the result reports `chunkCount: 0`.
Future<SourceAddResult> addSourceWithChunking(
  String content, {
  String? metadata,
  String? filePath,
  ChunkingStrategy? strategy,
  void Function(int done, int total)? onProgress,
}) async {
  // Step 1: persist the source itself before doing any chunking work.
  final added = await addSource(
    dbPath: dbPath,
    content: content,
    metadata: metadata,
  );

  // Duplicate content: report it and skip chunking/embedding entirely.
  if (added.isDuplicate) {
    return SourceAddResult(
      sourceId: added.sourceId.toInt(),
      isDuplicate: true,
      chunkCount: 0,
      message: added.message,
    );
  }

  // Step 2: an explicit strategy wins; otherwise derive one from the path.
  final chosen = strategy ?? detectChunkingStrategy(filePath);

  // Step 3: split the content and embed every piece sequentially.
  final pending = <ChunkData>[];

  if (chosen == ChunkingStrategy.markdown) {
    final pieces = markdownChunk(text: content, maxChars: maxChunkChars);
    final total = pieces.length;
    var done = 0;

    for (final piece in pieces) {
      // Report progress before the (potentially slow) embedding call.
      onProgress?.call(done, total);
      done++;

      final vector = await EmbeddingService.embed(piece.content);

      // Append the header path to the chunk type so the stored chunk keeps
      // track of where in the document it came from.
      final String typeLabel;
      if (piece.headerPath.isNotEmpty) {
        typeLabel = '${piece.chunkType}|${piece.headerPath}';
      } else {
        typeLabel = piece.chunkType;
      }

      pending.add(
        ChunkData(
          content: piece.content,
          chunkIndex: piece.index,
          startPos: piece.startPos,
          endPos: piece.endPos,
          chunkType: typeLabel,
          embedding: Float32List.fromList(vector),
        ),
      );
    }

    // Final tick so listeners always see done == total.
    onProgress?.call(total, total);
  } else {
    final pieces = semanticChunkWithOverlap(
      text: content,
      maxChars: maxChunkChars,
      overlapChars: overlapChars,
    );
    final total = pieces.length;
    var done = 0;

    for (final piece in pieces) {
      // Report progress before the (potentially slow) embedding call.
      onProgress?.call(done, total);
      done++;

      final vector = await EmbeddingService.embed(piece.content);

      pending.add(
        ChunkData(
          content: piece.content,
          chunkIndex: piece.index,
          startPos: piece.startPos,
          endPos: piece.endPos,
          chunkType: piece.chunkType,
          embedding: Float32List.fromList(vector),
        ),
      );
    }

    // Final tick so listeners always see done == total.
    onProgress?.call(total, total);
  }

  // Nothing to persist: the source stays stored, but no chunks are written.
  if (pending.isEmpty) {
    return SourceAddResult(
      sourceId: added.sourceId.toInt(),
      isDuplicate: false,
      chunkCount: 0,
      message: 'No chunks created',
    );
  }

  // Step 4: write all embedded chunks in a single call.
  await addChunks(
    dbPath: dbPath,
    sourceId: added.sourceId,
    chunks: pending,
  );

  return SourceAddResult(
    sourceId: added.sourceId.toInt(),
    isDuplicate: false,
    chunkCount: pending.length,
    message: 'Added ${pending.length} chunks (${chosen.name})',
  );
}