addSourceWithChunking method
Add a source document with automatic chunking and embedding.
The document is:
- Split into chunks based on file type (auto-detected from filePath)
- Each chunk is embedded
- Source and chunks are stored in DB
If strategy is omitted and filePath is provided, the chunking strategy is auto-detected from the file extension:
- .md, .markdown → Markdown-aware chunking (preserves headers, code blocks)
- Other files → Default recursive chunking
Implementation
/// Stores [content] as a source document together with its embedded chunks.
///
/// The source is registered first through `addSource`. When that call reports
/// a duplicate, this method returns immediately with `isDuplicate: true` and
/// a chunk count of zero; nothing is chunked or embedded.
///
/// Otherwise the chunking strategy is [strategy] when supplied, or the result
/// of `detectChunkingStrategy(filePath)` when it is not. Markdown content is
/// split with `markdownChunk`; everything else goes through
/// `semanticChunkWithOverlap`. Chunks are embedded one at a time with
/// `EmbeddingService.embed`, and the whole batch is written with a single
/// `addChunks` call. If chunking yields nothing, no chunks are written and
/// the result carries the message `'No chunks created'`.
///
/// [metadata] is forwarded unchanged to `addSource`.
///
/// [onProgress] is invoked before each chunk is embedded with the number of
/// chunks already finished and the total, and once more after the loop with
/// both arguments equal to the total (this final call also happens when
/// there are no chunks at all).
Future<SourceAddResult> addSourceWithChunking(
  String content, {
  String? metadata,
  String? filePath,
  ChunkingStrategy? strategy,
  void Function(int done, int total)? onProgress,
}) async {
  // Register the source up front; its id is part of every result below.
  final added = await addSource(
    dbPath: dbPath,
    content: content,
    metadata: metadata,
  );
  final sourceIdAsInt = added.sourceId.toInt();

  // Duplicate content: report it and skip chunking and embedding entirely.
  if (added.isDuplicate) {
    return SourceAddResult(
      sourceId: sourceIdAsInt,
      isDuplicate: true,
      chunkCount: 0,
      message: added.message,
    );
  }

  // An explicit strategy wins; otherwise it is derived from the file path.
  final chosenStrategy = strategy ?? detectChunkingStrategy(filePath);
  final pending = <ChunkData>[];

  if (chosenStrategy != ChunkingStrategy.markdown) {
    // Default recursive chunking.
    final pieces = semanticChunkWithOverlap(
      text: content,
      maxChars: maxChunkChars,
      overlapChars: overlapChars,
    );
    final total = pieces.length;
    var done = 0;
    for (final piece in pieces) {
      onProgress?.call(done, total);
      final vector = await EmbeddingService.embed(piece.content);
      pending.add(
        ChunkData(
          content: piece.content,
          chunkIndex: piece.index,
          startPos: piece.startPos,
          endPos: piece.endPos,
          chunkType: piece.chunkType,
          embedding: Float32List.fromList(vector),
        ),
      );
      done++;
    }
    onProgress?.call(total, total);
  } else {
    // Markdown-aware chunking.
    final sections = markdownChunk(text: content, maxChars: maxChunkChars);
    final total = sections.length;
    var done = 0;
    for (final section in sections) {
      onProgress?.call(done, total);
      final vector = await EmbeddingService.embed(section.content);
      // Append the header path to the chunk type so the stored chunk keeps
      // its heading context; chunks without a header path keep the bare type.
      final typeLabel = section.headerPath.isEmpty
          ? section.chunkType
          : '${section.chunkType}|${section.headerPath}';
      pending.add(
        ChunkData(
          content: section.content,
          chunkIndex: section.index,
          startPos: section.startPos,
          endPos: section.endPos,
          chunkType: typeLabel,
          embedding: Float32List.fromList(vector),
        ),
      );
      done++;
    }
    onProgress?.call(total, total);
  }

  // Nothing to persist: the source row exists but has no chunks.
  if (pending.isEmpty) {
    return SourceAddResult(
      sourceId: sourceIdAsInt,
      isDuplicate: false,
      chunkCount: 0,
      message: 'No chunks created',
    );
  }

  // Persist every chunk in one call.
  await addChunks(
    dbPath: dbPath,
    sourceId: added.sourceId,
    chunks: pending,
  );

  final stored = pending.length;
  return SourceAddResult(
    sourceId: sourceIdAsInt,
    isDuplicate: false,
    chunkCount: stored,
    message: 'Added $stored chunks (${chosenStrategy.name})',
  );
}