addSourceWithChunking method
Add a source document with automatic chunking and embedding.
The document is:
- Split into chunks based on chunkConfig
- Each chunk is embedded
- Source and chunks are stored in DB
Implementation
/// Registers [content] as a source document, then chunks, embeds and stores it.
///
/// The work happens in four stages:
///
/// 1. The document is registered through `addSource`. When that call reports
///    a duplicate, this method returns right away with `isDuplicate: true`,
///    a chunk count of 0 and the message supplied by `addSource`.
/// 2. The text is split by `semanticChunkWithOverlap`, bounded by
///    `maxChunkChars` and `overlapChars`. If nothing comes back, the method
///    returns with a chunk count of 0 and the message `'No chunks created'`;
///    it does not undo the earlier `addSource` call.
/// 3. Each chunk is embedded via `EmbeddingService.embed`, one after another
///    (every call is awaited before the next one starts).
/// 4. All embedded chunks are written with a single `addChunks` call.
///
/// [metadata] is forwarded unchanged to `addSource`.
///
/// [onProgress], when provided, is invoked before each chunk is embedded with
/// the number of chunks already finished and the total, and one final time
/// with `(total, total)` once every chunk has been embedded.
///
/// Errors raised by any of the awaited calls are not caught here.
Future<SourceAddResult> addSourceWithChunking(
  String content, {
  String? metadata,
  void Function(int done, int total)? onProgress,
}) async {
  // Stage 1: register the document itself.
  final added = await addSource(
    dbPath: dbPath,
    content: content,
    metadata: metadata,
  );

  // A duplicate is reported back as-is; nothing is chunked or embedded.
  if (added.isDuplicate) {
    return SourceAddResult(
      sourceId: added.sourceId.toInt(),
      isDuplicate: true,
      chunkCount: 0,
      message: added.message,
    );
  }

  // Stage 2: semantic split performed on the Rust side
  // (Unicode sentence/word boundaries).
  final chunks = semanticChunkWithOverlap(
    text: content,
    maxChars: maxChunkChars,
    overlapChars: overlapChars,
  );

  if (chunks.isEmpty) {
    return SourceAddResult(
      sourceId: added.sourceId.toInt(),
      isDuplicate: false,
      chunkCount: 0,
      message: 'No chunks created',
    );
  }

  // Stage 3: embed the chunks sequentially, reporting progress before each.
  final chunkTotal = chunks.length;
  final embedded = <ChunkData>[];
  var completed = 0;
  for (final piece in chunks) {
    onProgress?.call(completed, chunkTotal);

    final vector = await EmbeddingService.embed(piece.content);
    final record = ChunkData(
      content: piece.content,
      chunkIndex: piece.index,
      startPos: piece.startPos,
      endPos: piece.endPos,
      chunkType: piece.chunkType,
      embedding: Float32List.fromList(vector),
    );
    embedded.add(record);

    completed++;
  }
  // Final tick so listeners see the completed state.
  onProgress?.call(chunkTotal, chunkTotal);

  // Stage 4: persist every embedded chunk in one call.
  await addChunks(
    dbPath: dbPath,
    sourceId: added.sourceId,
    chunks: embedded,
  );

  return SourceAddResult(
    sourceId: added.sourceId.toInt(),
    isDuplicate: false,
    chunkCount: chunkTotal,
    message: 'Added ${chunks.length} chunks',
  );
}