addSourceWithChunking method
Add a source document with automatic chunking and embedding.
The document is:
- Split into chunks based on file type (auto-detected from filePath)
- Each chunk is embedded
- Source and chunks are stored in DB
If strategy is not given, the chunking strategy is auto-detected from filePath:
- .md, .markdown → Markdown-aware chunking (preserves headers, code blocks)
- Other files (or no filePath) → Default recursive chunking
Implementation
/// Adds a source document, chunking and embedding it in a background isolate.
///
/// The [content] is handed to a spawned isolate together with the embedding
/// model path, the chunk size/overlap settings and the chunking strategy.
/// Once the isolate returns its chunks, the source is stored via
/// `rust_rag.addSource`, the chunks are stored (unless the source is reported
/// as a duplicate), the source is marked `completed`, and the index is marked
/// dirty.
///
/// * [metadata] is forwarded unchanged to `rust_rag.addSource`.
/// * [name] is the source name; [filePath] is used when [name] is null.
/// * [strategy] overrides auto-detection. When null, paths ending in `.md` or
///   `.markdown` (compared case-insensitively) use
///   [ChunkingStrategy.markdown]; everything else, including a null
///   [filePath], uses [ChunkingStrategy.recursive].
/// * [chunkDelay] is accepted for interface compatibility; this method does
///   not read it.
/// * [onProgress] is invoked with `(done, total)` for every progress message
///   received from the isolate.
///
/// Throws an [Exception] when the model file does not exist, when the isolate
/// reports an error, when the isolate dies from an uncaught error, or when it
/// exits without delivering a result.
Future<SourceAddResult> addSourceWithChunking(
  String content, {
  String? metadata,
  String? name,
  String? filePath,
  ChunkingStrategy? strategy,
  Duration? chunkDelay,
  void Function(int done, int total)? onProgress,
}) async {
  // 1. Determine model path for isolate.
  // Use the stored modelPath if available, otherwise fall back to a
  // conventional location inside the application documents directory.
  var effectiveModelPath = modelPath ?? '';
  if (effectiveModelPath.isEmpty) {
    final directory = await getApplicationDocumentsDirectory();
    effectiveModelPath = '${directory.path}/bge-m3-quantized-int8.onnx';
    debugPrint(
      '[SourceRagService] Warning: modelPath not set, using fallback: $effectiveModelPath',
    );
  }

  // Fail early with a readable message instead of crashing inside the isolate.
  if (!await File(effectiveModelPath).exists()) {
    throw Exception(
      "Embedding model file not found at: $effectiveModelPath. Ensure RagEngine is initialized or modelPath is correct.",
    );
  }

  // 2. Pick the chunking strategy. The extension check is case-insensitive so
  // that e.g. `README.MD` is still treated as Markdown.
  final lowerPath = filePath?.toLowerCase();
  final looksLikeMarkdown = lowerPath != null &&
      (lowerPath.endsWith('.md') || lowerPath.endsWith('.markdown'));
  final effectiveStrategy = strategy ??
      (looksLikeMarkdown
          ? ChunkingStrategy.markdown
          : ChunkingStrategy.recursive);

  // 3. Process in background isolate with progress reporting.
  debugPrint('[SourceRagService] Offloading processing to isolate...');
  final receivePort = ReceivePort();
  final completer = Completer<_ProcessingResult>();

  // Completes the pending future with an error exactly once.
  void fail(Object error) {
    if (!completer.isCompleted) {
      completer.completeError(error);
    }
  }

  final isolateRequest = _IsolateRequest(
    content: content,
    modelPath: effectiveModelPath,
    maxChars: maxChunkChars,
    overlapChars: overlapChars,
    strategy: effectiveStrategy,
    sendPort: receivePort.sendPort,
  );

  try {
    receivePort.listen((message) {
      if (message == null) {
        // Exit notification (registered through `onExit` below). A result
        // sent before the isolate exited has already completed the future,
        // in which case this is a no-op.
        fail(
          Exception('Embedding isolate exited before returning a result.'),
        );
      } else if (message is _ProcessingResult) {
        // Final result.
        if (!completer.isCompleted) {
          completer.complete(message);
        }
      } else if (message is List &&
          message.length == 2 &&
          message[0] is int &&
          message[1] is int) {
        // Progress update: [done, total].
        onProgress?.call(message[0] as int, message[1] as int);
      } else if (message is List &&
          message.isNotEmpty &&
          message[0] == 'error') {
        // Error reported explicitly by the worker: ['error', details].
        fail(
          Exception(message.length > 1 ? message[1] : 'Unknown isolate error'),
        );
      } else if (message is List &&
          message.length == 2 &&
          message[0] is String) {
        // Uncaught error forwarded by `onError`: [description, stackTrace].
        fail(
          Exception(
            'Uncaught error in embedding isolate: ${message[0]}\n${message[1]}',
          ),
        );
      }
    });

    // Route uncaught errors and the exit notification to the same port so a
    // crashed or silently finished isolate cannot leave the future pending.
    await Isolate.spawn(
      _processContentInIsolate,
      isolateRequest,
      onError: receivePort.sendPort,
      onExit: receivePort.sendPort,
    );

    final result = await completer.future;
    debugPrint(
      '[SourceRagService] Isolate finished. Got ${result.chunks.length} chunks.',
    );
    final chunks = result.chunks;

    // 4. Add source document to DB.
    final res = await rust_rag.addSource(
      content: content,
      metadata: metadata,
      name: name ?? filePath, // Use available identifier
    );

    // 5. Save chunks to DB (if not duplicate).
    if (!res.isDuplicate) {
      await rust_rag.addChunks(sourceId: res.sourceId, chunks: chunks);
    }
    // Mark as completed in both cases: a duplicate may have been left in a
    // non-completed state by an earlier, interrupted run.
    await rust_rag.updateSourceStatus(
      sourceId: res.sourceId,
      status: 'completed',
    );

    // 6. Mark index dirty.
    await _markDirty();

    return SourceAddResult(
      sourceId: res.sourceId.toInt(),
      isDuplicate: res.isDuplicate,
      chunkCount: chunks.length,
      message: res.message,
    );
  } finally {
    // Always release the port, on success and on every failure path.
    receivePort.close();
  }
}