benchmarkIngestFfiEntrypoints static method
Measures FFI text-byte traffic across the three IngestSession entrypoints
on a deterministic document: the canonical prepareSourceIngestion
(Dart String), prepareSourceIngestionFromUtf8 (bytes), and
prepareSourceIngestionFromFile (path-only). Stub embeddings keep the
measurement focused on FFI byte traffic.
The body bytes never round-trip back through Dart on the file variant,
so session_prepare_content_in_bytes should be 0 for it.
Implementation
/// Measures ingest FFI traffic for the String, UTF-8 bytes, and file-path
/// entrypoints on the same generated document.
///
/// The document comes from `_generateBenchDoc(targetBytes)` and is also
/// written to a sibling `.txt` file next to the scratch database so the
/// file-path entrypoint can read it. Each entrypoint is run against its own
/// collection with a shared zero-filled stub embedding, and the traffic
/// counters are reset before and sampled after every run.
///
/// Parameters:
/// * [targetBytes] is forwarded to `_generateBenchDoc`.
/// * [embeddingDim] is the length of the stub embedding vector.
/// * [maxChunkChars] and [overlapChars] are forwarded to every prepare call
///   as `maxChars` / `overlapChars`.
/// * [batchSize] is forwarded to `takeEmbeddingBatch`.
/// * [restoreDbPath], when non-null, is the database the pool is re-opened
///   on once the benchmark is over.
/// * [dbPathOverride] replaces the default scratch database path under the
///   application documents directory.
///
/// Teardown (closing the pool, deleting the scratch files, restoring the
/// pool on [restoreDbPath]) runs even when the benchmark fails, so a failed
/// run does not leave the pool bound to the scratch database.
///
/// Throws a [StateError] when a prepare call returns no session.
static Future<IngestFfiEntrypointBenchResult>
benchmarkIngestFfiEntrypoints({
  int targetBytes = 1 * 1024 * 1024,
  int embeddingDim = 384,
  int maxChunkChars = 1500,
  int overlapChars = 100,
  int batchSize = 16,
  String? restoreDbPath,
  String? dbPathOverride,
}) async {
  final content = _generateBenchDoc(targetBytes);
  // ASCII-only doc: String length == UTF-8 byte count.
  final docUtf8Bytes = content.length;
  final benchDbPath = dbPathOverride ??
      "${(await getApplicationDocumentsDirectory()).path}/ingest_ffi_entrypoints_bench.sqlite";
  final benchFile = File(benchDbPath);
  final benchTextFile = File('$benchDbPath.txt');

  // Removes a scratch file if it is present; used for both setup and teardown.
  Future<void> deleteIfExists(File file) async {
    if (await file.exists()) {
      await file.delete();
    }
  }

  // Start from a clean slate in case a previous run left files behind.
  await deleteIfExists(benchFile);
  await deleteIfExists(benchTextFile);

  // Runs one entrypoint end to end and returns the traffic counters it
  // produced. The counters are sampled before the source is deleted, in the
  // same order as the session work completes.
  Future<ingest_metrics.IngestTrafficStats> runOne(
    Future<ingest_session.PreparedIngestion> Function() prepare,
    String collectionId,
    Float32List stubEmbedding,
  ) async {
    ingest_metrics.resetIngestTrafficStats();
    final prepared = await prepare();
    final session = prepared.session;
    if (session == null) {
      throw StateError(
        'benchmarkIngestFfiEntrypoints: prepare returned no session '
        '(state=${prepared.state}); benchmark requires a fresh collection.',
      );
    }
    try {
      var saved = 0;
      while (saved < prepared.totalChunks) {
        final batch = await session.takeEmbeddingBatch(batchSize: batchSize);
        // An empty batch means the session has nothing left to hand out.
        if (batch.isEmpty) break;
        final embeddings = batch
            .map(
              (req) => ingest_session.ChunkEmbedding(
                chunkIndex: req.chunkIndex,
                embedding: stubEmbedding,
              ),
            )
            .toList(growable: false);
        saved += await session.commitEmbeddings(embeddings: embeddings);
      }
      await session.finalize();
    } finally {
      await session.dispose();
    }
    final stats = ingest_metrics.ingestTrafficStats();
    await source_rag.deleteSourceInCollection(
      collectionId: collectionId,
      sourceId: prepared.sourceId,
    );
    return stats;
  }

  try {
    await benchTextFile.writeAsString(content, flush: true);
    await initDbPool(dbPath: benchDbPath, maxSize: 4);
    try {
      await initDb();
      await source_rag.initSourceDb();
      final stubEmbedding = Float32List(embeddingDim);

      final stringStats = await runOne(
        () => ingest_session.prepareSourceIngestion(
          collectionId: 'bench-string',
          content: content,
          metadata: null,
          name: 'bench-string',
          strategy: ingest_session.IngestStrategy.recursive,
          maxChars: maxChunkChars,
          overlapChars: overlapChars,
        ),
        'bench-string',
        stubEmbedding,
      );

      // Relies on the ASCII-only document noted above: code units are then
      // identical to the UTF-8 bytes.
      final utf8Bytes = Uint8List.fromList(content.codeUnits);
      final utf8Stats = await runOne(
        () => ingest_session.prepareSourceIngestionFromUtf8(
          collectionId: 'bench-utf8',
          contentBytes: utf8Bytes,
          metadata: null,
          name: 'bench-utf8',
          strategy: ingest_session.IngestStrategy.recursive,
          maxChars: maxChunkChars,
          overlapChars: overlapChars,
        ),
        'bench-utf8',
        stubEmbedding,
      );

      final fileStats = await runOne(
        () => ingest_session.prepareSourceIngestionFromFile(
          collectionId: 'bench-file',
          filePath: benchTextFile.path,
          metadata: null,
          name: 'bench-file',
          strategyHint: ingest_session.IngestStrategy.recursive,
          maxChars: maxChunkChars,
          overlapChars: overlapChars,
        ),
        'bench-file',
        stubEmbedding,
      );

      return IngestFfiEntrypointBenchResult(
        docBytes: docUtf8Bytes,
        stringPath: stringStats,
        utf8Path: utf8Stats,
        filePath: fileStats,
      );
    } finally {
      // Close the scratch pool before its backing file is removed below.
      await closeDbPool();
    }
  } finally {
    await deleteIfExists(benchFile);
    await deleteIfExists(benchTextFile);
    // Hand the pool back to the caller's database, on success or failure.
    if (restoreDbPath != null) {
      await initDbPool(dbPath: restoreDbPath, maxSize: 4);
    }
  }
}