benchmarkIngestFfiEntrypoints static method

Future<IngestFfiEntrypointBenchResult> benchmarkIngestFfiEntrypoints({
  int targetBytes = 1 * 1024 * 1024,
  int embeddingDim = 384,
  int maxChunkChars = 1500,
  int overlapChars = 100,
  int batchSize = 16,
  String? restoreDbPath,
  String? dbPathOverride,
})

Measure FFI text-byte traffic across the three IngestSession entrypoints on a deterministic document: the canonical prepareSourceIngestion (Dart String), prepareSourceIngestionFromUtf8 (bytes), and prepareSourceIngestionFromFile (path-only). Stub embeddings keep the measurement focused on FFI byte traffic.

The body bytes never round-trip back through Dart on the file variant, so session_prepare_content_in_bytes should be 0 for it.

Implementation

/// Measures ingest traffic stats for the three `IngestSession` prepare
/// entrypoints against one generated document.
///
/// The same document is ingested three times, each into its own collection:
/// via [ingest_session.prepareSourceIngestion] (Dart `String`),
/// [ingest_session.prepareSourceIngestionFromUtf8] (bytes), and
/// [ingest_session.prepareSourceIngestionFromFile] (path only). Every chunk
/// is committed with the same zero-filled stub embedding, so no real
/// embedding model is involved.
///
/// Parameters:
/// - [targetBytes]: size request forwarded to `_generateBenchDoc`.
/// - [embeddingDim]: length of the stub embedding vector.
/// - [maxChunkChars] / [overlapChars]: forwarded to each prepare call as
///   `maxChars` / `overlapChars`.
/// - [batchSize]: forwarded to `takeEmbeddingBatch`.
/// - [dbPathOverride]: path of the scratch database; when null, a file in
///   the application documents directory is used. A sibling `<path>.txt`
///   file holds the document for the file-based entrypoint.
/// - [restoreDbPath]: when non-null, the DB pool is re-initialised on this
///   path once the benchmark is done.
///
/// Returns the document size and the traffic stats recorded for each
/// entrypoint.
///
/// Throws a [StateError] if a prepare call yields no session. Teardown
/// (closing the pool, deleting the scratch files, restoring
/// [restoreDbPath]) runs whether the benchmark succeeds or throws.
static Future<IngestFfiEntrypointBenchResult>
    benchmarkIngestFfiEntrypoints({
  int targetBytes = 1 * 1024 * 1024,
  int embeddingDim = 384,
  int maxChunkChars = 1500,
  int overlapChars = 100,
  int batchSize = 16,
  String? restoreDbPath,
  String? dbPathOverride,
}) async {
  final content = _generateBenchDoc(targetBytes);
  // ASCII-only doc: String length == UTF-8 byte count.
  final docUtf8Bytes = content.length;

  final benchDbPath = dbPathOverride ??
      "${(await getApplicationDocumentsDirectory()).path}/ingest_ffi_entrypoints_bench.sqlite";
  final benchFile = File(benchDbPath);
  final benchTextFile = File('$benchDbPath.txt');

  // Removes [file] if present; used for both pre-run and post-run cleanup.
  Future<void> deleteIfExists(File file) async {
    if (await file.exists()) {
      await file.delete();
    }
  }

  // One shared vector for every chunk; typed-data lists start zero-filled.
  final stubEmbedding = Float32List(embeddingDim);

  // Runs one full ingest through the session returned by [prepare], then
  // returns the traffic stats accumulated since the reset at the start.
  Future<ingest_metrics.IngestTrafficStats> runOne(
    Future<ingest_session.PreparedIngestion> Function() prepare,
    String collectionId,
  ) async {
    ingest_metrics.resetIngestTrafficStats();
    final prepared = await prepare();
    final session = prepared.session;
    if (session == null) {
      throw StateError(
        'benchmarkIngestFfiEntrypoints: prepare returned no session '
        '(state=${prepared.state}); benchmark requires a fresh collection.',
      );
    }
    try {
      var saved = 0;
      while (saved < prepared.totalChunks) {
        final batch = await session.takeEmbeddingBatch(batchSize: batchSize);
        // An empty batch means there is nothing left to hand out; stop even
        // if the saved count has not reached totalChunks.
        if (batch.isEmpty) break;
        final embeddings = batch
            .map(
              (req) => ingest_session.ChunkEmbedding(
                chunkIndex: req.chunkIndex,
                embedding: stubEmbedding,
              ),
            )
            .toList(growable: false);
        saved += await session.commitEmbeddings(embeddings: embeddings);
      }
      await session.finalize();
    } finally {
      await session.dispose();
    }
    // Snapshot the stats before the delete below so that cleanup is not
    // part of the reported numbers.
    final stats = ingest_metrics.ingestTrafficStats();
    await source_rag.deleteSourceInCollection(
      collectionId: collectionId,
      sourceId: prepared.sourceId,
    );
    return stats;
  }

  // Start from a clean slate in case a previous run left files behind.
  await deleteIfExists(benchFile);
  await deleteIfExists(benchTextFile);

  // Tracks whether the bench pool was opened, so the teardown only closes a
  // pool this function actually created.
  var poolOpened = false;
  try {
    await benchTextFile.writeAsString(content, flush: true);

    await initDbPool(dbPath: benchDbPath, maxSize: 4);
    poolOpened = true;
    await initDb();
    await source_rag.initSourceDb();

    final stringStats = await runOne(
      () => ingest_session.prepareSourceIngestion(
        collectionId: 'bench-string',
        content: content,
        metadata: null,
        name: 'bench-string',
        strategy: ingest_session.IngestStrategy.recursive,
        maxChars: maxChunkChars,
        overlapChars: overlapChars,
      ),
      'bench-string',
    );

    // Code units stand in for UTF-8 bytes; this relies on the ASCII-only
    // assumption noted above.
    final utf8Bytes = Uint8List.fromList(content.codeUnits);
    final utf8Stats = await runOne(
      () => ingest_session.prepareSourceIngestionFromUtf8(
        collectionId: 'bench-utf8',
        contentBytes: utf8Bytes,
        metadata: null,
        name: 'bench-utf8',
        strategy: ingest_session.IngestStrategy.recursive,
        maxChars: maxChunkChars,
        overlapChars: overlapChars,
      ),
      'bench-utf8',
    );

    final fileStats = await runOne(
      () => ingest_session.prepareSourceIngestionFromFile(
        collectionId: 'bench-file',
        filePath: benchTextFile.path,
        metadata: null,
        name: 'bench-file',
        strategyHint: ingest_session.IngestStrategy.recursive,
        maxChars: maxChunkChars,
        overlapChars: overlapChars,
      ),
      'bench-file',
    );

    return IngestFfiEntrypointBenchResult(
      docBytes: docUtf8Bytes,
      stringPath: stringStats,
      utf8Path: utf8Stats,
      filePath: fileStats,
    );
  } finally {
    // Teardown runs on success and on failure, so a failed benchmark does
    // not leave the pool on the scratch database or leave files on disk.
    if (poolOpened) {
      await closeDbPool();
    }
    await deleteIfExists(benchFile);
    await deleteIfExists(benchTextFile);
    // NOTE(review): restore is attempted even when opening the bench pool
    // failed, assuming the caller expects its pool back in every case;
    // confirm against initDbPool's behaviour on an already-open pool.
    if (restoreDbPath != null) {
      await initDbPool(dbPath: restoreDbPath, maxSize: 4);
    }
  }
}