benchmarkQueryPayload static method
Measure current query/search payload shape without changing runtime behavior.
The benchmark seeds a deterministic collection with precomputed stub embeddings, then compares:
- legacy searchHybrid full-result payload,
- SearchHandle + full hydration,
- SearchHandle + preview excerpts,
- SearchHandle + context-only assembly.
This intentionally measures string bytes surfaced to Dart, not internal Rust allocations. It is the pre-optimization baseline for moving the query path toward body-less retrieval.
The result also includes native lower-bound counters for content rows read by legacy hybrid materialization and shared SearchHandle hydration.
Implementation
/// Benchmarks the string payload surfaced to Dart by each query/search path.
///
/// Seeds a throwaway database with two three-chunk sources that carry fixed
/// four-dimensional stub embeddings, rebuilds the HNSW and BM25 indexes for
/// the benchmark collection, then runs the same query through four variants,
/// in this order:
///
/// 1. legacy `searchHybrid` (full result payload),
/// 2. `SearchHandle` + full chunk hydration,
/// 3. `SearchHandle` + preview excerpts capped at [previewMaxBytes],
/// 4. `SearchHandle` + context-only assembly.
///
/// Every variant searches with vector weight 1.0 and BM25 weight 0.0 and
/// requests [topK] hits. The handle variants additionally forward
/// [adjacentChunks] to the search options and [tokenBudget] to context
/// assembly.
///
/// The scratch database lives at [dbPathOverride] when given, otherwise at
/// `query_payload_bench.sqlite` inside the application documents directory.
/// Any file already present at that path (and at
/// `<path>.tokenizer.json`) is deleted before the run and again afterwards.
///
/// The benchmark replaces the active database pool. When [restoreDbPath] is
/// non-null the pool is re-initialized on that path once cleanup is done;
/// this happens whether the benchmark succeeds or throws.
static Future<QueryPayloadBenchResult> benchmarkQueryPayload({
  int topK = 4,
  int adjacentChunks = 1,
  int previewMaxBytes = 24,
  int tokenBudget = 4000,
  String? restoreDbPath,
  String? dbPathOverride,
}) async {
  const collectionId = 'bench-query-payload';
  const queryText = 'install checksum smoke';
  final queryEmbedding = Float32List.fromList([1.0, 0.0, 0.0, 0.0]);
  final benchDbPath =
      dbPathOverride ??
      "${(await getApplicationDocumentsDirectory()).path}/query_payload_bench.sqlite";
  final benchFile = File(benchDbPath);
  final tokenizerFile = File('$benchDbPath.tokenizer.json');

  // Removes the scratch database and its tokenizer file if they exist.
  Future<void> deleteBenchArtifacts() async {
    for (final file in [benchFile, tokenizerFile]) {
      if (await file.exists()) {
        await file.delete();
      }
    }
  }

  // Registers one source in the benchmark collection, marks it completed and
  // attaches `chunks` with their precomputed `embeddings` (same length and
  // order as `chunks`).
  Future<void> seedSource({
    required String name,
    required String? metadata,
    required List<String> chunks,
    required List<Float32List> embeddings,
  }) async {
    final source = await source_rag.addSourceInCollection(
      collectionId: collectionId,
      content: chunks.join('\n'),
      metadata: metadata,
      name: name,
    );
    await source_rag.updateSourceStatus(
      sourceId: source.sourceId,
      status: 'completed',
    );
    var cursor = 0;
    final chunkData = <source_rag.ChunkData>[];
    for (var i = 0; i < chunks.length; i++) {
      final content = chunks[i];
      chunkData.add(
        source_rag.ChunkData(
          content: content,
          chunkIndex: i,
          startPos: cursor,
          endPos: cursor + content.length,
          // Only the first chunk of the 'guide' source carries a header path.
          // NOTE(review): presumably this gives the header-path preview
          // something to report - confirm against the native side.
          chunkType: i == 0 && name == 'guide'
              ? 'text|Guide > Setup'
              : 'general',
          embedding: embeddings[i],
        ),
      );
      // +1 skips the '\n' that joins the chunks in the stored source content.
      cursor += content.length + 1;
    }
    await source_rag.addChunks(sourceId: source.sourceId, chunks: chunkData);
  }

  // Thin wrapper over `_variantStatsFromNative` that lets each variant pass
  // only the byte counters it actually measured; the rest default to 0.
  QueryPayloadVariantStats variantStats({
    required String label,
    required double elapsedMs,
    required int hitCount,
    int metaBytes = 0,
    int contextBytes = 0,
    int fullChunkBytes = 0,
    int previewBytes = 0,
    query_metrics.QueryContentReadStats? nativeReadStats,
  }) {
    return _variantStatsFromNative(
      label: label,
      elapsedMs: elapsedMs,
      hitCount: hitCount,
      metaBytes: metaBytes,
      contextBytes: contextBytes,
      fullChunkBytes: fullChunkBytes,
      previewBytes: previewBytes,
      nativeReadStats: nativeReadStats,
    );
  }

  // Legacy path: one `searchHybrid` call that returns content and metadata
  // for every hit. Only that call is timed.
  Future<QueryPayloadVariantStats> runLegacy() async {
    query_metrics.resetQueryContentReadStats();
    final sw = Stopwatch()..start();
    final results = await searchHybrid(
      queryText: queryText,
      queryEmbedding: queryEmbedding,
      topK: topK,
      config: const RrfConfig(k: 60, vectorWeight: 1.0, bm25Weight: 0.0),
      filter: const SearchFilter(collectionId: collectionId),
    );
    sw.stop();
    final nativeReadStats = query_metrics.takeQueryContentReadStats();
    final fullChunkBytes = results.fold<int>(
      0,
      (sum, result) =>
          sum +
          _utf8ByteLength(result.content) +
          _utf8ByteLength(result.metadata),
    );
    return variantStats(
      label: 'legacy searchHybrid',
      elapsedMs: sw.elapsedMicroseconds / 1000.0,
      hitCount: results.length,
      fullChunkBytes: fullChunkBytes,
      nativeReadStats: nativeReadStats,
    );
  }

  // Handle path: search, read hit metadata and assemble context, then apply
  // the requested hydration step. The stopwatch spans all of those calls.
  // The handle is disposed even when a step throws.
  Future<QueryPayloadVariantStats> runHandleVariant(
    _QueryPayloadHydration hydration,
  ) async {
    source_rag.SearchHandle? handle;
    query_metrics.resetQueryContentReadStats();
    final sw = Stopwatch()..start();
    try {
      handle = await source_rag.searchMetaHybrid(
        collectionId: collectionId,
        queryText: queryText,
        queryEmbedding: queryEmbedding,
        options: source_rag.SearchMetaHybridOptions(
          topK: topK,
          vectorWeight: 1.0,
          bm25Weight: 0.0,
          adjacentChunks: adjacentChunks,
        ),
      );
      final hits = await handle.hitMeta();
      final metaBytes = hits.fold<int>(
        0,
        (sum, hit) =>
            sum +
            _utf8ByteLength(hit.rawType) +
            _utf8ByteLength(hit.headerPathPreview),
      );
      final assembled = await handle.assembleContext(
        options: source_rag.AssembleContextOptions(
          tokenBudget: tokenBudget,
          strategy: source_rag.ContextAssemblyStrategy.relevanceFirst,
          separator: '\n\n---\n\n',
          singleSourceMode: false,
        ),
      );
      final contextBytes = _utf8ByteLength(assembled.text);
      final hitIds = _toInt64List(
        hits.map((hit) => hit.chunkId.toInt()).toList(growable: false),
      );
      var fullChunkBytes = 0;
      var previewBytes = 0;
      switch (hydration) {
        case _QueryPayloadHydration.full:
          final chunks = await handle.hydrateChunks(chunkIds: hitIds);
          fullChunkBytes = chunks.fold<int>(
            0,
            (sum, chunk) =>
                sum +
                _utf8ByteLength(chunk.content) +
                _utf8ByteLength(chunk.chunkType) +
                _utf8ByteLength(chunk.metadata),
          );
          break;
        case _QueryPayloadHydration.preview:
          final excerpts = await handle.getChunkExcerpts(
            chunkIds: hitIds,
            maxBytes: previewMaxBytes,
          );
          previewBytes = excerpts.fold<int>(
            0,
            (sum, excerpt) =>
                sum +
                _utf8ByteLength(excerpt.rawType) +
                _utf8ByteLength(excerpt.headerPathPreview) +
                _utf8ByteLength(excerpt.excerpt),
          );
          break;
        case _QueryPayloadHydration.contextOnly:
          // Nothing beyond the assembled context is fetched.
          break;
      }
      sw.stop();
      final nativeReadStats = query_metrics.takeQueryContentReadStats();
      return variantStats(
        label: switch (hydration) {
          _QueryPayloadHydration.full => 'handle + full hydrate',
          _QueryPayloadHydration.preview => 'handle + preview excerpts',
          _QueryPayloadHydration.contextOnly => 'handle + context only',
        },
        elapsedMs: sw.elapsedMicroseconds / 1000.0,
        hitCount: hits.length,
        metaBytes: metaBytes,
        contextBytes: contextBytes,
        fullChunkBytes: fullChunkBytes,
        previewBytes: previewBytes,
        nativeReadStats: nativeReadStats,
      );
    } finally {
      if (handle != null) {
        await handle.dispose();
      }
    }
  }

  // Start from a clean slate so leftovers of an aborted run cannot skew the
  // measurement.
  await deleteBenchArtifacts();
  await initDbPool(dbPath: benchDbPath, maxSize: 4);

  // The guard starts only after the benchmark pool is known to be open, so a
  // failed `initDbPool` never makes us close a pool this method did not
  // create. Everything after this point (schema init, seeding, index rebuilds
  // and the measured runs) is covered by the cleanup below.
  try {
    await initDb();
    await source_rag.initSourceDb();
    // Minimal WordLevel tokenizer whose vocabulary holds only the three query
    // words plus the unknown token.
    await tokenizerFile.writeAsString(
      '{"version":"1.0","truncation":null,"padding":null,'
      '"added_tokens":[],"normalizer":null,'
      '"pre_tokenizer":{"type":"Whitespace"},"post_processor":null,'
      '"decoder":null,"model":{"type":"WordLevel",'
      '"vocab":{"[UNK]":0,"install":1,"checksum":2,"smoke":3},'
      '"unk_token":"[UNK]"}}',
      flush: true,
    );
    await initTokenizer(tokenizerPath: tokenizerFile.path);

    await seedSource(
      name: 'guide',
      metadata: '{"source":"guide"}',
      chunks: [
        'Install the package from the signed release archive before running the app.',
        'Verify the checksum and compare it with the release manifest.',
        'Configure the local model path after the package is installed.',
      ],
      embeddings: [
        Float32List.fromList([1.0, 0.0, 0.0, 0.0]),
        Float32List.fromList([0.95, 0.05, 0.0, 0.0]),
        Float32List.fromList([0.75, 0.15, 0.0, 0.0]),
      ],
    );
    await seedSource(
      name: 'smoke',
      metadata: '{"source":"smoke"}',
      chunks: [
        'Run the smoke test after installation and inspect the first query.',
        'Record the startup timing and search payload size for regression checks.',
        'Archive the debug log only when the test reports a failure.',
      ],
      embeddings: [
        Float32List.fromList([0.82, 0.18, 0.0, 0.0]),
        Float32List.fromList([0.68, 0.32, 0.0, 0.0]),
        Float32List.fromList([0.2, 0.8, 0.0, 0.0]),
      ],
    );
    await source_rag.rebuildChunkHnswIndexForCollection(
      collectionId: collectionId,
    );
    await source_rag.rebuildChunkBm25IndexForCollection(
      collectionId: collectionId,
    );

    return QueryPayloadBenchResult(
      topK: topK,
      adjacentChunks: adjacentChunks,
      previewMaxBytes: previewMaxBytes,
      legacySearchHybrid: await runLegacy(),
      handleFull: await runHandleVariant(_QueryPayloadHydration.full),
      handlePreview: await runHandleVariant(_QueryPayloadHydration.preview),
      handleContextOnly: await runHandleVariant(
        _QueryPayloadHydration.contextOnly,
      ),
    );
  } finally {
    query_metrics.resetQueryContentReadStats();
    try {
      await closeDbPool();
      await deleteBenchArtifacts();
    } finally {
      // Re-open the caller's database even if closing the pool or deleting
      // the scratch files failed.
      if (restoreDbPath != null) {
        await initDbPool(dbPath: restoreDbPath, maxSize: 4);
      }
    }
  }
}