runQualityTest static method
Runs the full quality test.
Implementation
/// Runs the full retrieval quality test and returns an aggregated summary.
///
/// The test creates a fresh database at `quality_test_db.sqlite` inside the
/// application documents directory, embeds and stores every entry of
/// [testDocuments], rebuilds the HNSW index, and then runs each entry of
/// [testCases] as a top-3 similarity search. A test case passes when at
/// least one returned document is relevant according to [_isRelevant].
///
/// [onProgress], when provided, is called with a status message, the current
/// step and the total number of steps, during both the embedding phase and
/// the query phase.
///
/// The database pool is closed and the test database file is removed even
/// when a step fails; the original error is then propagated to the caller.
/// When there are no test cases, both averages in the summary are `0.0`.
static Future<QualityTestSummary> runQualityTest({
  Function(String, int, int)? onProgress,
}) async {
  // Number of results requested per query; also the cutoff passed to the
  // recall and precision helpers.
  const topK = 3;

  final dir = await getApplicationDocumentsDirectory();
  final testDbPath = "${dir.path}/quality_test_db.sqlite";

  // Remove any database file left over from a previous run.
  final dbFile = File(testDbPath);
  if (await dbFile.exists()) {
    await dbFile.delete();
  }

  // Initialize DB. The pool is opened outside the try block so that a
  // failed initialization is not followed by an attempt to close it.
  await initDbPool(dbPath: testDbPath, maxSize: 5);

  final results = <QualityResult>[];
  // Tracks whether this method currently has embedding debug mode switched
  // on, so the flag can be reset if the first query fails part-way through.
  var debugModeActive = false;

  try {
    await initDb();
    onProgress?.call("Embedding documents...", 0, testDocuments.length);

    // Embed and store documents.
    for (var i = 0; i < testDocuments.length; i++) {
      final doc = testDocuments[i];
      final emb = await EmbeddingService.embed(doc);
      await addDocument(content: doc, embedding: emb);
      onProgress?.call("Embedding documents...", i + 1, testDocuments.length);
    }

    // Rebuild HNSW index (explicit call after adding documents).
    debugPrint(
      '[DEBUG] Rebuilding HNSW index with ${testDocuments.length} documents...',
    );
    await rebuildHnswIndex();
    debugPrint('[DEBUG] HNSW index rebuilt');

    // Run tests.
    for (var i = 0; i < testCases.length; i++) {
      final tc = testCases[i];
      // Verbose diagnostics are emitted for the first query only.
      final isFirstQuery = i == 0;
      onProgress?.call("Testing: ${tc.query}", i + 1, testCases.length);

      if (isFirstQuery) {
        EmbeddingService.debugMode = true;
        debugModeActive = true;
        debugPrint('[DEBUG] === Testing query: "${tc.query}" ===');
      }
      final queryEmb = await EmbeddingService.embed(tc.query);
      if (isFirstQuery) {
        EmbeddingService.debugMode = false;
        debugModeActive = false;
        debugPrint(
          '[DEBUG] Query embedding (first 5): ${queryEmb.take(5).toList()}',
        );
      }

      // Search.
      final searchResults = await searchSimilar(
        queryEmbedding: queryEmb,
        topK: topK,
      );

      if (isFirstQuery) {
        debugPrint(
          '[DEBUG] Search results for "${tc.query}": ${searchResults.length} items',
        );
        for (final r in searchResults) {
          // Print at most the first 50 characters of each result.
          debugPrint('[DEBUG] - ${r.substring(0, r.length.clamp(0, 50))}...');
        }
      }

      // Calculate metrics.
      final recall = _calculateRecall(searchResults, tc.relevantDocs, topK);
      final precision = _calculatePrecision(
        searchResults,
        tc.relevantDocs,
        topK,
      );

      // Pass if at least 1 match.
      final passed = searchResults.any(
        (doc) => _isRelevant(doc, tc.relevantDocs),
      );

      results.add(
        QualityResult(
          query: tc.query,
          expected: tc.relevantDocs,
          actual: searchResults,
          recallAt3: recall,
          precision: precision,
          passed: passed,
        ),
      );
    }
  } finally {
    // Cleanup runs on success and on failure alike.
    if (debugModeActive) {
      EmbeddingService.debugMode = false;
    }
    try {
      await closeDbPool();
    } finally {
      // Guard the delete: the file may never have been created if an early
      // step failed.
      if (await dbFile.exists()) {
        await dbFile.delete();
      }
    }
  }

  // Aggregate. An empty result list yields 0.0 averages instead of the
  // StateError that reduce() raises on an empty iterable.
  final total = results.length;
  final avgRecall = total == 0
      ? 0.0
      : results.fold<double>(0.0, (sum, r) => sum + r.recallAt3) / total;
  final avgPrecision = total == 0
      ? 0.0
      : results.fold<double>(0.0, (sum, r) => sum + r.precision) / total;
  final passedCount = results.where((r) => r.passed).length;

  return QualityTestSummary(
    results: results,
    avgRecallAt3: avgRecall,
    avgPrecision: avgPrecision,
    passed: passedCount,
    total: total,
  );
}