runQualityTest static method
Runs the full quality test.
Implementation
/// Runs the full quality test and returns the aggregated summary.
///
/// The run works on a scratch SQLite file (`quality_test_db.sqlite`) inside
/// the application documents directory. A file left over from an earlier run
/// is deleted first, and the file is removed again when the run ends,
/// whether it completes normally or throws.
///
/// Steps:
/// 1. Embed every entry of [testDocuments] and store it in the scratch DB.
/// 2. Rebuild the HNSW index.
/// 3. For every entry of [testCases], embed the query, fetch the top 3
///    matches and compute recall and precision for them.
///
/// [onProgress], when provided, is called with a status message, the current
/// step and the total number of steps of the running phase.
///
/// A test case counts as passed when at least one returned document is
/// relevant according to `_isRelevant`. When there are no test cases the
/// summary reports `0.0` for both averages instead of throwing.
static Future<QualityTestSummary> runQualityTest({
  Function(String, int, int)? onProgress,
}) async {
  // Single source of truth for the "@3" cut-off used by search and metrics.
  const topK = 3;

  final dir = await getApplicationDocumentsDirectory();
  final testDbPath = "${dir.path}/quality_test_db.sqlite";
  final dbFile = File(testDbPath);

  // Start from a clean slate: drop whatever a previous run left behind.
  if (await dbFile.exists()) {
    await dbFile.delete();
  }

  // True only while this method has switched embedding debug mode on, so the
  // error path can switch it off again without touching it otherwise.
  var debugModeForced = false;

  try {
    await initDb(dbPath: testDbPath);

    // Embed and store documents.
    final documentCount = testDocuments.length;
    onProgress?.call("Embedding documents...", 0, documentCount);
    for (var i = 0; i < documentCount; i++) {
      final doc = testDocuments[i];
      final emb = await EmbeddingService.embed(doc);
      await addDocument(dbPath: testDbPath, content: doc, embedding: emb);
      onProgress?.call("Embedding documents...", i + 1, documentCount);
    }

    // Rebuild HNSW index (explicit call after adding documents).
    print('[DEBUG] Rebuilding HNSW index with ${testDocuments.length} documents...');
    await rebuildHnswIndex(dbPath: testDbPath);
    print('[DEBUG] HNSW index rebuilt');

    // Run the test cases.
    final results = <QualityResult>[];
    for (var i = 0; i < testCases.length; i++) {
      final tc = testCases[i];
      // Verbose diagnostics are emitted for the first query only.
      final isFirstQuery = i == 0;
      onProgress?.call("Testing: ${tc.query}", i + 1, testCases.length);

      if (isFirstQuery) {
        EmbeddingService.debugMode = true;
        debugModeForced = true;
        print('[DEBUG] === Testing query: "${tc.query}" ===');
      }
      final queryEmb = await EmbeddingService.embed(tc.query);
      if (isFirstQuery) {
        EmbeddingService.debugMode = false;
        debugModeForced = false;
        print('[DEBUG] Query embedding (first 5): ${queryEmb.take(5).toList()}');
      }

      final searchResults = await searchSimilar(
        dbPath: testDbPath,
        queryEmbedding: queryEmb,
        topK: topK,
      );

      if (isFirstQuery) {
        print('[DEBUG] Search results for "${tc.query}": ${searchResults.length} items');
        for (final r in searchResults) {
          print('[DEBUG] - ${r.substring(0, r.length.clamp(0, 50))}...');
        }
      }

      results.add(QualityResult(
        query: tc.query,
        expected: tc.relevantDocs,
        actual: searchResults,
        recallAt3: _calculateRecall(searchResults, tc.relevantDocs, topK),
        precision: _calculatePrecision(searchResults, tc.relevantDocs, topK),
        // Pass if at least one returned document is relevant.
        passed: searchResults.any((doc) => _isRelevant(doc, tc.relevantDocs)),
      ));
    }

    // Aggregate in a single pass. A plain loop (rather than `reduce`) keeps
    // an empty result list from throwing.
    var recallSum = 0.0;
    var precisionSum = 0.0;
    var passedCount = 0;
    for (final result in results) {
      recallSum += result.recallAt3;
      precisionSum += result.precision;
      if (result.passed) {
        passedCount++;
      }
    }
    final total = results.length;

    return QualityTestSummary(
      results: results,
      avgRecallAt3: total == 0 ? 0.0 : recallSum / total,
      avgPrecision: total == 0 ? 0.0 : precisionSum / total,
      passed: passedCount,
      total: total,
    );
  } finally {
    // Only reached with the flag set when the first query's embedding threw.
    if (debugModeForced) {
      EmbeddingService.debugMode = false;
    }
    // Cleanup runs on success and on failure. The existence check keeps a
    // missing file from masking the original error.
    if (await dbFile.exists()) {
      await dbFile.delete();
    }
  }
}