runQualityTest static method

Future<QualityTestSummary> runQualityTest({
  1. dynamic onProgress(
    1. String,
    2. int,
    3. int
    )?,
})

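Runs the full quality test: embeds the test documents into a temporary database, rebuilds the HNSW index, searches the top 3 results for every test case, and returns the per-query results together with average recall@3, average precision, and the number of passed cases.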

Implementation

/// Runs the end-to-end retrieval quality test and returns its summary.
///
/// Creates a fresh database file `quality_test_db.sqlite` in the application
/// documents directory, embeds and stores every entry of [testDocuments],
/// rebuilds the HNSW index, then searches the top 3 results for each entry
/// of [testCases] and records recall, precision and a pass/fail flag (a case
/// passes when at least one returned document is relevant).
///
/// [onProgress], when given, is called with a status message, the current
/// step and the total number of steps during both the embedding phase and
/// the query phase.
///
/// The test database file is removed when the run ends, including when the
/// run fails with an exception. If [testCases] is empty, both averages in
/// the returned summary are `0.0`.
static Future<QualityTestSummary> runQualityTest({
  Function(String, int, int)? onProgress,
}) async {
  // Number of results requested per query; the same value is passed to the
  // recall and precision helpers.
  const topK = 3;

  final dir = await getApplicationDocumentsDirectory();
  final testDbPath = "${dir.path}/quality_test_db.sqlite";

  // Start from a clean slate: remove any database left by a previous run.
  final dbFile = File(testDbPath);
  if (await dbFile.exists()) {
    await dbFile.delete();
  }

  final results = <QualityResult>[];

  try {
    // Initialize DB
    await initDb(dbPath: testDbPath);

    onProgress?.call("Embedding documents...", 0, testDocuments.length);

    // Embed and store documents
    for (var i = 0; i < testDocuments.length; i++) {
      final doc = testDocuments[i];
      final emb = await EmbeddingService.embed(doc);
      await addDocument(dbPath: testDbPath, content: doc, embedding: emb);
      onProgress?.call("Embedding documents...", i + 1, testDocuments.length);
    }

    // Rebuild HNSW index (explicit call after adding documents)
    print('[DEBUG] Rebuilding HNSW index with ${testDocuments.length} documents...');
    await rebuildHnswIndex(dbPath: testDbPath);
    print('[DEBUG] HNSW index rebuilt');

    // Run tests
    for (var i = 0; i < testCases.length; i++) {
      final tc = testCases[i];
      // Verbose debug output is only produced for the first query.
      final isFirstCase = i == 0;
      onProgress?.call("Testing: ${tc.query}", i + 1, testCases.length);

      // Query embedding, with embedding debug mode enabled for the first query.
      if (isFirstCase) {
        EmbeddingService.debugMode = true;
        print('[DEBUG] === Testing query: "${tc.query}" ===');
      }
      // The closure keeps the inferred embedding type while guaranteeing that
      // debug mode is switched off again even if embedding throws.
      final queryEmb = await () async {
        try {
          return await EmbeddingService.embed(tc.query);
        } finally {
          if (isFirstCase) {
            EmbeddingService.debugMode = false;
          }
        }
      }();
      if (isFirstCase) {
        print('[DEBUG] Query embedding (first 5): ${queryEmb.take(5).toList()}');
      }

      // Search
      final searchResults = await searchSimilar(
        dbPath: testDbPath,
        queryEmbedding: queryEmb,
        topK: topK,
      );

      // Debug first query results
      if (isFirstCase) {
        print('[DEBUG] Search results for "${tc.query}": ${searchResults.length} items');
        for (final r in searchResults) {
          // Print at most the first 50 characters of each result.
          print('[DEBUG]   - ${r.substring(0, r.length.clamp(0, 50))}...');
        }
      }

      // Calculate metrics
      final recall = _calculateRecall(searchResults, tc.relevantDocs, topK);
      final precision =
          _calculatePrecision(searchResults, tc.relevantDocs, topK);

      // Pass if at least 1 match
      final passed =
          searchResults.any((doc) => _isRelevant(doc, tc.relevantDocs));

      results.add(QualityResult(
        query: tc.query,
        expected: tc.relevantDocs,
        actual: searchResults,
        recallAt3: recall,
        precision: precision,
        passed: passed,
      ));
    }
  } finally {
    // Cleanup runs on success and on failure so the test database never
    // lingers on disk; the existence check avoids throwing when the file was
    // never created or has already been removed.
    if (await dbFile.exists()) {
      await dbFile.delete();
    }
  }

  // Aggregate. `reduce` throws on an empty iterable, so report 0.0 averages
  // when there were no test cases.
  final avgRecall = results.isEmpty
      ? 0.0
      : results.map((r) => r.recallAt3).reduce((a, b) => a + b) /
          results.length;
  final avgPrecision = results.isEmpty
      ? 0.0
      : results.map((r) => r.precision).reduce((a, b) => a + b) /
          results.length;
  final passedCount = results.where((r) => r.passed).length;

  return QualityTestSummary(
    results: results,
    avgRecallAt3: avgRecall,
    avgPrecision: avgPrecision,
    passed: passedCount,
    total: results.length,
  );
}