runQualityTest static method

Future<QualityTestSummary> runQualityTest({
  dynamic onProgress(String, int, int)?,
})

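Runs the full quality test: embeds and stores the test documents in a fresh temporary database, rebuilds the HNSW index, runs a top-3 similarity search for every test case, and returns the per-query results together with the average recall and precision.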

Implementation

/// Runs the full retrieval quality test and returns its aggregated summary.
///
/// The run uses a throwaway SQLite file named `quality_test_db.sqlite` in
/// the application documents directory; a leftover file from an earlier run
/// is deleted first. Every entry of `testDocuments` is embedded and stored,
/// the HNSW index is rebuilt explicitly, and each entry of `testCases` is
/// then embedded and searched for its top 3 matches.
///
/// A test case counts as passed when at least one returned document is
/// relevant to it. The summary carries the per-query results, the mean
/// recall and precision over all test cases, and the passed/total counts.
/// With no test cases both averages are `0.0`.
///
/// [onProgress], when provided, is called with a status message, the number
/// of completed steps and the total number of steps of the current phase
/// (document embedding first, then query testing).
///
/// The database pool is closed and the temporary file removed even when a
/// step throws; the error itself still propagates to the caller.
static Future<QualityTestSummary> runQualityTest({
  Function(String, int, int)? onProgress,
}) async {
  // Number of results requested per query; the recall/precision cutoffs
  // must use the same value, so it is defined once.
  const topK = 3;

  final dir = await getApplicationDocumentsDirectory();
  final testDbPath = "${dir.path}/quality_test_db.sqlite";

  // Start from a clean slate: drop any database left by a previous run.
  final dbFile = File(testDbPath);
  if (await dbFile.exists()) {
    await dbFile.delete();
  }

  await initDbPool(dbPath: testDbPath, maxSize: 5);

  final results = <QualityResult>[];
  try {
    await initDb();

    final docCount = testDocuments.length;
    onProgress?.call("Embedding documents...", 0, docCount);

    // Embed and store documents.
    for (var i = 0; i < docCount; i++) {
      final doc = testDocuments[i];
      final emb = await EmbeddingService.embed(doc);
      await addDocument(content: doc, embedding: emb);
      onProgress?.call("Embedding documents...", i + 1, docCount);
    }

    // Rebuild HNSW index (explicit call after adding documents).
    debugPrint(
      '[DEBUG] Rebuilding HNSW index with $docCount documents...',
    );
    await rebuildHnswIndex();
    debugPrint('[DEBUG] HNSW index rebuilt');

    // Run tests.
    final caseCount = testCases.length;
    for (var i = 0; i < caseCount; i++) {
      final tc = testCases[i];
      onProgress?.call("Testing: ${tc.query}", i + 1, caseCount);

      // Verbose diagnostics are produced for the first query only, to keep
      // the log readable.
      final isFirstQuery = i == 0;

      if (isFirstQuery) {
        EmbeddingService.debugMode = true;
        debugPrint('[DEBUG] === Testing query: "${tc.query}" ===');
      }
      final queryEmb = await EmbeddingService.embed(tc.query);
      if (isFirstQuery) {
        EmbeddingService.debugMode = false;
        debugPrint(
          '[DEBUG] Query embedding (first 5): ${queryEmb.take(5).toList()}',
        );
      }

      final searchResults = await searchSimilar(
        queryEmbedding: queryEmb,
        topK: topK,
      );

      if (isFirstQuery) {
        debugPrint(
          '[DEBUG] Search results for "${tc.query}": ${searchResults.length} items',
        );
        for (final r in searchResults) {
          // Print at most the first 50 characters of each hit.
          debugPrint(
            '[DEBUG]   - ${r.substring(0, r.length.clamp(0, 50))}...',
          );
        }
      }

      // Calculate metrics.
      final recall = _calculateRecall(searchResults, tc.relevantDocs, topK);
      final precision = _calculatePrecision(
        searchResults,
        tc.relevantDocs,
        topK,
      );

      // Pass if at least 1 match.
      final passed = searchResults.any(
        (doc) => _isRelevant(doc, tc.relevantDocs),
      );

      results.add(
        QualityResult(
          query: tc.query,
          expected: tc.relevantDocs,
          actual: searchResults,
          recallAt3: recall,
          precision: precision,
          passed: passed,
        ),
      );
    }
  } finally {
    // A normal run always ends with debug mode off; make sure a failure
    // during the first query's embedding does not leave it switched on.
    EmbeddingService.debugMode = false;

    // Cleanup must happen on failure too, otherwise the pool stays open and
    // the test database lingers on disk.
    await closeDbPool();
    if (await dbFile.exists()) {
      await dbFile.delete();
    }
  }

  // Aggregate. `reduce` throws on an empty iterable, so an empty test-case
  // list is reported as zero averages instead of crashing.
  final hasResults = results.isNotEmpty;
  final avgRecall = hasResults
      ? results.map((r) => r.recallAt3).reduce((a, b) => a + b) /
          results.length
      : 0.0;
  final avgPrecision = hasResults
      ? results.map((r) => r.precision).reduce((a, b) => a + b) /
          results.length
      : 0.0;
  final passedCount = results.where((r) => r.passed).length;

  return QualityTestSummary(
    results: results,
    avgRecallAt3: avgRecall,
    avgPrecision: avgPrecision,
    passed: passedCount,
    total: results.length,
  );
}