analyze method

Future<SuiteHealthReport> analyze({
  required String suiteName,
  int recentRunCount = 10,
})

跨多 run 分析。从 store 拉最近 recentRunCount 次 run 计算 graduation/broken 候选 + 难度分布。

Implementation

/// Analyzes the health of a suite across several persisted runs.
///
/// Loads the runs returned by `store.loadRecent` for [suiteName] (asking for
/// at most [recentRunCount] of them) and derives:
///
/// * **Graduation candidates** – tasks whose per-run pass rate reached
///   `thresholds.matureTaskPassRate` in at least
///   `thresholds.consecutiveRunsForGraduation` consecutive runs, counted
///   from the newest run in which the task appears.
/// * **Broken-task candidates** – tasks with at least
///   `thresholds.minTrialsForBrokenJudgment` trials in total whose overall
///   pass rate is at or below `thresholds.brokenTaskPassRate`.
/// * **Difficulty histogram** – number of tasks per bucket, where the bucket
///   is chosen by `_bucketFor` from the task's overall pass rate.
/// * **Current saturation** – computed by `_computeSaturationFor` on the
///   newest run only.
///
/// Throws a [StateError] if the store returns no runs for [suiteName].
Future<SuiteHealthReport> analyze({
  required String suiteName,
  int recentRunCount = 10,
}) async {
  final loaded = await store.loadRecent(
    suiteName: suiteName,
    limit: recentRunCount,
  );
  if (loaded.isEmpty) {
    throw StateError(
      'No persisted runs found for suite "$suiteName". '
      'Save at least one EvalRunReport to the store before analyzing.',
    );
  }

  // Sort a private copy, newest first, so that runs[0] is the latest run.
  // Sorting the store's list in place would mutate data the store may share
  // or cache, and would throw if the store hands out an unmodifiable list.
  final runs = [...loaded]
    ..sort((a, b) => b.startedAt.compareTo(a.startedAt));

  // Collect, for every task, its pass statistics in each run.
  // taskId -> one _TaskRunStat per run the task appears in.
  final perTaskPerRun = <String, List<_TaskRunStat>>{};
  for (final run in runs) {
    final byTask = run.trialsByTask();
    for (final entry in byTask.entries) {
      final passes = entry.value.where((tr) => tr.allGradersPassed).length;
      final total = entry.value.length;
      perTaskPerRun
          .putIfAbsent(entry.key, () => [])
          .add(
            _TaskRunStat(
              runName: run.runName,
              startedAt: run.startedAt,
              passes: passes,
              trials: total,
            ),
          );
    }
  }

  final graduates = <GraduationCandidate>[];
  final brokens = <BrokenTaskCandidate>[];
  final histogram = <String, int>{};

  for (final entry in perTaskPerRun.entries) {
    // Already newest-first: the stats were appended while iterating the
    // sorted `runs`, so no per-task re-sort is needed (and skipping it keeps
    // tie ordering consistent with `runs`).
    final stats = entry.value;

    // Pass rate pooled over all analyzed runs -> difficulty distribution.
    final allTrials = stats.fold<int>(0, (a, b) => a + b.trials);
    final allPasses = stats.fold<int>(0, (a, b) => a + b.passes);
    final overallPassRate = allTrials == 0 ? 0.0 : allPasses / allTrials;
    final bucket = _bucketFor(overallPassRate);
    histogram[bucket] = (histogram[bucket] ?? 0) + 1;

    // Graduation check: count the unbroken streak of runs, starting at the
    // newest, that meet the mature threshold. A run with zero trials for the
    // task ends the streak.
    var consecutiveMature = 0;
    for (final s in stats) {
      if (s.trials == 0) break;
      if (s.passes / s.trials >= thresholds.matureTaskPassRate) {
        consecutiveMature++;
      } else {
        break;
      }
    }
    if (consecutiveMature >= thresholds.consecutiveRunsForGraduation) {
      graduates.add(
        GraduationCandidate(
          taskId: entry.key,
          recentMeanPassRate: overallPassRate,
          consecutiveMatureRuns: consecutiveMature,
          contributingRuns: stats
              .take(consecutiveMature)
              .map((s) => s.runName)
              .toList(),
        ),
      );
    }

    // Broken candidate: enough trials overall, yet almost all of them fail.
    if (allTrials >= thresholds.minTrialsForBrokenJudgment &&
        overallPassRate <= thresholds.brokenTaskPassRate) {
      brokens.add(
        BrokenTaskCandidate(
          taskId: entry.key,
          totalTrials: allTrials,
          passedTrials: allPasses,
          contributingRuns: stats.map((s) => s.runName).toList(),
        ),
      );
    }
  }

  // Current saturation is based on the newest run only.
  final newest = runs.first;
  final newestSat = _computeSaturationFor(newest);

  return SuiteHealthReport(
    suiteName: suiteName,
    suiteKind: newest.suite.kind,
    analyzedRunCount: runs.length,
    thresholds: thresholds,
    graduationCandidates: graduates,
    brokenTaskCandidates: brokens,
    currentSaturationRatio: newestSat.saturatedTaskRatio,
    currentlySaturated: newestSat.suiteSaturated,
    difficultyHistogram: histogram,
  );
}