applyBenchmarkLatency function - provider_profile library

applyBenchmarkLatency function

List<BenchmarkedOllamaModel> applyBenchmarkLatency({

required List<RankedOllamaModel> models,
required Map<String, int?> benchmarkMs,
required RecommendationGoal goal,

})

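Applies benchmark latency data to the ranked models and re-sorts them by adjusted score. When at least one model has a benchmark result, benchmarked models are listed ahead of unbenchmarked ones.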

Implementation

/// Folds measured benchmark latencies into [models] and returns them
/// re-ordered.
///
/// For every model, [benchmarkMs] is looked up by the model's name. When a
/// latency is present it is divided by a [goal]-dependent divisor (120 for
/// [RecommendationGoal.latency], 500 for [RecommendationGoal.coding], 240
/// otherwise) and the quotient is subtracted from the model's score; a
/// `'benchmarked at <n>ms'` entry is also placed at the front of its reasons.
/// Models without a latency keep their score (rounded to two decimals) and
/// their original reasons.
///
/// Ordering of the result:
/// * If no model has a latency, the whole list is sorted with
///   `_compareRankedModels` for [goal].
/// * Otherwise the benchmarked models come first, sorted with
///   `_compareRankedModels`, followed by the unbenchmarked models in the
///   relative order they had in [models].
List<BenchmarkedOllamaModel> applyBenchmarkLatency({
  required List<RankedOllamaModel> models,
  required Map<String, int?> benchmarkMs,
  required RecommendationGoal goal,
}) {
  // Milliseconds of latency that cost one score point. A smaller value
  // penalises slow models more heavily.
  double msPerPoint;
  if (goal == RecommendationGoal.latency) {
    msPerPoint = 120.0;
  } else if (goal == RecommendationGoal.coding) {
    msPerPoint = 500.0;
  } else {
    msPerPoint = 240.0;
  }

  final withLatency = <BenchmarkedOllamaModel>[];
  final withoutLatency = <BenchmarkedOllamaModel>[];

  for (final candidate in models) {
    final elapsedMs = benchmarkMs[candidate.name];
    final deduction = elapsedMs != null ? elapsedMs / msPerPoint : 0.0;

    // Only benchmarked models get the extra leading reason; the others reuse
    // the model's own reasons object unchanged.
    final why = elapsedMs == null
        ? candidate.reasons
        : ['benchmarked at ${elapsedMs}ms', ...candidate.reasons];

    // Round-trip through a fixed-precision string to clamp the adjusted
    // score to two decimal places.
    final adjustedText = (candidate.score - deduction).toStringAsFixed(2);

    final entry = BenchmarkedOllamaModel(
      name: candidate.name,
      sizeBytes: candidate.sizeBytes,
      family: candidate.family,
      families: candidate.families,
      parameterSize: candidate.parameterSize,
      quantizationLevel: candidate.quantizationLevel,
      benchmarkMs: elapsedMs,
      reasons: why,
      summary: why.take(3).join(', '),
      score: double.parse(adjustedText),
    );

    if (entry.benchmarkMs != null) {
      withLatency.add(entry);
    } else {
      withoutLatency.add(entry);
    }
  }

  int byRank(BenchmarkedOllamaModel a, BenchmarkedOllamaModel b) =>
      _compareRankedModels(a, b, goal);

  // Nothing was benchmarked: every entry sits in `withoutLatency`, still in
  // input order, so sorting it is the same as sorting the full list.
  if (withLatency.isEmpty) {
    withoutLatency.sort(byRank);
    return withoutLatency;
  }

  withLatency.sort(byRank);
  return [...withLatency, ...withoutLatency];
}