applyBenchmarkLatency function
List<BenchmarkedOllamaModel> applyBenchmarkLatency({
  required List<RankedOllamaModel> models,
  required Map<String, int?> benchmarkMs,
  required RecommendationGoal goal,
})
Applies benchmark latency data to ranked models and re-sorts them by adjusted score, listing benchmarked models ahead of models that have no benchmark result.
Implementation
/// Folds measured latencies into the ranking of [models].
///
/// Every model is converted to a [BenchmarkedOllamaModel]. When [benchmarkMs]
/// holds a non-null latency under the model's name, that latency divided by a
/// goal-specific divisor is subtracted from the model's score, and a
/// "benchmarked at …ms" entry is placed at the front of its reasons. The
/// resulting score is rounded to two decimal places, and the summary is the
/// first three reasons joined with ", ".
///
/// Models that have a latency come first in the result, ordered by
/// [_compareRankedModels] for the given [goal]; models without a latency
/// follow in their input order. If no model has a latency at all, the whole
/// list is ordered by [_compareRankedModels] instead.
List<BenchmarkedOllamaModel> applyBenchmarkLatency({
  required List<RankedOllamaModel> models,
  required Map<String, int?> benchmarkMs,
  required RecommendationGoal goal,
}) {
  // Milliseconds of latency that cost one score point for this goal.
  final double msPerPoint;
  if (goal == RecommendationGoal.latency) {
    msPerPoint = 120.0;
  } else if (goal == RecommendationGoal.coding) {
    msPerPoint = 500.0;
  } else {
    msPerPoint = 240.0;
  }

  // Partition while converting so each group keeps the input order.
  final measured = <BenchmarkedOllamaModel>[];
  final unmeasured = <BenchmarkedOllamaModel>[];

  for (final candidate in models) {
    final elapsedMs = benchmarkMs[candidate.name];
    final deduction = elapsedMs != null ? elapsedMs / msPerPoint : 0.0;
    final notes = elapsedMs != null
        ? ['benchmarked at ${elapsedMs}ms', ...candidate.reasons]
        : candidate.reasons;
    final adjusted = candidate.score - deduction;

    final entry = BenchmarkedOllamaModel(
      name: candidate.name,
      sizeBytes: candidate.sizeBytes,
      family: candidate.family,
      families: candidate.families,
      parameterSize: candidate.parameterSize,
      quantizationLevel: candidate.quantizationLevel,
      benchmarkMs: elapsedMs,
      reasons: notes,
      summary: notes.take(3).join(', '),
      // Round-trip through a fixed-precision string to keep two decimals.
      score: double.parse(adjusted.toStringAsFixed(2)),
    );

    if (entry.benchmarkMs != null) {
      measured.add(entry);
    } else {
      unmeasured.add(entry);
    }
  }

  // Nothing was benchmarked: rank the full list on score alone.
  if (measured.isEmpty) {
    unmeasured.sort((left, right) => _compareRankedModels(left, right, goal));
    return unmeasured;
  }

  // Only the benchmarked group is re-ranked; the rest trail behind as given.
  measured.sort((left, right) => _compareRankedModels(left, right, goal));
  return [...measured, ...unmeasured];
}