evaluate method
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
})
override
Evaluates modelResponse and returns an EvaluationResult.
messages is the full conversation history that produced
modelResponse. chatConfiguration is required when the evaluator
itself uses an AI model. additionalContext provides domain-specific
context beyond what is in messages.
Implementation
/// Scores [modelResponse] through `_callService` and returns the outcome as
/// an [EvaluationResult].
///
/// One [NumericMetric] is created for every name in `evaluationMetricNames`,
/// and the returned result is built from those same metric objects. Each
/// key returned by the service is translated through `_metricNames`; when
/// the translated name matches one of the created metrics, that metric
/// receives the returned value and an interpretation derived from
/// `interpretContentHarmScore()`. Service keys with no matching metric are
/// ignored.
///
/// [messages] and [additionalContext] are materialized into lists before
/// being handed to the service; a null [additionalContext] is sent as an
/// empty list.
///
/// [chatConfiguration] and [cancellationToken] are accepted to satisfy the
/// overridden signature but are not read by this implementation.
///
/// Anything thrown while calling the service or applying its scores is
/// caught and recorded as an error diagnostic on every metric instead of
/// propagating to the caller.
@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
}) async {
  final metricsByName = {
    for (final metricName in evaluationMetricNames)
      metricName: NumericMetric(metricName),
  };

  // The result shares the metric instances created above, so every update
  // made below is visible through the returned object.
  final evaluation = EvaluationResult(
    metrics: {
      for (final metric in metricsByName.values) metric.name: metric,
    },
  );

  try {
    final scores = await _callService(
      messages: messages.toList(),
      modelResponse: modelResponse,
      additionalContext: additionalContext?.toList() ?? const [],
    );

    for (final scored in scores.entries) {
      // Translate the service-side key to the public metric name.
      final publicName = _metricNames[scored.key];
      final metric = publicName == null ? null : metricsByName[publicName];
      if (metric == null) continue;

      // The value must be assigned first: the interpretation is computed
      // from the metric after its value has been set.
      metric
        ..value = scored.value
        ..interpretation = metric.interpretContentHarmScore();
    }
  } catch (error) {
    // Best-effort: surface the failure on each metric rather than throwing.
    final message = '$error';
    for (final metric in metricsByName.values) {
      metric.addDiagnostic(EvaluationDiagnostic.error(message));
    }
  }

  return evaluation;
}