evaluate method
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
})
override
Evaluates modelResponse and returns an EvaluationResult.
messages is the full conversation history that produced
modelResponse. chatConfiguration is required when the evaluator
itself uses an AI model. additionalContext provides domain-specific
context beyond what is in messages.
Implementation
/// Evaluates [modelResponse] and returns an [EvaluationResult] that wraps a
/// single [NumericMetric].
///
/// The metric is named after the first entry of `evaluationMetricNames`.
/// Validation failures are not thrown; they are attached to the metric as
/// error diagnostics and the result is returned early. This happens when:
///
/// * [chatConfiguration] is `null`,
/// * the text of [modelResponse] is empty, or
/// * `buildEvaluationInstructions` returns `null` for the given [messages],
///   [modelResponse] and [additionalContext].
///
/// Otherwise the built instructions are sent to the chat client of
/// [chatConfiguration] (forwarding [cancellationToken]), the call is timed,
/// and the reply is parsed into the metric. If parsing fails an error
/// diagnostic is added; if it succeeds the metric's interpretation is set
/// from `interpretScore()`.
@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
}) async {
  final metricName = evaluationMetricNames.first;
  final metric = NumericMetric(metricName);
  // The result holds the same metric instance, so diagnostics added to
  // `metric` below are what the caller sees on the returned result.
  final result = EvaluationResult.fromList([metric]);

  if (chatConfiguration == null) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'chatConfiguration is required for AI-based evaluators.'));
    return result;
  }

  if (modelResponse.text.isEmpty) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'The modelResponse supplied for evaluation was null or empty.'));
    return result;
  }

  final instructions = buildEvaluationInstructions(
    messages.toList(),
    modelResponse,
    additionalContext?.toList() ?? const [],
  );
  if (instructions == null) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'Could not build evaluation instructions. '
        'A required evaluation context may be missing.'));
    return result;
  }

  // Time the call with a Stopwatch rather than by subtracting two
  // DateTime.now() readings: wall-clock time can be adjusted while the
  // request is in flight, which would skew (or even negate) the duration.
  final stopwatch = Stopwatch()..start();
  final evalResponse = await chatConfiguration.chatClient.getResponse(
    messages: instructions,
    options: _chatOptions,
    cancellationToken: cancellationToken,
  );
  stopwatch.stop();
  final duration = stopwatch.elapsed;

  if (!metric.tryParseEvaluationResponseWithTags(evalResponse, duration)) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'Could not parse a score from the evaluation response.'));
  } else {
    metric.interpretation = metric.interpretScore();
  }
  return result;
}