evaluate method
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
})
override
Evaluates modelResponse and returns an EvaluationResult.
messages is the full conversation history that produced
modelResponse. chatConfiguration is required when the evaluator
itself uses an AI model. additionalContext provides domain-specific
context beyond what is in messages.
Implementation
/// Scores [modelResponse] against a ground-truth answer using an F1 score.
///
/// The returned [EvaluationResult] is built from a single [NumericMetric]
/// named [f1MetricName]. The ground truth is read from the first
/// [F1EvaluatorContext] found in [additionalContext]; both the ground truth
/// and the response text are word-tokenized with [SimpleWordTokenizer]
/// before being passed to [F1Algorithm.calculateF1Score].
///
/// Instead of throwing, the method returns early with an error
/// [EvaluationDiagnostic] attached to the metric (and no value assigned)
/// when:
///
/// * the text of [modelResponse] is empty, or
/// * [additionalContext] is `null` or contains no [F1EvaluatorContext].
///
/// On success the metric carries the score, the time spent tokenizing and
/// scoring, the context that was used, and an interpretation.
///
/// [messages], [chatConfiguration] and [cancellationToken] are accepted to
/// satisfy the overridden signature but are not read by this implementation.
@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
}) async {
  final metric = NumericMetric(f1MetricName);
  final result = EvaluationResult.fromList([metric]);

  final responseText = modelResponse.text;
  if (responseText.isEmpty) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'The modelResponse supplied for evaluation was null or empty.'));
    return result;
  }

  final ctx = additionalContext?.whereType<F1EvaluatorContext>().firstOrNull;
  if (ctx == null) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'An F1EvaluatorContext was not found in additionalContext.'));
    return result;
  }

  // Measure elapsed time with a Stopwatch rather than by diffing two
  // DateTime.now() readings: wall-clock readings can be skewed (even
  // negative) if the system clock is adjusted while scoring is in progress.
  final stopwatch = Stopwatch()..start();
  final reference = SimpleWordTokenizer.wordTokenize(ctx.groundTruth);
  final hypothesis = SimpleWordTokenizer.wordTokenize(responseText);
  final score = F1Algorithm.calculateF1Score(reference, hypothesis);
  stopwatch.stop();

  metric.value = score;
  metric.addOrUpdateDurationMetadata(stopwatch.elapsed);
  metric.addOrUpdateContext(ctx);
  // Interpretation is derived after the value has been assigned above.
  metric.interpretation = metric.interpret();
  return result;
}