evaluate method
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
})
override
Evaluates modelResponse and returns an EvaluationResult.
messages is the full conversation history that produced
modelResponse. chatConfiguration is required when the evaluator
itself uses an AI model. additionalContext provides domain-specific
context beyond what is in messages.
Implementation
/// Scores the text of [modelResponse] against reference texts and returns an
/// [EvaluationResult] holding a single [NumericMetric] named
/// [gleuMetricName].
///
/// The references are read from the first [GLEUEvaluatorContext] found in
/// [additionalContext]. Each reference and the response text are tokenized
/// with [SimpleWordTokenizer.wordTokenize] and scored with
/// [GLEUAlgorithm.sentenceGLEU].
///
/// Rather than throwing, this method attaches an error diagnostic to the
/// metric and returns early, without assigning a value, when:
///
/// * the response text is empty,
/// * [additionalContext] is null or contains no [GLEUEvaluatorContext], or
/// * the context's reference list is empty.
///
/// On success the metric also carries the elapsed scoring time, the context
/// that was used, and an interpretation of the score.
///
/// [messages], [chatConfiguration] and [cancellationToken] are not read by
/// this implementation.
@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
}) async {
  // The result wraps the same metric instance, so diagnostics and values set
  // on `metric` below are visible through `result`.
  final metric = NumericMetric(gleuMetricName);
  final result = EvaluationResult.fromList([metric]);

  // NOTE(review): a whitespace-only response passes this check; confirm that
  // the tokenizer and GLEU algorithm cope with an empty hypothesis.
  final responseText = modelResponse.text;
  if (responseText.isEmpty) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'The modelResponse supplied for evaluation was null or empty.'));
    return result;
  }

  final ctx = additionalContext?.whereType<GLEUEvaluatorContext>().firstOrNull;
  if (ctx == null) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'A GLEUEvaluatorContext was not found in additionalContext.'));
    return result;
  }

  if (ctx.references.isEmpty) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'The supplied GLEUEvaluatorContext contained no references.'));
    return result;
  }

  // Use a Stopwatch rather than subtracting two DateTime.now() readings:
  // wall-clock time can jump (NTP sync, manual changes) and yield a skewed or
  // negative duration, whereas Stopwatch measures elapsed time directly.
  final stopwatch = Stopwatch()..start();
  final references =
      ctx.references.map(SimpleWordTokenizer.wordTokenize).toList();
  final hypothesis = SimpleWordTokenizer.wordTokenize(responseText);
  final score = GLEUAlgorithm.sentenceGLEU(references, hypothesis);
  stopwatch.stop();

  metric
    ..value = score
    ..addOrUpdateDurationMetadata(stopwatch.elapsed)
    ..addOrUpdateContext(ctx);
  // Interpret only after the value has been assigned.
  metric.interpretation = metric.interpret();
  return result;
}