evaluate method

@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
})
override

Evaluates modelResponse and returns an EvaluationResult.

messages is the full conversation history that produced modelResponse. chatConfiguration is required when the evaluator itself uses an AI model. additionalContext provides domain-specific context beyond what is in messages.

Implementation

/// Scores the text of [modelResponse] against reference texts with GLEU.
///
/// The returned [EvaluationResult] always holds a single [NumericMetric]
/// named [gleuMetricName]. The reference texts are taken from the first
/// [GLEUEvaluatorContext] found in [additionalContext].
///
/// Instead of throwing, the metric is returned without a value and with an
/// error diagnostic attached when any of the following holds:
///
/// * the text of [modelResponse] is empty;
/// * [additionalContext] is `null` or contains no [GLEUEvaluatorContext];
/// * the [GLEUEvaluatorContext] has no references.
///
/// On success the metric's value is the sentence-level GLEU score, and the
/// metric additionally records the time spent tokenizing and scoring, the
/// context that was used, and an interpretation of the score.
///
/// [messages], [chatConfiguration] and [cancellationToken] are part of the
/// overridden signature but are not read by this implementation.
@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
}) async {
  final metric = NumericMetric(gleuMetricName);
  // `result` wraps the same `metric` instance, so every mutation below is
  // visible through whichever `return result` is reached.
  final result = EvaluationResult.fromList([metric]);

  final responseText = modelResponse.text;
  if (responseText.isEmpty) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'The modelResponse supplied for evaluation was null or empty.'));
    return result;
  }

  final ctx = additionalContext?.whereType<GLEUEvaluatorContext>().firstOrNull;
  if (ctx == null) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'A GLEUEvaluatorContext was not found in additionalContext.'));
    return result;
  }
  if (ctx.references.isEmpty) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'The supplied GLEUEvaluatorContext contained no references.'));
    return result;
  }

  // Time the work with a Stopwatch rather than by subtracting two
  // DateTime.now() readings: the wall clock can be adjusted while the
  // evaluation runs, which would skew the reported duration or even make it
  // negative.
  final stopwatch = Stopwatch()..start();
  final references = ctx.references
      .map((r) => SimpleWordTokenizer.wordTokenize(r))
      .toList();
  final hypothesis = SimpleWordTokenizer.wordTokenize(responseText);
  final score = GLEUAlgorithm.sentenceGLEU(references, hypothesis);
  stopwatch.stop();

  metric.value = score;
  metric.addOrUpdateDurationMetadata(stopwatch.elapsed);
  metric.addOrUpdateContext(ctx);
  metric.interpretation = metric.interpret();
  return result;
}