## `evaluate` method

```dart
@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
})
```
*Overrides the inherited `evaluate` member.*

Evaluates modelResponse and returns an EvaluationResult.

messages is the full conversation history that produced modelResponse. chatConfiguration is required when the evaluator itself uses an AI model. additionalContext provides domain-specific context beyond what is in messages.

## Implementation

/// Evaluates [modelResponse] against the conversation in [messages] and
/// returns an [EvaluationResult] containing a single [NumericMetric].
///
/// [chatConfiguration] supplies the chat client used to run the AI-based
/// evaluation and is required here; when it is absent, when the response
/// text is empty, or when evaluation instructions cannot be built, an
/// error diagnostic is attached to the metric and the unscored result is
/// returned early instead of throwing.
///
/// [additionalContext] provides domain-specific evaluation context beyond
/// what [messages] contains. [cancellationToken] is forwarded to the chat
/// client so a long-running evaluation call can be aborted.
@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
}) async {
  // Create the metric and result up front so every early-exit path can
  // carry its diagnostic back to the caller on the same result object.
  final metricName = evaluationMetricNames.first;
  final metric = NumericMetric(metricName);
  final result = EvaluationResult.fromList([metric]);

  if (chatConfiguration == null) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'chatConfiguration is required for AI-based evaluators.'));
    return result;
  }

  if (modelResponse.text.isEmpty) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'The modelResponse supplied for evaluation was null or empty.'));
    return result;
  }

  final instructions = buildEvaluationInstructions(
    messages.toList(),
    modelResponse,
    additionalContext?.toList() ?? const [],
  );

  // A null here means a required evaluation context was not supplied;
  // report it as a diagnostic rather than calling the model with bad input.
  if (instructions == null) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'Could not build evaluation instructions. '
        'A required evaluation context may be missing.'));
    return result;
  }

  // Stopwatch is monotonic, so the measured duration cannot be skewed by
  // wall-clock adjustments the way a DateTime.now() subtraction can.
  final stopwatch = Stopwatch()..start();
  final evalResponse = await chatConfiguration.chatClient.getResponse(
    messages: instructions,
    options: _chatOptions,
    cancellationToken: cancellationToken,
  );
  stopwatch.stop();

  if (!metric.tryParseEvaluationResponseWithTags(
      evalResponse, stopwatch.elapsed)) {
    metric.addDiagnostic(EvaluationDiagnostic.error(
        'Could not parse a score from the evaluation response.'));
  } else {
    // Only interpret the score once one was successfully parsed.
    metric.interpretation = metric.interpretScore();
  }
  return result;
}