evaluate method

@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
})
override

Evaluates modelResponse and returns an EvaluationResult.

messages is the full conversation history that produced modelResponse. chatConfiguration is required when the evaluator itself uses an AI model. additionalContext provides domain-specific context beyond what is in messages.

Implementation

/// Scores [modelResponse] by calling `_callService` and returns the outcome
/// as an [EvaluationResult].
///
/// The result contains one [NumericMetric] for every name in
/// [evaluationMetricNames]. Each entry returned by the service is translated
/// to a public metric name through `_metricNames`; entries with no
/// translation, or whose translated name is not one of
/// [evaluationMetricNames], are ignored. A matched metric receives the
/// service value and an interpretation computed by
/// `interpretContentHarmScore()`.
///
/// [messages] and [additionalContext] are copied to lists before being
/// handed to the service; a null [additionalContext] is sent as an empty
/// list.
///
/// This method does not propagate failures: anything thrown while calling
/// the service or applying its scores is caught and recorded as an error
/// diagnostic on every metric, and the (possibly partially populated) result
/// is still returned.
@override
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  ChatConfiguration? chatConfiguration,
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
}) async {
  // NOTE(review): `chatConfiguration` and `cancellationToken` are accepted
  // but never read in this body — confirm whether `_callService` should
  // receive them.
  final metrics = {
    for (final name in evaluationMetricNames) name: NumericMetric(name),
  };
  // The result map is keyed by each metric's own `name` and holds the same
  // metric instances as `metrics`, so the updates below are visible through
  // `result`.
  final result = EvaluationResult(
    metrics: {for (final m in metrics.values) m.name: m},
  );

  try {
    final serviceResult = await _callService(
      messages: messages.toList(),
      modelResponse: modelResponse,
      additionalContext: additionalContext?.toList() ?? const [],
    );
    for (final entry in serviceResult.entries) {
      final publicName = _metricNames[entry.key];
      if (publicName == null) continue;

      // Single lookup with null-check promotion instead of `containsKey`
      // followed by repeated `metrics[publicName]!`.
      final metric = metrics[publicName];
      if (metric == null) continue;

      // The value must be assigned first: the interpretation is derived
      // from the metric's current value.
      metric.value = entry.value;
      metric.interpretation = metric.interpretContentHarmScore();
    }
  } catch (e) {
    // Deliberate best-effort: surface the failure on every metric instead of
    // throwing to the caller.
    for (final m in metrics.values) {
      m.addDiagnostic(EvaluationDiagnostic.error(e.toString()));
    }
  }

  return result;
}