evaluate method

Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
})

Evaluates modelResponse against all configured evaluators and returns the aggregated EvaluationResult.

May only be called once per ScenarioRun instance; calling it again throws a StateError. Call dispose afterwards to persist results.

Implementation

/// Runs the configured composite evaluator over [modelResponse] and returns
/// the resulting [EvaluationResult].
///
/// [messages] and [modelResponse] are forwarded to the composite evaluator
/// together with this run's `chatConfiguration`, the optional
/// [additionalContext], and the optional [cancellationToken].
///
/// When a metric interpreter is configured, it is invoked for every metric in
/// the result; each non-null value it returns replaces that metric's
/// `interpretation`.
///
/// On success, a [ScenarioRunResult] describing this evaluation is stored on
/// the instance, which is what marks the run as already evaluated.
///
/// Throws a [StateError] if a result has already been stored by a previous
/// call.
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
}) async {
  // A stored result means this run has already been evaluated.
  final alreadyEvaluated = _result != null;
  if (alreadyEvaluated) {
    throw StateError(
      'ScenarioRun "$scenarioName/$iterationName/$executionName" has '
      'already been evaluated. Do not call evaluate() more than once.',
    );
  }

  final outcome = await _compositeEvaluator.evaluate(
    messages,
    modelResponse,
    chatConfiguration: chatConfiguration,
    additionalContext: additionalContext,
    cancellationToken: cancellationToken,
  );

  // Let the optional interpreter override individual metric interpretations;
  // a null return leaves the metric's existing interpretation untouched.
  final interpreter = _evaluationMetricInterpreter;
  if (interpreter != null) {
    for (final metric in outcome.metrics.values) {
      if (interpreter(metric) case final replacement?) {
        metric.interpretation = replacement;
      }
    }
  }

  // Chat details are only attached when at least one turn was recorded;
  // otherwise the stored result carries null for them.
  final recordedDetails = _chatDetails;
  final hasRecordedTurns =
      recordedDetails != null && recordedDetails.turnDetails.isNotEmpty;

  _result = ScenarioRunResult(
    scenarioName: scenarioName,
    iterationName: iterationName,
    executionName: executionName,
    creationTime: DateTime.now().toUtc(),
    messages: messages.toList(),
    modelResponse: modelResponse,
    evaluationResult: outcome,
    chatDetails: hasRecordedTurns ? recordedDetails : null,
    tags: _tags,
  );

  return outcome;
}