evaluate method
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
})
Evaluates modelResponse against all configured evaluators and returns
the aggregated EvaluationResult.
May only be called once per ScenarioRun instance; calling it again after a result has been recorded throws a StateError. Call dispose afterwards to persist results.
Implementation
/// Evaluates [modelResponse] against all configured evaluators and returns
/// the aggregated [EvaluationResult].
///
/// [messages] is copied into a list exactly once, up front. That same
/// snapshot is handed to the evaluators and stored in the recorded
/// [ScenarioRunResult], so a lazy or single-pass iterable is traversed only
/// once and the stored messages always match what was evaluated.
///
/// [additionalContext] and [cancellationToken] are forwarded unchanged to the
/// composite evaluator.
///
/// May only be called once per [ScenarioRun] instance. Call [dispose]
/// afterwards to persist results.
///
/// Throws a [StateError] if an earlier call has already recorded a result.
Future<EvaluationResult> evaluate(
  Iterable<ChatMessage> messages,
  ChatResponse modelResponse, {
  Iterable<EvaluationContext>? additionalContext,
  CancellationToken? cancellationToken,
}) async {
  // NOTE: the result is only stored after the await below, so this guard
  // detects a repeat call only once the previous call has completed.
  if (_result != null) {
    throw StateError(
      'ScenarioRun "$scenarioName/$iterationName/$executionName" has '
      'already been evaluated. Do not call evaluate() more than once.',
    );
  }

  // Materialize once: an Iterable may be lazy, and iterating it a second time
  // could recompute (or change) its elements.
  final messageSnapshot = messages.toList();

  final evaluationResult = await _compositeEvaluator.evaluate(
    messageSnapshot,
    modelResponse,
    chatConfiguration: chatConfiguration,
    additionalContext: additionalContext,
    cancellationToken: cancellationToken,
  );

  // Let the optional interpreter override each metric's interpretation; a
  // null return leaves the metric's existing interpretation untouched.
  final interpreter = _evaluationMetricInterpreter;
  if (interpreter != null) {
    for (final metric in evaluationResult.metrics.values) {
      final override = interpreter(metric);
      if (override != null) {
        metric.interpretation = override;
      }
    }
  }

  // Only attach chat details when at least one turn was captured.
  final details = _chatDetails;
  final chatDetails =
      (details != null && details.turnDetails.isNotEmpty) ? details : null;

  _result = ScenarioRunResult(
    scenarioName: scenarioName,
    iterationName: iterationName,
    executionName: executionName,
    creationTime: DateTime.now().toUtc(),
    messages: messageSnapshot,
    modelResponse: modelResponse,
    evaluationResult: evaluationResult,
    chatDetails: chatDetails,
    tags: _tags,
  );
  return evaluationResult;
}