buildEvaluationInstructions method
List<ChatMessage>? buildEvaluationInstructions(
  List<ChatMessage> messages,
  ChatResponse modelResponse,
  List<EvaluationContext> additionalContext,
)
override
Builds the evaluation instructions (system + user messages).
Return null to signal that a required context was missing.
Implementation
/// Builds the system and user messages for a tool-call accuracy evaluation.
///
/// Returns `null` when [additionalContext] contains no
/// [ToolCallAccuracyEvaluatorContext]. Otherwise returns two messages: a
/// system message carrying `_systemPrompt`, and a user message whose prompt
/// embeds the context's tool definitions, the text of the last user message
/// in [messages] (empty when there is none), and every [FunctionCallContent]
/// found in [modelResponse], or `(none)` when the response has no such calls.
@override
List<ChatMessage>? buildEvaluationInstructions(
  List<ChatMessage> messages,
  ChatResponse modelResponse,
  List<EvaluationContext> additionalContext,
) {
  final toolContext = additionalContext
      .whereType<ToolCallAccuracyEvaluatorContext>()
      .firstOrNull;
  if (toolContext == null) {
    // Without the tool definitions there is nothing to score against.
    return null;
  }

  // One "- name(arguments)" entry per function call in the model response.
  final callLines = [
    for (final message in modelResponse.messages)
      for (final content in message.contents)
        if (content is FunctionCallContent)
          '- ${content.name}(${content.arguments})',
  ];
  final renderedCalls = callLines.isEmpty ? '(none)' : callLines.join('\n');

  final renderedTools =
      toolContext.contents.map((definition) => '$definition').join('\n');
  final query = messages.lastUserMessage?.text ?? '';

  // The template body stays at column 0: any leading whitespace inside the
  // literal would become part of the prompt text.
  final userPrompt = '''
# Definition
**Tool Call Accuracy** measures how accurately the AI used available tools — whether the right tools were called, with correct parameter names, and accurate values extracted from the conversation.
AVAILABLE TOOLS:
$renderedTools
# Ratings
## [ToolCallAccuracy: 1] Wrong tools called or completely incorrect parameters.
## [ToolCallAccuracy: 2] Partially correct tool calls with significant parameter errors.
## [ToolCallAccuracy: 3] Correct tools called but with some parameter errors.
## [ToolCallAccuracy: 4] Correct tools called with mostly correct parameters.
## [ToolCallAccuracy: 5] Perfectly accurate tool calls with all correct parameters.
# Data
QUERY: $query
TOOL CALLS MADE:
$renderedCalls
# Tasks
## Score the accuracy of the tool calls.
- **ThoughtChain**: Think step by step. Start with "Let's think step by step:".
- **Explanation**: A very short explanation of why you think the input Data should get that Score.
- **Score**: An integer score (1–5) based on the definitions.
## Please provide your answers between the tags: <S0>your chain of thoughts</S0>, <S1>your explanation</S1>, <S2>your Score</S2>.
# Output
''';

  final systemMessage = ChatMessage.fromText(ChatRole.system, _systemPrompt);
  final userMessage = ChatMessage.fromText(ChatRole.user, userPrompt);
  return [systemMessage, userMessage];
}