chat method
Generates a response with conversation history context.
`messages` is the conversation history: a list of string-to-string maps, one per conversational exchange, in sequence.
Implementation
/// Produces a reply for a conversation supplied as a list of messages.
///
/// [messages] is turned into a single prompt string by
/// [_formatChatTemplate] and then passed to [generate] together with
/// [maxTokens], which defaults to 256.
///
/// Throws an [Exception] when [isReady] is false. Anything thrown while
/// formatting the prompt or generating the reply is logged through
/// `SintSentinel.logger` and then rethrown unchanged.
@override
Future<String> chat(
  List<Map<String, String>> messages, {
  int maxTokens = 256,
}) async {
  // The readiness guard sits outside the try block, so a "not loaded"
  // failure is not reported as an inference failure by the logger below.
  if (!isReady) {
    throw Exception(
      'LocalLlamaProvider: Local LLM is not loaded. Call initialize() first.',
    );
  }

  try {
    // Flatten the history into the chat-template prompt and run generation
    // on it in one step.
    final reply = await generate(
      _formatChatTemplate(messages),
      maxTokens: maxTokens,
    );
    return reply;
  } catch (e) {
    SintSentinel.logger.e('LocalLlamaProvider: Chat inference failed: $e');
    rethrow;
  }
}