generate method
Generates a response from a prompt (single turn, no conversation history).
Implementation
/// Produces a single-turn completion for [prompt].
///
/// A new [llama.ChatSession] is built on every call, so nothing from earlier
/// calls is passed into the session here. [maxTokens] is forwarded to
/// [llama.GenerationParams].
///
/// Returns the `content` part of [ThinkingParser.split] applied to the
/// concatenation of every non-null chunk delta.
///
/// Throws an [Exception] when [isReady] is false. Any failure raised while
/// generating is logged through `SintSentinel.logger` and then rethrown.
@override
Future<String> generate(String prompt, {int maxTokens = 256}) async {
  if (!isReady) {
    throw Exception('LocalLlamaProvider: Local LLM is not loaded. Call initialize() first.');
  }

  try {
    // Drain the whole stream first; the pieces are only stitched together
    // once generation has finished.
    final chunks = await llama.ChatSession(_engine!)
        .create(
          [llama.LlamaTextContent(prompt)],
          params: llama.GenerationParams(maxTokens: maxTokens),
        )
        .toList();

    // Chunks whose delta carries no content are skipped.
    final rawText = chunks
        .map((chunk) => chunk.choices.first.delta.content)
        .where((piece) => piece != null)
        .join();

    // The parser separates any thinking trace from the answer; only the
    // answer part is handed back to the caller.
    return ThinkingParser.split(rawText).content;
  } catch (error) {
    SintSentinel.logger.e('LocalLlamaProvider: Generation failed: $error');
    rethrow;
  }
}