generate static method

Future<LLMGenerationResult> generate(
  String prompt, {
  LLMGenerationOptions? options,
})

Performs full (non-streaming) text generation and returns the result together with latency and tokens-per-second metrics.

Matches Swift RunAnywhere.generate(_:options:).

final result = await RunAnywhere.generate(
  'Explain quantum computing',
  options: LLMGenerationOptions(maxTokens: 200, temperature: 0.7),
);
print('Response: ${result.text}');
print('Latency: ${result.latencyMs}ms');

Implementation

/// Generates a full (non-streaming) completion for [prompt].
///
/// Mirrors Swift `RunAnywhere.generate(_:options:)`. When
/// [LLMGenerationOptions.structuredOutput] is set, JSON-conversion
/// instructions are prepended to the system prompt and the model output is
/// parsed into [LLMGenerationResult.structuredData] on a best-effort basis.
///
/// Throws [SDKError.notInitialized] if the SDK is not initialized,
/// [SDKError.componentNotReady] if no LLM model is loaded, and
/// [SDKError.generationFailed] if the underlying generation call fails.
static Future<LLMGenerationResult> generate(
  String prompt, {
  LLMGenerationOptions? options,
}) async {
  if (!_isInitialized) {
    throw SDKError.notInitialized();
  }

  final opts = options ?? const LLMGenerationOptions();
  final startTime = DateTime.now();

  // Verify model is loaded via DartBridgeLLM (mirrors Swift CppBridge.LLM pattern).
  if (!DartBridge.llm.isLoaded) {
    throw SDKError.componentNotReady(
      'LLM model not loaded. Call loadModel() first.',
    );
  }

  final modelId = DartBridge.llm.currentModelId ?? 'unknown';

  // Resolve a human-readable model name from the registry for telemetry.
  final modelInfo =
      await DartBridgeModelRegistry.instance.getPublicModel(modelId);
  final modelName = modelInfo?.name;

  // Determine the effective system prompt. When structured output is
  // requested, JSON-conversion instructions are prepended so the model
  // emits parseable JSON; a user-supplied system prompt is preserved after
  // the JSON instructions.
  String? effectiveSystemPrompt = opts.systemPrompt;
  if (opts.structuredOutput != null) {
    final jsonSystemPrompt =
        DartBridgeStructuredOutput.shared.getSystemPrompt(
      opts.structuredOutput!.schema,
    );
    effectiveSystemPrompt =
        (effectiveSystemPrompt != null && effectiveSystemPrompt.isNotEmpty)
            ? '$jsonSystemPrompt\n\n$effectiveSystemPrompt'
            : jsonSystemPrompt;
  }

  try {
    // Generate directly via DartBridgeLLM (calls rac_llm_component_generate).
    final result = await DartBridge.llm.generate(
      prompt,
      maxTokens: opts.maxTokens,
      temperature: opts.temperature,
      systemPrompt: effectiveSystemPrompt,
    );

    final endTime = DateTime.now();
    // Sub-millisecond precision: microseconds converted to fractional ms.
    final latencyMs = endTime.difference(startTime).inMicroseconds / 1000.0;
    final tokensPerSecond = result.totalTimeMs > 0
        ? (result.completionTokens / result.totalTimeMs) * 1000
        : 0.0;

    // Track generation success with full metrics (mirrors other SDKs).
    // Telemetry is best-effort: a failure here must not be misreported to
    // the caller as SDKError.generationFailed after a successful generation.
    try {
      TelemetryService.shared.trackGeneration(
        modelId: modelId,
        modelName: modelName,
        promptTokens: result.promptTokens,
        completionTokens: result.completionTokens,
        latencyMs: latencyMs.round(),
        temperature: opts.temperature,
        maxTokens: opts.maxTokens,
        contextLength: 8192, // Default context length for LlamaCpp.
        tokensPerSecond: tokensPerSecond,
        isStreaming: false,
      );
    } catch (telemetryError) {
      SDKLogger('Telemetry')
          .info('trackGeneration failed: $telemetryError');
    }

    // Extract structured data when requested. Extraction is best-effort:
    // on any extraction/parse failure the plain-text result is returned
    // without structured data.
    Map<String, dynamic>? structuredData;
    if (opts.structuredOutput != null) {
      try {
        final jsonString =
            DartBridgeStructuredOutput.shared.extractJson(result.text);
        if (jsonString != null) {
          structuredData = _normalizeStructuredData(jsonDecode(jsonString));
        }
      } catch (e) {
        // JSON extraction/parse failed — return text result without structured data
        final logger = SDKLogger('StructuredOutputHandler');
        logger.info('JSON extraction/parse failed: $e');
      }
    }

    return LLMGenerationResult(
      text: result.text,
      inputTokens: result.promptTokens,
      tokensUsed: result.completionTokens,
      modelUsed: modelId,
      latencyMs: latencyMs,
      framework: 'llamacpp',
      tokensPerSecond: tokensPerSecond,
      structuredData: structuredData,
    );
  } catch (e) {
    // Track the generation failure, then surface it as a typed SDK error.
    TelemetryService.shared.trackError(
      errorCode: 'generation_failed',
      errorMessage: e.toString(),
      context: {'model_id': modelId},
    );
    throw SDKError.generationFailed('$e');
  }
}