generate method

Map<String, dynamic> generate(
  String prompt, {
  String? systemPrompt,
  int maxTokens = 512,
  double temperature = 0.7,
})

Generates text for [prompt] in a single blocking call (non-streaming) and returns the result together with token-usage and timing statistics.

Implementation

/// Generates text for [prompt] in a single blocking call (non-streaming).
///
/// [systemPrompt] is passed to the native layer when non-null; [maxTokens]
/// and [temperature] bound the generation. Returns a map with the generated
/// `text` plus token-usage and timing statistics reported by the backend.
///
/// Throws [NativeBackendException] if the native call reports a non-success
/// status. Requires a 'llamacpp' backend and a live native handle.
Map<String, dynamic> generate(
  String prompt, {
  String? systemPrompt,
  int maxTokens = 512,
  double temperature = 0.7,
}) {
  _ensureBackendType('llamacpp');
  _ensureHandle();

  final promptPtr = prompt.toNativeUtf8();
  // Keep the Dart-side reference to the system-prompt buffer so cleanup
  // frees exactly what we allocated, instead of re-reading the struct field
  // after the native call (which the native side could have overwritten).
  final systemPromptPtr = systemPrompt?.toNativeUtf8() ?? nullptr;
  final resultPtr = calloc<RacLlmResultStruct>();
  final optionsPtr = calloc<RacLlmOptionsStruct>();

  try {
    // Populate options inside the try so any failure here still releases
    // the native buffers allocated above.
    optionsPtr.ref
      ..maxTokens = maxTokens
      ..temperature = temperature
      ..topP = 1.0
      ..streamingEnabled = RAC_FALSE
      ..systemPrompt = systemPromptPtr;

    final generate = _lib.lookupFunction<RacLlmComponentGenerateNative,
        RacLlmComponentGenerateDart>('rac_llm_component_generate');

    final status = generate(
      _handle!,
      promptPtr,
      optionsPtr.cast(),
      resultPtr.cast(),
    );

    if (status != RAC_SUCCESS) {
      throw NativeBackendException(
        'Text generation failed: ${RacCore.getErrorMessage(status)}',
        code: status,
      );
    }

    // Extract the result fields into plain Dart values before cleanup.
    final result = resultPtr.ref;
    final text = result.text != nullptr ? result.text.toDartString() : '';

    return {
      'text': text,
      'prompt_tokens': result.promptTokens,
      'completion_tokens': result.completionTokens,
      'total_tokens': result.totalTokens,
      'time_to_first_token_ms': result.timeToFirstTokenMs,
      'total_time_ms': result.totalTimeMs,
      'tokens_per_second': result.tokensPerSecond,
    };
  } finally {
    calloc.free(promptPtr);
    if (systemPromptPtr != nullptr) {
      calloc.free(systemPromptPtr);
    }
    calloc.free(optionsPtr);
    // NOTE(review): freeing resultPtr releases only the struct shell;
    // result.text looks native-allocated and may need a dedicated
    // rac_*_result_free call — confirm against the native API to rule
    // out a leak.
    calloc.free(resultPtr);
  }
}