generateResponse method

Stream<LlmResponseContext> generateResponse(
  String text
)

Generates a response based on the input text, streamed back as a series of [LlmResponseContext] chunks.

Implementation

/// Generates a response to [text], streamed back as a series of
/// [LlmResponseContext] chunks; the final chunk has `isDone == true`.
///
/// Throws (in debug mode) if a previous query is still in flight and has not
/// been completed or cancelled.
Stream<LlmResponseContext> generateResponse(String text) {
  assert(() {
    if (_responseController != null) {
      throw Exception(
        'Asked for response before completing response from '
        'previous query, or cancelling previous query.',
      );
    }
    return true;
  }());
  _responseController = StreamController<LlmResponseContext>();
  final textPtr = text.copyToNative();
  final callback = NativeCallable<LlmResponseCallback>.listener(
    (
      Pointer<Void> context,
      Pointer<bindings.LlmResponseContext> responseContext,
    ) {
      if (_responseController == null) {
        // Short-circuit if the caller has cancelled this query before
        // receiving the complete output.
        return;
      }
      // Not often, but also not never, `nullptr` seems to arrive here, which
      // breaks everything if not caught and discarded.
      if (responseContext == nullptr) {
        _log.warning('Discarding unexpected nullptr from PredictAsync');
        return;
      }
      // Copy everything we need out of the native struct *before* closing it
      // below: reading `responseContext.ref` after CloseResponseContext has
      // freed the struct is a use-after-free.
      final isDone = responseContext.ref.done;
      _responseController!.add(
        // Ideally this would pass the raw pointer to the
        // LlmResponseContext.native() constructor and rely on
        // LlmResponseContext.dispose() for cleanup, but passing pointers
        // between threads does not work.
        LlmResponseContext(
          responseArray: responseContext.ref.response_array
              .toDartStrings(responseContext.ref.response_count),
          isDone: isDone,
        ),
      );
      bindings.LlmInferenceEngine_CloseResponseContext(responseContext);
      if (isDone) {
        // The query is complete: release the native copy of the prompt and
        // close out the response stream.
        malloc.free(textPtr);
        _finalizeResponse();
      }
    },
  );
  // NOTE(review): `callback` is never `.close()`d, so each query appears to
  // leak its NativeCallable and may keep the isolate alive — TODO confirm
  // and close it after the final (`done`) chunk is handled.
  bindings.LlmInferenceEngine_Session_PredictAsync(
    worker,
    nullptr,
    textPtr,
    callback.nativeFunction,
  );
  return _responseController!.stream;
}