generateResponse method
Generates a streaming response based on the input text, emitting chunks as the model produces them.
Implementation
/// Generates a streaming response for [text].
///
/// Each native callback invocation is surfaced as an [LlmResponseContext] on
/// the returned stream; the final event has `isDone == true`. Only one query
/// may be in flight at a time — asking for a new response before the previous
/// one completes (or is cancelled) is a programmer error, checked in debug
/// mode via the leading assert.
Stream<LlmResponseContext> generateResponse(String text) {
  assert(() {
    if (_responseController != null) {
      throw Exception(
        'Asked for response before completing response from '
        'previous query, or cancelling previous query.',
      );
    }
    return true;
  }());
  _responseController = StreamController<LlmResponseContext>();
  // Native copy of the prompt; freed once the final (done) chunk arrives.
  final textPtr = text.copyToNative();
  // NOTE(review): `callback` is never close()d, so this NativeCallable
  // appears to leak on every query (and on the cancellation path below) —
  // TODO confirm against the cancel/_finalizeResponse flow and fix.
  final callback = NativeCallable<LlmResponseCallback>.listener(
    (
      Pointer<Void> context,
      Pointer<bindings.LlmResponseContext> responseContext,
    ) {
      if (_responseController == null) {
        // Short-circuit if the caller has cancelled this query before
        // receiving the complete output.
        return;
      }
      // Not often, but also not never, `nullptr` seems to arrive here, which
      // breaks everything if not caught and discarded.
      if (responseContext == nullptr) {
        _log.warning('Discarding unexpected nullptr from PredictAsync');
        return;
      }
      // Copy everything out of the native struct *before* closing it below.
      // The original code read `responseContext.ref.done` after calling
      // LlmInferenceEngine_CloseResponseContext, which is a use-after-free.
      final isDone = responseContext.ref.done;
      _responseController!.add(
        // Ideally this would pass the raw pointer to the
        // LlmResponseContext.native() constructor and rely on
        // LlmResponseContext.dispose() for cleanup, but passing pointers
        // between threads does not work.
        LlmResponseContext(
          responseArray: responseContext.ref.response_array
              .toDartStrings(responseContext.ref.response_count),
          isDone: isDone,
        ),
      );
      bindings.LlmInferenceEngine_CloseResponseContext(responseContext);
      if (isDone) {
        // Final chunk: release the native prompt buffer and tear down the
        // controller so a new query may begin.
        malloc.free(textPtr);
        _finalizeResponse();
      }
    },
  );
  bindings.LlmInferenceEngine_Session_PredictAsync(
    worker,
    nullptr,
    textPtr,
    callback.nativeFunction,
  );
  return _responseController!.stream;
}