generateStream static method
Streaming text generation
Matches Swift RunAnywhere.generateStream(_:options:).
Returns an LLMStreamingResult containing:
- `stream`: Stream of tokens as they are generated.
- `result`: Future that completes with the final generation metrics.
- `cancel`: Function to cancel the generation.
final result = await RunAnywhere.generateStream('Tell me a story');
// Consume tokens as they arrive
await for (final token in result.stream) {
print(token);
}
// Get final metrics after stream completes
final metrics = await result.result;
print('Tokens: ${metrics.tokensUsed}');
// Or cancel early if needed
result.cancel();
Implementation
static Future<LLMStreamingResult> generateStream(
String prompt, {
LLMGenerationOptions? options,
}) async {
if (!_isInitialized) {
throw SDKError.notInitialized();
}
final opts = options ?? const LLMGenerationOptions();
final startTime = DateTime.now();
DateTime? firstTokenTime;
// Verify model is loaded via DartBridgeLLM (mirrors Swift CppBridge.LLM pattern)
if (!DartBridge.llm.isLoaded) {
throw SDKError.componentNotReady(
'LLM model not loaded. Call loadModel() first.',
);
}
final modelId = DartBridge.llm.currentModelId ?? 'unknown';
// Get model name from registry for telemetry
final modelInfo =
await DartBridgeModelRegistry.instance.getPublicModel(modelId);
final modelName = modelInfo?.name;
// Determine effective system prompt - add JSON conversion instructions if structuredOutput is provided
String? effectiveSystemPrompt = opts.systemPrompt;
if (opts.structuredOutput != null) {
final jsonSystemPrompt =
DartBridgeStructuredOutput.shared.getSystemPrompt(
opts.structuredOutput!.schema,
);
// If user already provided a system prompt, prepend the JSON instructions
if (effectiveSystemPrompt != null && effectiveSystemPrompt.isNotEmpty) {
effectiveSystemPrompt = '$jsonSystemPrompt\n\n$effectiveSystemPrompt';
} else {
effectiveSystemPrompt = jsonSystemPrompt;
}
}
// Create a broadcast stream controller for the tokens
final controller = StreamController<String>.broadcast();
final allTokens = <String>[];
// Start streaming generation via DartBridgeLLM
final tokenStream = DartBridge.llm.generateStream(
prompt,
maxTokens: opts.maxTokens,
temperature: opts.temperature,
systemPrompt: effectiveSystemPrompt,
);
// Forward tokens and collect them, track subscription in bridge for cancellation
DartBridge.llm.setActiveStreamSubscription(
tokenStream.listen(
(token) {
// Track first token time
firstTokenTime ??= DateTime.now();
allTokens.add(token);
if (!controller.isClosed) {
controller.add(token);
}
},
onError: (Object error) {
// Track streaming generation error
TelemetryService.shared.trackError(
errorCode: 'streaming_generation_failed',
errorMessage: error.toString(),
context: {'model_id': modelId},
);
if (!controller.isClosed) {
controller.addError(error);
}
},
onDone: () {
if (!controller.isClosed) {
unawaited(controller.close());
}
// Clear subscription when done
DartBridge.llm.setActiveStreamSubscription(null);
},
),
);
// Build result future that completes when stream is done
final resultFuture = controller.stream.toList().then((_) {
final endTime = DateTime.now();
final latencyMs = endTime.difference(startTime).inMicroseconds / 1000.0;
final tokensPerSecond =
latencyMs > 0 ? allTokens.length / (latencyMs / 1000) : 0.0;
// Calculate time to first token
int? timeToFirstTokenMs;
if (firstTokenTime != null) {
timeToFirstTokenMs =
firstTokenTime!.difference(startTime).inMilliseconds;
}
// Estimate tokens (~4 chars per token)
final promptTokens = (prompt.length / 4).ceil();
final completionTokens = allTokens.length;
// Track streaming generation success with full metrics (mirrors other SDKs)
TelemetryService.shared.trackGeneration(
modelId: modelId,
modelName: modelName,
promptTokens: promptTokens,
completionTokens: completionTokens,
latencyMs: latencyMs.round(),
temperature: opts.temperature,
maxTokens: opts.maxTokens,
contextLength: 8192, // Default context length for LlamaCpp
tokensPerSecond: tokensPerSecond,
timeToFirstTokenMs: timeToFirstTokenMs,
isStreaming: true,
);
// Extract structured data if structuredOutput is provided
Map<String, dynamic>? structuredData;
final fullText = allTokens.join();
if (opts.structuredOutput != null) {
try {
final jsonString =
DartBridgeStructuredOutput.shared.extractJson(fullText);
if (jsonString != null) {
final parsed = jsonDecode(jsonString);
structuredData = _normalizeStructuredData(parsed);
}
} catch (_) {
// JSON extraction/parse failed — return text result without structured data
}
}
return LLMGenerationResult(
text: fullText,
inputTokens: promptTokens,
tokensUsed: completionTokens,
modelUsed: modelId,
latencyMs: latencyMs,
framework: 'llamacpp',
tokensPerSecond: tokensPerSecond,
structuredData: structuredData,
);
});
return LLMStreamingResult(
stream: controller.stream,
result: resultFuture,
cancel: () {
// Cancel via the bridge (handles both stream subscription and native cancel)
DartBridge.llm.cancelGeneration();
},
);
}