initialize method
Initialize the engine with model path and settings.
Implementation
/// Initializes the native LiteRT-LM engine for the model at [modelPath].
///
/// Builds a native engine-settings object from the arguments, creates the
/// engine on a background isolate, stores the resulting handle in `_engine`
/// and sets `_isInitialized` to `true` on success. The settings object and
/// every temporary native string allocated here are released before this
/// method completes, whether it succeeds or throws.
///
/// Parameters:
/// * [modelPath] – path handed to the native settings constructor.
/// * [backend] – main backend name; also used as the vision backend when
///   [enableVision] is `true`. The value `'npu'` triggers the Windows and
///   Android dispatch-library setup below.
/// * [maxTokens] – forwarded as the engine's maximum number of tokens.
/// * [cacheDir] – when non-null, forwarded as the cache directory.
/// * [enableVision] – when `false`, a null vision backend is passed.
/// * [maxNumImages] – forwarded only when greater than zero.
/// * [enableAudio] – when `true`, the audio backend is set to `'cpu'`;
///   otherwise a null audio backend is passed.
/// * [enableSpeculativeDecoding] – forwarded only when non-null, so `null`
///   leaves the SDK/model default untouched.
///
/// Throws an [Exception] when the native settings object or the engine
/// cannot be created, and a [StateError] on Android with the `'npu'`
/// backend when the native library directory cannot be resolved.
Future<void> initialize({
  required String modelPath,
  String backend = 'gpu',
  int maxTokens = 2048,
  String? cacheDir,
  bool enableVision = false,
  int maxNumImages = 0,
  bool enableAudio = false,
  bool? enableSpeculativeDecoding,
}) async {
  final initSw = Stopwatch()..start();
  _ensureBindings();
  _backend = backend;
  final bindingsMs = initSw.elapsedMilliseconds;
  gemmaLog('[LiteRtLmFfi/perf] _ensureBindings: ${bindingsMs}ms');
  final b = _bindings!;
  // Create engine settings
  final modelPathPtr = modelPath.toNativeUtf8();
  final backendPtr = backend.toNativeUtf8();
  final visionBackendPtr = enableVision ? backend.toNativeUtf8() : nullptr;
  final audioBackendPtr = enableAudio ? 'cpu'.toNativeUtf8() : nullptr;
  try {
    final settingsCreateStart = initSw.elapsedMilliseconds;
    final settings = b.litert_lm_engine_settings_create(
      modelPathPtr.cast(),
      backendPtr.cast(),
      visionBackendPtr == nullptr ? nullptr : visionBackendPtr.cast(),
      audioBackendPtr == nullptr ? nullptr : audioBackendPtr.cast(),
    );
    gemmaLog(
        '[LiteRtLmFfi/perf] settings_create: ${initSw.elapsedMilliseconds - settingsCreateStart}ms');
    if (settings == nullptr) {
      throw Exception('Failed to create engine settings');
    }
    // From here on the native settings object is owned by this method. The
    // inner try/finally guarantees it is deleted even when configuration,
    // the platform-channel call, or engine creation throws.
    try {
      // Configure settings
      b.litert_lm_engine_settings_set_max_num_tokens(settings, maxTokens);
      // Enable benchmarking for session metrics (token counts, timing)
      b.litert_lm_engine_settings_enable_benchmark(settings);
      if (cacheDir != null) {
        final cacheDirPtr = cacheDir.toNativeUtf8();
        // Sets cache dir on main, vision, and audio executors (C API patched)
        b.litert_lm_engine_settings_set_cache_dir(
            settings, cacheDirPtr.cast());
        calloc.free(cacheDirPtr);
      }
      if (maxNumImages > 0) {
        b.litert_lm_engine_settings_set_max_num_images(settings, maxNumImages);
      }
      // MTP / speculative decoding (LiteRT-LM v0.11.0+). Skip when null so
      // the SDK uses the model's default; only call when caller explicitly
      // forces on/off.
      if (enableSpeculativeDecoding != null) {
        b.litert_lm_engine_settings_set_enable_speculative_decoding(
            settings, enableSpeculativeDecoding);
      }
      // Windows NPU: point LiteRT at the directory containing
      // `LiteRtDispatch.dll` and disable HW mask update path. Native Assets
      // bundles both DLLs next to the executable, so resolvedExecutable.parent
      // is the right path. Without `dispatch_lib_dir` LiteRT reads
      // uninitialized env-option memory and engine_create crashes; without
      // `use_hw_masking_for_npu(false)` LiteRT sets up the kWH HW mask method
      // which Intel preview NPU (LunarLake/PantherLake) doesn't fully support
      // → CFG check failure 0xc0000409 (per Matt Kreileder's Intel NPU
      // pipeline instructions).
      if (Platform.isWindows && backend == 'npu') {
        final exeDir = File(Platform.resolvedExecutable).parent.path;
        final dirPtr = exeDir.toNativeUtf8();
        b.litert_lm_engine_settings_set_litert_dispatch_lib_dir(
            settings, dirPtr.cast());
        calloc.free(dirPtr);
        b.litert_lm_engine_settings_set_use_hw_masking_for_npu(settings, false);
        gemmaLog(
            '[LiteRtLmFfi] NPU Windows: dispatch_lib_dir=$exeDir, use_hw_masking_for_npu=false');
      }
      // Android NPU: point LiteRT at the app's nativeLibraryDir so it can
      // dlopen libLiteRtDispatch_Qualcomm.so from there. On Android, Native
      // Assets unpacks all bundled .so files into nativeLibraryDir at install
      // time; without this setting LiteRT searches system paths and fails.
      if (Platform.isAndroid && backend == 'npu') {
        const bundledChannel = MethodChannel('flutter_gemma_bundled');
        final nativeLibDir =
            await bundledChannel.invokeMethod<String>('getNativeLibraryDir');
        if (nativeLibDir == null) {
          throw StateError(
              '[LiteRtLmFfi] NPU Android: getNativeLibraryDir returned null — '
              'plugin channel not registered; cannot locate '
              'libLiteRtDispatch_Qualcomm.so.');
        }
        final dirPtr = nativeLibDir.toNativeUtf8();
        b.litert_lm_engine_settings_set_litert_dispatch_lib_dir(
            settings, dirPtr.cast());
        calloc.free(dirPtr);
        gemmaLog('[LiteRtLmFfi] NPU Android: dispatch_lib_dir=$nativeLibDir');
      }
      // Create engine in a background isolate to avoid blocking UI.
      // Pass settings pointer as int address (Pointer can't cross isolates).
      gemmaLog(
          '[LiteRtLmFfi] Creating engine from $modelPath (backend=$backend, maxTokens=$maxTokens) ...');
      gemmaLog(
          '[LiteRtLmFfi/perf] === START litert_lm_engine_create (native — model load + accelerator init + KV cache prefill) ===');
      final settingsAddr = settings.address;
      // Captured so the log level can be re-applied inside the new isolate.
      final isolateLogLevel = gemmaLogLevel;
      final sw = Stopwatch()..start();
      final engineAddr = await Isolate.run(() {
        gemmaLogLevel = isolateLogLevel;
        final isolateSw = Stopwatch()..start();
        final lib = Platform.isIOS
            ? DynamicLibrary.open(
                '@executable_path/Frameworks/LiteRtLm.framework/LiteRtLm')
            : Platform.isMacOS
                ? DynamicLibrary.open('LiteRtLm.framework/LiteRtLm')
                : (Platform.isLinux || Platform.isAndroid)
                    ? DynamicLibrary.open('libLiteRtLm.so')
                    : DynamicLibrary.open('LiteRtLm.dll');
        gemmaLog(
            '[LiteRtLmFfi/perf] isolate: DynamicLibrary.open: ${isolateSw.elapsedMilliseconds}ms',
            level: GemmaLogLevel.verbose);
        final lookupStart = isolateSw.elapsedMilliseconds;
        final create = lib.lookupFunction<Pointer Function(Pointer),
            Pointer Function(Pointer)>('litert_lm_engine_create');
        gemmaLog(
            '[LiteRtLmFfi/perf] isolate: lookupFunction: ${isolateSw.elapsedMilliseconds - lookupStart}ms',
            level: GemmaLogLevel.verbose);
        final createStart = isolateSw.elapsedMilliseconds;
        final ptr = create(Pointer.fromAddress(settingsAddr)).address;
        gemmaLog(
            '[LiteRtLmFfi/perf] isolate: native litert_lm_engine_create: ${isolateSw.elapsedMilliseconds - createStart}ms',
            level: GemmaLogLevel.verbose);
        return ptr;
      });
      _engine = Pointer<LiteRtLmEngine>.fromAddress(engineAddr);
      sw.stop();
      gemmaLog(
          '[LiteRtLmFfi/perf] === END litert_lm_engine_create: ${sw.elapsedMilliseconds}ms (includes isolate spawn ~50-200ms) ===');
      gemmaLog(
          '[LiteRtLmFfi] litert_lm_engine_create took ${sw.elapsedMilliseconds}ms');
    } finally {
      // Runs only after the isolate has completed (or failed), so the
      // settings object is no longer in use by litert_lm_engine_create.
      b.litert_lm_engine_settings_delete(settings);
    }
    if (_engine == null || _engine == nullptr) {
      _dumpNativeLog();
      throw Exception(
          'Failed to create engine. Model may be invalid: $modelPath');
    }
    _isInitialized = true;
    gemmaLog(
        '[LiteRtLmFfi/perf] initialize() total: ${initSw.elapsedMilliseconds}ms');
    gemmaLog('[LiteRtLmFfi] Engine initialized successfully');
    // Auto-dump the SDK's stderr log after successful engine_create so
    // users can see what happens inside the native call (model load time,
    // accelerator init, sampler dlopen attempts, KV cache prefill, etc.).
    // No-op when stderr redirection isn't wired (release / Android /
    // Windows). Safe to call before _isInitialized was true since the
    // dump only reads a file, doesn't touch native state.
    _dumpNativeLog();
  } finally {
    // Release the UTF-8 copies of the path/backend strings on every exit path.
    calloc.free(modelPathPtr);
    calloc.free(backendPtr);
    if (visionBackendPtr != nullptr) calloc.free(visionBackendPtr);
    if (audioBackendPtr != nullptr) calloc.free(audioBackendPtr);
  }
}