createModel method

@override
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
  bool supportAudio = false,
  bool? enableSpeculativeDecoding,
  int? maxConcurrentSessions,
})
override

Creates and returns a new InferenceModel instance.

Parameters:

- modelType — model type to create.
- maxTokens — maximum context length for the model.
- preferredBackend — backend preference (e.g., CPU, GPU).
- loraRanks — optional supported LoRA ranks.
- maxNumImages — maximum number of images (for multimodal models).
- supportImage — whether the model supports images.
- supportAudio — whether the model supports audio (Gemma 3n E4B only).
- enableSpeculativeDecoding — Multi-Token Prediction toggle for Gemma 4 E2B/E4B (LiteRT-LM v0.11.0+). null honors the model's default; true/false forces it on/off. Older .litertlm files without an MTP drafter ignore this flag at the SDK level.
- maxConcurrentSessions — optional cap on the number of sessions open at once via InferenceModel.openSession. null (the default) means no cap, which is backward-compatible. When set, the (cap+1)-th call to InferenceModel.openSession throws a StateError. Use this on mobile with large models to guard against OOM from multiple concurrent KV caches.

Implementation

/// Creates the singleton [InferenceModel] for the currently active model, or
/// returns the one that already exists.
///
/// The model to load is taken from the unified manager's active inference
/// model, not from the arguments: [modelType] and [fileType] belong to the
/// overridden signature but are not read by this implementation (engine
/// selection uses the active spec).
///
/// [maxTokens], [preferredBackend], [loraRanks], [maxNumImages],
/// [supportImage], [supportAudio], [enableSpeculativeDecoding] and
/// [maxConcurrentSessions] are forwarded to the selected engine through a
/// [RuntimeConfig]. They only take effect when a model is actually built; a
/// call that reuses the existing instance, or joins a creation that is
/// already in flight, ignores them.
///
/// If the active model's name differs from the one the current instance was
/// built for, that instance is closed and a new one is created.
///
/// Throws a [StateError] when no active inference model is set or when no
/// registered engine can handle the model, and an [Exception] when the model
/// is not installed or its file cannot be found. Errors raised while
/// resolving the model path or building the model are propagated as well.
/// A failed creation attempt clears the cached singleton state, so a later
/// call starts from scratch instead of receiving the same failure again.
@override
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
  bool supportAudio = false, // Enabling audio support (Gemma 3n E4B)
  bool? enableSpeculativeDecoding,
  int? maxConcurrentSessions,
}) async {
  // Check if model is ready through unified system
  final manager = _unifiedManager;
  final activeModel = manager.activeInferenceModel;

  // No active inference model - user must set one first
  if (activeModel == null) {
    throw StateError(
        'No active inference model set. Use `FlutterGemma.installModel()` or `modelManager.setActiveModel()` to set a model first');
  }

  // Cast once; both the reuse check and the engine lookup need the spec.
  final spec = activeModel as InferenceModelSpec;

  // Copy the fields to locals so the null checks promote them.
  final existingCompleter = _initCompleter;
  final existingModel = _initializedModel;
  final existingSpec = _lastActiveInferenceSpec;

  // Check if singleton exists and matches the active model
  if (existingCompleter != null &&
      existingModel != null &&
      existingSpec != null) {
    if (existingSpec.name == spec.name) {
      // Same model - return existing singleton
      gemmaLog('ℹ️  Reusing existing model instance for ${spec.name}');
      return existingCompleter.future;
    }

    // Active model changed - close old model and create new one
    gemmaLog('⚠️  Active model changed: ${existingSpec.name} → ${spec.name}');
    gemmaLog('🔄 Closing old model and creating new one...');
    await existingModel.close();
    // close-listener will reset _initializedModel and _initCompleter
    _lastActiveInferenceSpec = null;
  }

  // A creation is already in flight (or finished): share its result.
  if (_initCompleter case final pending?) {
    return pending.future;
  }

  final completer = _initCompleter = Completer<InferenceModel>();

  // Everything that can fail from here on runs inside the try, so that every
  // failure goes through the single catch below, which resets the singleton
  // state and completes the completer. Otherwise a failed or never-completed
  // completer would stay cached and be handed to every later caller.
  try {
    // Verify the active model is still installed
    if (!await manager.isModelInstalled(activeModel)) {
      throw Exception(
          'Active model is no longer installed. Use the `modelManager` to load the model first');
    }

    // Get the actual model file path through unified system
    final modelFilePaths = await manager.getModelFilePaths(activeModel);
    if (modelFilePaths == null || modelFilePaths.isEmpty) {
      throw Exception(
          'Model file paths not found. Use the `modelManager` to load the model first');
    }

    final modelPath = modelFilePaths.values.first;
    final modelFile = File(modelPath);

    if (!await modelFile.exists()) {
      throw Exception('Model file not found at path: ${modelFile.path}');
    }

    gemmaLog('Using unified model file: $modelPath');

    // Engine selection routes ENTIRELY through [EngineRegistry] (probe-chain).
    // Core registers NO default engine: both MediaPipe (.task/.bin, from
    // flutter_gemma_mediapipe) and LiteRT-LM (.litertlm, from
    // flutter_gemma_litertlm) are fully opt-in via
    // FlutterGemma.initialize(inferenceEngines: [...]). Core only resolves the
    // model path (preamble above) + owns the singleton lifecycle centrally
    // (track + reset on close); the selected engine builds the model.
    final config = RuntimeConfig(
      maxTokens: maxTokens,
      modelPath: modelPath,
      preferredBackend: preferredBackend,
      supportImage: supportImage,
      supportAudio: supportAudio,
      maxNumImages: maxNumImages,
      enableSpeculativeDecoding: enableSpeculativeDecoding,
      maxConcurrentSessions: maxConcurrentSessions,
      loraRanks: loraRanks,
    );
    final engine = EngineRegistry.instance.findFor(spec);
    if (engine == null) {
      throw StateError(
        'No inference engine can handle this model (ModelFileType.${spec.fileType.name}). '
        'Add the engine package to pubspec.yaml and pass it in inferenceEngines: '
        'of FlutterGemma.initialize(...). Registered engines: '
        '${EngineRegistry.instance.registered.map((e) => e.name).join(", ")}.',
      );
    }
    final model = await engine.createModel(spec, config);

    // Core owns the singleton lifecycle: track it + reset on close. The
    // package-built model fires this via CloseNotifier (addCloseListener).
    _initializedModel = model;
    model.addCloseListener(() {
      _initializedModel = null;
      _initCompleter = null;
      _lastActiveInferenceSpec = null;
    });

    _lastActiveInferenceSpec = spec;
    completer.complete(model);
    return model;
  } catch (e, st) {
    // FIX #170: Reset state to allow retry with different model
    _initCompleter = null;
    _initializedModel = null;
    _lastActiveInferenceSpec = null;
    // Concurrent callers that joined this attempt are waiting on the
    // completer, so it must be completed with the error. Returning its future
    // (instead of rethrowing) delivers the same error and stack trace to this
    // caller while ensuring the future always has a listener, so the error is
    // not additionally reported as an unhandled asynchronous error.
    completer.completeError(e, st);
    return completer.future;
  }
}