createModel method

@override
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
  bool supportAudio = false,
})

Creates and returns a new InferenceModel instance.

modelType — model type to create. fileType — model file format (`.task`/`.bin` use the MediaPipe path; `.litertlm` uses the FFI path). maxTokens — maximum context length for the model. preferredBackend — backend preference (e.g., CPU, GPU). loraRanks — optional supported LoRA ranks. maxNumImages — maximum number of images (for multimodal models). supportImage — whether the model supports images. supportAudio — whether the model supports audio (Gemma 3n E4B only).

Implementation

/// Creates (or reuses) the singleton [InferenceModel] for the currently
/// active inference model spec.
///
/// Resolution order:
///  1. Throws [StateError] if no active inference model has been set.
///  2. If an initialized model exists for the same spec, returns it;
///     if the active spec changed, the old model is closed first (its
///     onClose callback resets the singleton state).
///  3. If an initialization is already in flight, returns its future.
///  4. Otherwise verifies installation and file presence, then creates a
///     new model: `.litertlm` files go through the FFI path, anything
///     else through the MediaPipe platform channel.
///
/// Parameters:
///  - [modelType]: model type to create.
///  - [fileType]: model file format; selects the FFI vs MediaPipe path.
///  - [maxTokens]: maximum context length for the model.
///  - [preferredBackend]: backend preference (CPU/GPU). NPU is rejected
///    with [UnsupportedError] on the FFI path.
///  - [loraRanks]: optional supported LoRA ranks (MediaPipe path only).
///  - [maxNumImages]: maximum number of images for multimodal models.
///  - [supportImage]: whether the model supports image input.
///  - [supportAudio]: whether the model supports audio (Gemma 3n E4B only).
///
/// Missing installation or a missing model file is reported as an error
/// on the returned future (via the shared completer), not thrown
/// synchronously.
@override
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
  bool supportAudio = false, // Enabling audio support (Gemma 3n E4B)
}) async {
  // Check if model is ready through unified system
  final manager = _unifiedManager;
  final activeModel = manager.activeInferenceModel;

  // No active inference model - user must set one first
  if (activeModel == null) {
    throw StateError(
        'No active inference model set. Use `FlutterGemma.installModel()` or `modelManager.setActiveModel()` to set a model first');
  }

  // Check if singleton exists and matches the active model.
  // All three fields are set together at the end of a successful
  // creation, so this branch only fires for a fully initialized model.
  if (_initCompleter != null &&
      _initializedModel != null &&
      _lastActiveInferenceSpec != null) {
    final currentSpec = _lastActiveInferenceSpec!;
    // NOTE(review): assumes activeInferenceModel is always an
    // InferenceModelSpec here — verify the manager cannot hold other
    // spec types, otherwise this cast throws.
    final requestedSpec = activeModel as InferenceModelSpec;

    if (currentSpec.name != requestedSpec.name) {
      // Active model changed - close old model and create new one
      debugPrint(
          '⚠️  Active model changed: ${currentSpec.name} → ${requestedSpec.name}');
      debugPrint('🔄 Closing old model and creating new one...');
      await _initializedModel?.close();
      // onClose callback will reset _initializedModel and _initCompleter
      _lastActiveInferenceSpec = null;
    } else {
      // Same model - return existing singleton
      debugPrint(
          'ℹ️  Reusing existing model instance for ${requestedSpec.name}');
      return _initCompleter!.future;
    }
  }

  // If singleton doesn't exist or was just closed, create new one.
  // A non-null completer here means an initialization is in flight
  // (completer set but _initializedModel not yet assigned, so the
  // branch above did not match) — await that same future instead of
  // starting a second, concurrent creation.
  if (_initCompleter case Completer<InferenceModel> completer) {
    return completer.future;
  }

  // Publish the completer before any await so concurrent callers join
  // this creation via the in-flight check above.
  final completer = _initCompleter = Completer<InferenceModel>();

  // Verify the active model is still installed
  final isModelInstalled = await manager.isModelInstalled(activeModel);
  if (!isModelInstalled) {
    completer.completeError(
      Exception(
          'Active model is no longer installed. Use the `modelManager` to load the model first'),
    );
    return completer.future;
  }

  // Get the actual model file path through unified system
  final modelFilePaths = await manager.getModelFilePaths(activeModel);
  if (modelFilePaths == null || modelFilePaths.isEmpty) {
    completer.completeError(
      Exception(
          'Model file paths not found. Use the `modelManager` to load the model first'),
    );
    return completer.future;
  }

  // NOTE(review): only the first path is used — assumes single-file
  // model specs; confirm behavior for specs with multiple files.
  final modelPath = modelFilePaths.values.first;
  final modelFile = File(modelPath);

  if (!await modelFile.exists()) {
    completer.completeError(
      Exception('Model file not found at path: ${modelFile.path}'),
    );
    return completer.future;
  }

  debugPrint('Using unified model file: $modelPath');

  try {
    final InferenceModel model;

    // .litertlm files on iOS → use FFI (same as desktop)
    // .task/.bin files → use MediaPipe via Pigeon (existing path)
    if (fileType == ModelFileType.litertlm &&
        (Platform.isIOS || Platform.isAndroid)) {
      debugPrint(
          '[FlutterGemmaMobile] Using FFI path for .litertlm on ${Platform.operatingSystem}');
      final ffiPathSw = Stopwatch()..start();
      final cacheDir = (await getApplicationSupportDirectory()).path;
      debugPrint(
          '[FlutterGemmaMobile/perf] getApplicationSupportDirectory: ${ffiPathSw.elapsedMilliseconds}ms');
      final ffiClient = LiteRtLmFfiClient();
      // Backend names are the string values the FFI client expects;
      // null defaults to GPU, NPU is not available on this path.
      final backend = switch (preferredBackend) {
        PreferredBackend.cpu => 'cpu',
        PreferredBackend.gpu || null => 'gpu',
        PreferredBackend.npu => throw UnsupportedError(
            'PreferredBackend.npu is not supported on the .litertlm FFI path. '
            'Use a MediaPipe .task model on Android for NPU acceleration.',
          ),
      };
      final beforeInit = ffiPathSw.elapsedMilliseconds;
      await ffiClient.initialize(
        modelPath: modelPath,
        backend: backend,
        maxTokens: maxTokens,
        cacheDir: cacheDir,
        enableVision: supportImage,
        // Image capacity only matters when vision is enabled; 0 disables it.
        maxNumImages: supportImage ? (maxNumImages ?? 1) : 0,
        enableAudio: supportAudio,
      );
      debugPrint(
          '[FlutterGemmaMobile/perf] ffiClient.initialize total: ${ffiPathSw.elapsedMilliseconds - beforeInit}ms');
      debugPrint(
          '[FlutterGemmaMobile/perf] FFI model creation total: ${ffiPathSw.elapsedMilliseconds}ms');

      model = _initializedModel = FfiInferenceModel(
        ffiClient: ffiClient,
        maxTokens: maxTokens,
        modelType: modelType,
        fileType: fileType,
        supportImage: supportImage,
        supportAudio: supportAudio,
        // Resets singleton state so the next createModel() call builds
        // a fresh instance.
        onClose: () {
          _initializedModel = null;
          _initCompleter = null;
          _lastActiveInferenceSpec = null;
        },
      );
    } else {
      // MediaPipe path (Android, iOS .task files)
      await _platformService.createModel(
        maxTokens: maxTokens,
        modelPath: modelPath,
        loraRanks: loraRanks ?? supportedLoraRanks,
        preferredBackend: preferredBackend,
        // null (rather than 0/false) omits the option entirely —
        // presumably the platform side treats null as "unset"; verify.
        maxNumImages: supportImage ? (maxNumImages ?? 1) : null,
        supportAudio: supportAudio ? true : null,
      );

      model = _initializedModel = MobileInferenceModel(
        maxTokens: maxTokens,
        modelType: modelType,
        fileType: fileType,
        preferredBackend: preferredBackend,
        supportedLoraRanks: loraRanks ?? supportedLoraRanks,
        supportImage: supportImage,
        supportAudio: supportAudio,
        maxNumImages: maxNumImages,
        // Resets singleton state so the next createModel() call builds
        // a fresh instance.
        onClose: () {
          _initializedModel = null;
          _initCompleter = null;
          _lastActiveInferenceSpec = null;
        },
      );
    }

    // Save the spec that was used to create this model
    _lastActiveInferenceSpec = activeModel as InferenceModelSpec;

    completer.complete(model);
    return model;
  } catch (e, st) {
    // FIX #170: Reset state to allow retry with different model.
    // The error is delivered both to concurrent callers awaiting the
    // completer's future and, via throwWithStackTrace, to this caller
    // with the original stack preserved.
    _initCompleter = null;
    _initializedModel = null;
    _lastActiveInferenceSpec = null;
    completer.completeError(e, st);
    Error.throwWithStackTrace(e, st);
  }
}