createModel method
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
})
override
Creates and returns a new InferenceModel instance.
modelType — model type to create.
fileType — model file type (defaults to ModelFileType.task).
maxTokens — maximum context length for the model.
preferredBackend — backend preference (e.g., CPU, GPU).
loraRanks — optional supported LoRA ranks.
maxNumImages — maximum number of images (for multimodal models).
supportImage — whether the model supports images.
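A minimal usage sketch follows. It is illustrative only: the gemma handle stands in for whatever object exposes this method in your app, and ModelType.gemmaIt is just an example enum value.

// Illustrative call; `gemma` and the ModelType value are placeholders.
final model = await gemma.createModel(
  modelType: ModelType.gemmaIt,
  maxTokens: 2048,
  preferredBackend: PreferredBackend.gpu,
);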
Implementation
@override
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
}) async {
  // Check active model
  final activeModel = _modelManager.activeInferenceModel;
  if (activeModel == null) {
    throw StateError(
      'No active inference model set. Use `FlutterGemma.installModel()` or `modelManager.setActiveModel()` first',
    );
  }

  // Check if singleton exists and matches active model
  if (_initCompleter != null &&
      _initializedModel != null &&
      _lastActiveInferenceSpec != null) {
    final currentSpec = _lastActiveInferenceSpec!;
    final requestedSpec = activeModel as InferenceModelSpec;

    if (currentSpec.name != requestedSpec.name) {
      // Active model changed - close old and create new
      debugPrint('Active model changed: ${currentSpec.name} -> ${requestedSpec.name}');
      await _initializedModel?.close();
      // Explicitly null these out (onClose callback also does this, but be safe)
      _initCompleter = null;
      _initializedModel = null;
      _lastActiveInferenceSpec = null;
    } else {
      // Same model - return existing
      debugPrint('Reusing existing model instance for ${requestedSpec.name}');
      return _initCompleter!.future;
    }
  }

  // Return existing completer if initialization in progress (re-check after potential close)
  if (_initCompleter case Completer<InferenceModel> completer) {
    return completer.future;
  }

  final completer = _initCompleter = Completer<InferenceModel>();

  try {
    // Verify model is installed
    final isInstalled = await _modelManager.isModelInstalled(activeModel);
    if (!isInstalled) {
      throw Exception('Active model is no longer installed');
    }

    // Get model file path
    final modelFilePaths = await _modelManager.getModelFilePaths(activeModel);
    if (modelFilePaths == null || modelFilePaths.isEmpty) {
      throw Exception('Model file paths not found');
    }
    final modelPath = modelFilePaths.values.first;

    debugPrint('[FlutterGemmaDesktop] Using model: $modelPath');

    // Start server and create gRPC client
    await _ensureServerRunning();
    final grpcClient = LiteRtLmClient();
    await grpcClient.connect();

    // Initialize model - server validates file existence
    // This avoids TOCTOU race condition (file could be deleted between check and use)
    try {
      await grpcClient.initialize(
        modelPath: modelPath,
        backend: preferredBackend == PreferredBackend.cpu ? 'cpu' : 'gpu',
        maxTokens: maxTokens,
      );
    } catch (e) {
      // Provide clearer error message for file-related issues
      final errorMsg = e.toString();
      if (errorMsg.contains('FileNotFoundException') ||
          errorMsg.contains('No such file') ||
          errorMsg.contains('not found')) {
        throw Exception('Model file not found or inaccessible: $modelPath');
      }
      rethrow;
    }

    // Create model instance
    final model = _initializedModel = DesktopInferenceModel(
      grpcClient: grpcClient,
      maxTokens: maxTokens,
      modelType: modelType,
      fileType: fileType,
      supportImage: supportImage,
      onClose: () {
        _initializedModel = null;
        _initCompleter = null;
        _lastActiveInferenceSpec = null;
      },
    );

    _lastActiveInferenceSpec = activeModel as InferenceModelSpec;
    completer.complete(model);
    return model;
  } catch (e, st) {
    completer.completeError(e, st);
    _initCompleter = null;
    rethrow;
  }
}
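Behavioural note: although the summary says a new instance is created, this implementation acts as a guarded singleton. It throws a StateError when no active model is set, returns the already initialized instance while the same active model remains selected, and closes the old instance before initializing a new one when the active model changes. A small sketch of the reuse behaviour (the gemma handle and the ModelType value are again placeholders, not part of this API):

// Illustrative only: `gemma` stands in for the object exposing createModel.
final a = await gemma.createModel(modelType: ModelType.gemmaIt);
final b = await gemma.createModel(modelType: ModelType.gemmaIt);
// With the same active model set, both calls resolve to the same instance.
// After the active model is switched (e.g. via modelManager.setActiveModel()),
// the next createModel call closes the old instance and initializes a new one.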