createModel method
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
})
override
Creates and returns a new InferenceModel instance.
modelType — model type to create.
fileType — model file type (defaults to ModelFileType.task).
maxTokens — maximum context length for the model.
preferredBackend — backend preference (e.g., CPU, GPU).
loraRanks — optional supported LoRA ranks.
maxNumImages — maximum number of images (for multimodal models).
supportImage — whether the model supports images.
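A minimal usage sketch follows. It is illustrative only: the gemma handle stands in for whatever object exposes this method in your app, and ModelType.gemmaIt is just an example enum value.

// Illustrative call; `gemma` and the ModelType value are placeholders.
final model = await gemma.createModel(
  modelType: ModelType.gemmaIt,
  maxTokens: 2048,
  preferredBackend: PreferredBackend.gpu,
);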
Implementation
@override
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
}) async {
  // Check active model
  final activeModel = _modelManager.activeInferenceModel;
  if (activeModel == null) {
    throw StateError(
      'No active inference model set. Use `FlutterGemma.installModel()` or `modelManager.setActiveModel()` first',
    );
  }

  // Check if singleton exists and matches active model
  if (_initCompleter != null &&
      _initializedModel != null &&
      _lastActiveInferenceSpec != null) {
    final currentSpec = _lastActiveInferenceSpec!;
    final requestedSpec = activeModel as InferenceModelSpec;

    if (currentSpec.name != requestedSpec.name) {
      // Active model changed - close old and create new
      debugPrint('Active model changed: ${currentSpec.name} -> ${requestedSpec.name}');
      await _initializedModel?.close();
      // Explicitly null these out (onClose callback also does this, but be safe)
      _initCompleter = null;
      _initializedModel = null;
      _lastActiveInferenceSpec = null;
    } else {
      // Same model - return existing
      debugPrint('Reusing existing model instance for ${requestedSpec.name}');
      return _initCompleter!.future;
    }
  }

  // Return existing completer if initialization in progress (re-check after potential close)
  if (_initCompleter case Completer<InferenceModel> completer) {
    return completer.future;
  }

  final completer = _initCompleter = Completer<InferenceModel>();

  try {
    // Verify model is installed
    final isInstalled = await _modelManager.isModelInstalled(activeModel);
    if (!isInstalled) {
      throw Exception('Active model is no longer installed');
    }

    // Get model file path
    final modelFilePaths = await _modelManager.getModelFilePaths(activeModel);
    if (modelFilePaths == null || modelFilePaths.isEmpty) {
      throw Exception('Model file paths not found');
    }
    final modelPath = modelFilePaths.values.first;

    debugPrint('[FlutterGemmaDesktop] Using model: $modelPath');

    // Start server and create gRPC client
    await _ensureServerRunning();
    final grpcClient = LiteRtLmClient();
    await grpcClient.connect();

    // Initialize model - server validates file existence
    // This avoids TOCTOU race condition (file could be deleted between check and use)
    try {
      await grpcClient.initialize(
        modelPath: modelPath,
        backend: preferredBackend == PreferredBackend.cpu ? 'cpu' : 'gpu',
        maxTokens: maxTokens,
      );
    } catch (e) {
      // Provide clearer error message for file-related issues
      final errorMsg = e.toString();
      if (errorMsg.contains('FileNotFoundException') ||
          errorMsg.contains('No such file') ||
          errorMsg.contains('not found')) {
        throw Exception('Model file not found or inaccessible: $modelPath');
      }
      rethrow;
    }

    // Create model instance
    final model = _initializedModel = DesktopInferenceModel(
      grpcClient: grpcClient,
      maxTokens: maxTokens,
      modelType: modelType,
      fileType: fileType,
      supportImage: supportImage,
      onClose: () {
        _initializedModel = null;
        _initCompleter = null;
        _lastActiveInferenceSpec = null;
      },
    );

    _lastActiveInferenceSpec = activeModel as InferenceModelSpec;
    completer.complete(model);
    return model;
  } catch (e, st) {
    completer.completeError(e, st);
    _initCompleter = null;
    rethrow;
  }
}
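Behavioural note: although the summary says a new instance is created, this implementation acts as a guarded singleton. It throws a StateError when no active model is set, returns the already initialized instance while the same active model remains selected, and closes the old instance before initializing a new one when the active model changes. A small sketch of the reuse behaviour (the gemma handle and the ModelType value are again placeholders, not part of this API):

// Illustrative only: `gemma` stands in for the object exposing createModel.
final a = await gemma.createModel(modelType: ModelType.gemmaIt);
final b = await gemma.createModel(modelType: ModelType.gemmaIt);
// With the same active model set, both calls resolve to the same instance.
// After the active model is switched (e.g. via modelManager.setActiveModel()),
// the next createModel call closes the old instance and initializes a new one.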