createSession method

@override

Future<InferenceModelSession> createSession({

double temperature = 0.8,
int randomSeed = 1,
int topK = 1,
double? topP,
String? loraPath,
bool? enableVisionModality,
bool? enableAudioModality,
String? systemInstruction,
bool enableThinking = false,
List<Tool> tools = const [],

})

override

Creates a new InferenceModelSession for generation.

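temperature, randomSeed, topK, topP — sampling parameters. loraPath — optional path to a LoRA model; when omitted, the LoRA path supplied by the resolved model source is used. enableVisionModality — enable vision modality for multimodal models (not yet implemented on the web platform; only a debug warning is printed). enableAudioModality — enable audio modality for Gemma 3n E4B models. systemInstruction — optional system instruction passed to the created session. enableThinking — not supported on Web (MediaPipe); only a debug warning is printed. tools — not passed to the web SDK; tools are wired through the chat prompt instead.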

Implementation

/// Creates the [InferenceModelSession] backed by MediaPipe on the web.
///
/// While `_initCompleter` is set, the future of that earlier initialization
/// is returned as-is and the arguments of this call are not applied.
///
/// [temperature], [randomSeed], [topK] and [topP] are forwarded to
/// `LlmInferenceOptions`. [loraPath] takes precedence over the LoRA path
/// returned by the source resolver; LoRA options are only set when a path is
/// available and `loraRanks` is non-null. [systemInstruction] is handed to the
/// created `WebModelSession`.
///
/// [enableThinking], [enableVisionModality] and [enableAudioModality] do not
/// change the configuration built here; they only trigger debug-mode
/// warnings. [tools] is not read by this method.
///
/// Any error raised during initialization is rethrown to the caller, and the
/// cached completer is cleared so that a later call can retry.
@override
Future<InferenceModelSession> createSession({
  double temperature = 0.8,
  int randomSeed = 1,
  int topK = 1,
  double? topP,
  String? loraPath,
  bool? enableVisionModality, // Enabling vision modality support
  bool? enableAudioModality, // Enabling audio modality support (Gemma 3n E4B)
  String? systemInstruction,
  bool enableThinking = false, // Not supported on Web (MediaPipe)
  List<Tool> tools =
      const [], // Tools wired through chat.dart prompt; SDK tools_json N/A on web
}) async {
  // Thinking mode not supported on Web (MediaPipe has no extraContext/channels API)
  if (enableThinking && kDebugMode) {
    debugPrint('Warning: enableThinking is not supported on Web (MediaPipe). '
        'Use Android or Desktop with .litertlm models for Gemma 4 thinking mode.');
  }

  // TODO: Implement vision modality for web
  if (enableVisionModality == true && kDebugMode) {
    debugPrint(
        'Warning: Vision modality is not yet implemented for web platform');
  }

  // Audio modality is handled via supportAudio flag in the model
  if (enableAudioModality == true && !supportAudio && kDebugMode) {
    debugPrint('Warning: Audio modality requested but supportAudio is false');
  }

  // An initialization is already in flight (or finished): share its result
  // instead of building a second LlmInference instance.
  if (_initCompleter case Completer<InferenceModelSession> pending) {
    return pending.future;
  }
  final completer = _initCompleter = Completer<InferenceModelSession>();
  try {
    // Shared resolver handles activeModel lookup + storage-mode branch.
    // Used identically by LiteRtLmWebInferenceModel.
    final resolved = await sourceResolver.resolveActiveInferenceModel();

    final fileset = await FilesetResolver.forGenAiTasks(
            'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai@0.10.27/wasm'
                .toJS)
        .toDart;

    // LoRA path comes from the resolver alongside the model source; an
    // explicit argument wins over it.
    final loraPathToUse = loraPath ?? resolved.loraPath;
    final hasLoraParams = loraPathToUse != null && loraRanks != null;

    // MediaPipe consumes either modelAssetPath (Blob URL string) or
    // modelAssetBuffer (ReadableStreamDefaultReader, for OPFS streaming).
    // The matched value is bound by the pattern, so no downcast is needed.
    final baseOptions = switch (resolved.model) {
      BlobUrlModelSource(:final url) =>
        LlmInferenceBaseOptions(modelAssetPath: url),
      final OpfsStreamModelSource opfsSource => LlmInferenceBaseOptions(
          modelAssetBuffer: await opfsSource.openReader()),
    };

    final config = LlmInferenceOptions(
        baseOptions: baseOptions,
        maxTokens: maxTokens,
        randomSeed: randomSeed,
        topK: topK,
        temperature: temperature,
        topP: topP,
        supportedLoraRanks:
            !hasLoraParams ? null : Int32List.fromList(loraRanks!).toJS,
        loraPath: !hasLoraParams ? null : loraPathToUse,
        maxNumImages: supportImage ? (maxNumImages ?? 1) : null);

    final llmInference =
        await LlmInference.createFromOptions(fileset, config).toDart;

    session = WebModelSession(
      modelType: modelType,
      fileType: fileType,
      llmInference: llmInference,
      supportImage: supportImage, // Enabling image support
      supportAudio: supportAudio, // Enabling audio support
      systemInstruction: systemInstruction,
      onClose: onClose,
    );

    completer.complete(session);
    return completer.future;
  } catch (e, st) {
    // Drop the cached completer so a later call can retry initialization.
    _initCompleter = null;
    // This caller receives the failure through `rethrow`. Callers that are
    // concurrently awaiting the shared future still get the error too;
    // ignore() only stops the same error from also being reported as an
    // unhandled async error when nobody is listening to the future.
    completer.future.ignore();
    completer.completeError(e, st);
    rethrow;
  }
}

createSession method

Implementation

WebInferenceModel class