generateChatResponseAsync method

Stream<ModelResponse> generateChatResponseAsync()
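
Streams the model's reply as ModelResponse events. Plain text arrives token by token as TextResponse; when isThinking is enabled, reasoning content is filtered through ModelThinkingFilter and passed along separately; and for models with function-calling support, JSON-like output is buffered and, once complete, emitted either as the extracted message text or as a parsed function call. When the stream ends, the accumulated text is counted against the token budget (recreating the session if the limit is near) and appended to the chat history.

A minimal consumption sketch, assuming chat is an already-initialized InferenceChat and that parsed function calls arrive as their own ModelResponse subtype (named FunctionCallResponse here for illustration; check the package for the actual type name):

Future<void> streamReply(InferenceChat chat) async {
  await for (final event in chat.generateChatResponseAsync()) {
    if (event is TextResponse) {
      // Plain text tokens, already stripped of thinking markup.
      print(event.token);
    } else if (event is ThinkingResponse) {
      // Reasoning content; only emitted when isThinking is true.
    } else {
      // A parsed function call: dispatch it to your tool handler.
      print('function call: $event');
    }
  }
}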

Implementation

Stream<ModelResponse> generateChatResponseAsync() async* {
  debugPrint('InferenceChat: Starting async stream generation');
  final buffer = StringBuffer();

  // Smart function handling - continuously scan incoming text for JSON patterns
  String funcBuffer = '';

  debugPrint('InferenceChat: Starting to iterate over native tokens...');

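  // Wrap each raw token from the native session in a TextResponse.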
  final originalStream = session.getResponseAsync().map((token) => TextResponse(token));

  // Apply thinking filter if needed using ModelThinkingFilter
  final Stream<ModelResponse> filteredStream = isThinking
      ? ModelThinkingFilter.filterThinkingStream(originalStream, modelType: modelType)
      : originalStream;

  await for (final response in filteredStream) {
    if (response is TextResponse) {
      final token = response.token;
      debugPrint('InferenceChat: Received filtered token: "$token"');

      // Track if this token should be added to buffer (default true)
      bool shouldAddToBuffer = true;

      // Continuous scanning for function calls in text - for models like DeepSeek
      if (tools.isNotEmpty && supportsFunctionCalls) {
        // Check if we're currently buffering potential JSON
        if (funcBuffer.isNotEmpty) {
          // We're already buffering - add token and check for completion
          funcBuffer += token;
          debugPrint(
              'InferenceChat: Buffering token: "$token", total: ${funcBuffer.length} chars');

          // Check if the buffer now holds a complete JSON payload
          if (FunctionCallParser.isFunctionCallComplete(funcBuffer, modelType: modelType)) {
            // First try to extract a message from any JSON that carries a message field
            try {
              final jsonData = jsonDecode(funcBuffer);
              if (jsonData is Map<String, dynamic> && jsonData.containsKey('message')) {
                // Found JSON with message field - extract and display the message
                final message = jsonData['message'] as String;
                debugPrint('InferenceChat: Extracted message from JSON: "$message"');
                yield TextResponse(message);
                funcBuffer = '';
                shouldAddToBuffer = false; // Don't add JSON tokens to buffer
                continue;
              }
            } catch (e) {
              debugPrint('InferenceChat: Failed to parse JSON for message extraction: $e');
            }

            // If there is no message field, try parsing the buffer as a function call
            final functionCall = FunctionCallParser.parse(
              funcBuffer,
              modelType: modelType,
            );
            if (functionCall != null) {
              debugPrint('InferenceChat: Found function call in complete buffer!');
              yield functionCall;
              funcBuffer = '';
              shouldAddToBuffer = false; // Don't add function call tokens to buffer
              continue;
            } else {
              // Not valid JSON - emit as text and clear buffer
              debugPrint('InferenceChat: Invalid JSON, emitting as text');
              yield TextResponse(funcBuffer);
              funcBuffer = '';
              shouldAddToBuffer = false;
              continue;
            }
          }

          // If the buffer grows too long without completing, flush it as text
          if (funcBuffer.length > _maxFunctionBufferLength) {
            debugPrint('InferenceChat: Buffer too long without completion, flushing as text');
            yield TextResponse(funcBuffer);
            funcBuffer = '';
            shouldAddToBuffer = false;
            continue;
          }

          // Still buffering, don't emit yet
          shouldAddToBuffer = false;
        } else {
          // Not currently buffering - check if this token starts JSON
          if (token.contains('{') || token.contains('```')) {
            debugPrint('InferenceChat: Found potential JSON start in token: "$token"');
            funcBuffer = token;
            shouldAddToBuffer = false; // Don't add to main buffer while we determine if it's JSON
          } else {
            // Normal text token - emit immediately
            debugPrint('InferenceChat: Emitting text token: "$token"');
            yield response;
            shouldAddToBuffer = true; // Add to main buffer for history
          }
        }
      } else {
        // No function processing happening - emit token directly
        debugPrint('InferenceChat: No function processing, emitting token as text: "$token"');
        yield response;
        shouldAddToBuffer = true; // Add to main buffer for history
      }

      // Add token to buffer only if it should be included in final message
      if (shouldAddToBuffer) {
        buffer.write(token);
      }
    } else {
      // Pass non-TextResponse events (such as ThinkingResponse) straight through
      yield response;
    }
  }

  debugPrint('InferenceChat: Native token stream ended');
  final response = buffer.toString();
  debugPrint('InferenceChat: Complete response accumulated: "$response"');

  // Handle end of stream - process any remaining buffer
  if (funcBuffer.isNotEmpty) {
    debugPrint(
        'InferenceChat: Processing remaining buffer at end of stream: ${funcBuffer.length} chars');

    // For FunctionGemma, the function call spans response + funcBuffer
    // (e.g., response="<start_function_call>call:fn", funcBuffer="{params}")
    // For JSON models, funcBuffer contains the complete JSON
    final contentToCheck = modelType == ModelType.functionGemma
        ? response + funcBuffer
        : funcBuffer;

    // First try to extract a message if the JSON carries a message field
    if (FunctionCallParser.isFunctionCallComplete(contentToCheck, modelType: modelType)) {
      try {
        // For JSON parsing, use funcBuffer (the actual JSON part)
        // For FunctionGemma parsing, use contentToCheck (full function call)
        if (modelType != ModelType.functionGemma) {
          final jsonData = jsonDecode(funcBuffer);
          if (jsonData is Map<String, dynamic> && jsonData.containsKey('message')) {
            final message = jsonData['message'] as String;
            debugPrint('InferenceChat: Extracted message from end-of-stream JSON: "$message"');
            yield TextResponse(message);
            return;
          }
        }

        // Try to parse as function call
        final functionCall = FunctionCallParser.parse(
          contentToCheck,
          modelType: modelType,
        );
        if (functionCall != null) {
          debugPrint('InferenceChat: Function call found at end of stream');
          yield functionCall;
        } else {
          yield TextResponse(funcBuffer);
        }
      } catch (e) {
        debugPrint('InferenceChat: Failed to parse end-of-stream JSON: $e');
        yield TextResponse(funcBuffer);
      }
    } else {
      debugPrint('InferenceChat: No complete JSON at end of stream, emitting remaining as text');
      yield TextResponse(funcBuffer);
    }
  }

  try {
    debugPrint('InferenceChat: Calculating response tokens...');
    final responseTokens = await session.sizeInTokens(response);
    debugPrint('InferenceChat: Response tokens: $responseTokens');
    _currentTokens += responseTokens;
    debugPrint('InferenceChat: Current total tokens: $_currentTokens');

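    // Leave tokenBuffer tokens of headroom so the session is recreated before the context window fills.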
    if (_currentTokens >= (maxTokens - tokenBuffer)) {
      debugPrint('InferenceChat: Token limit reached, recreating session...');
      await _recreateSessionWithReducedChunks();
      debugPrint('InferenceChat: Session recreated successfully');
    }
  } catch (e) {
    debugPrint('InferenceChat: Error during token calculation: $e');
  }

  try {
    debugPrint('InferenceChat: Adding message to history...');
    final chatMessage = Message(text: response, isUser: false);
    debugPrint('InferenceChat: Created message object: ${chatMessage.text}');
    _fullHistory.add(chatMessage);
    debugPrint('InferenceChat: Added to full history');
    _modelHistory.add(chatMessage);
    debugPrint('InferenceChat: Added to model history');
    debugPrint('InferenceChat: Message added to history successfully');
  } catch (e) {
    debugPrint('InferenceChat: Error adding message to history: $e');
    rethrow;
  }

  debugPrint('InferenceChat: generateChatResponseAsync completed successfully');
}
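
The buffering path above defers to FunctionCallParser.isFunctionCallComplete to decide when enough tokens have accumulated to attempt a parse. That parser's internals are not shown on this page; purely as an illustration of the idea for plain-JSON models, a brace-balance check might look like the sketch below. This is an assumption, not the library's implementation, and it ignores the fenced code-block starts that the loop also watches for.

/// Illustrative sketch only; not FunctionCallParser's real logic.
/// Considers the buffer complete once at least one '{' has been seen
/// and every brace opened outside a string literal has been closed.
bool looksLikeCompleteJson(String buffer) {
  var depth = 0;
  var sawBrace = false;
  var inString = false;
  var escaped = false;
  for (final rune in buffer.runes) {
    final ch = String.fromCharCode(rune);
    if (escaped) {
      escaped = false; // skip the character after a backslash in a string
    } else if (ch == r'\') {
      escaped = inString;
    } else if (ch == '"') {
      inString = !inString;
    } else if (!inString && ch == '{') {
      depth++;
      sawBrace = true;
    } else if (!inString && ch == '}') {
      depth--;
    }
  }
  return sawBrace && depth == 0;
}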