fetchUrl function

Future<WebFetchOutput> fetchUrl(
  WebFetchInput input, {
  Future<String> Function(String systemPrompt, String userPrompt)? applyPrompt,
  HttpClient? httpClient,
})

Fetches a URL and processes its content according to the WebFetchTool semantics.

Implementation

/// Fetches the URL in [input] and returns its content processed with
/// `input.prompt`.
///
/// The URL is validated with `validateUrl` and `http` is upgraded to `https`.
/// A cache hit is processed and returned without touching the network.
/// Otherwise a GET request is issued; redirects are followed manually and only
/// while `isPermittedRedirect` accepts the target, the body is read up to
/// `maxHttpContentLength`, HTML is converted to markdown, and the text is
/// truncated to `maxMarkdownLength` before it is handed to `_processContent`.
///
/// [applyPrompt] is forwarded to `_processContent` unchanged.
///
/// [httpClient] lets the caller supply the client to use. When it is omitted a
/// client is created for this call and force-closed before returning; a
/// supplied client is never closed here.
///
/// This function does not throw for fetch failures: invalid URLs, timeouts,
/// oversized responses, rejected redirects and any other error raised while
/// fetching are reported through the returned [WebFetchOutput] (`codeText` of
/// `invalid_url`, `timeout`, `content_too_large`, `cross_origin_redirect` or
/// `error`).
Future<WebFetchOutput> fetchUrl(
  WebFetchInput input, {
  Future<String> Function(String systemPrompt, String userPrompt)? applyPrompt,
  HttpClient? httpClient,
}) async {
  // Same limit dart:io applies when it follows redirects automatically.
  const maxRedirectHops = 5;

  final sw = Stopwatch()..start();

  // Validate URL
  final validation = validateUrl(input.url);
  if (validation is InvalidUrl) {
    return WebFetchOutput(
      bytes: 0,
      code: 0,
      codeText: 'invalid_url',
      result: 'Error: ${validation.reason}',
      durationMs: sw.elapsedMilliseconds,
      url: input.url,
    );
  }

  // Upgrade HTTP to HTTPS
  final validUri = (validation as ValidUrl).uri;
  final uri =
      validUri.isScheme('http') ? validUri.replace(scheme: 'https') : validUri;
  final requestedUrl = uri.toString();

  // Check cache
  final cached = _cache.get(requestedUrl);
  if (cached != null) {
    final result = await _processContent(
      cached,
      input.prompt,
      uri,
      applyPrompt,
    );
    sw.stop();
    return WebFetchOutput(
      bytes: cached.length,
      code: 200,
      codeText: 'OK (cached)',
      result: result,
      durationMs: sw.elapsedMilliseconds,
      url: requestedUrl,
    );
  }

  // Fetch
  final client = httpClient ?? HttpClient();
  try {
    // Sends one GET. Automatic redirects are disabled because dart:io would
    // otherwise follow cross-origin hops before the policy check below runs.
    Future<HttpClientResponse> send(Uri target) async {
      final request = await client.getUrl(target).timeout(fetchTimeout);
      request.followRedirects = false;
      request.headers.set('User-Agent', 'Neomage/1.0 (AI Coding Assistant)');
      request.headers.set('Accept', 'text/markdown, text/html, */*');
      return request.close().timeout(fetchTimeout);
    }

    // Handle redirects: follow permitted hops by hand, refuse the rest.
    var currentUri = uri;
    var response = await send(currentUri);
    for (var hops = 0; ; hops++) {
      final status = response.statusCode;
      final location = response.headers.value('location');
      final redirected =
          response.isRedirect || (status >= 300 && status < 400);
      if (!redirected || location == null) break;

      // Location may be relative; resolve it against the URL that produced it.
      final redirectUri = currentUri.resolve(location);
      // Always compare with the originally requested URL so a chain of
      // individually acceptable hops cannot drift to another origin.
      if (!isPermittedRedirect(uri, redirectUri)) {
        return WebFetchOutput(
          bytes: 0,
          code: status,
          codeText: 'cross_origin_redirect',
          result: 'Redirect to different domain not followed: $location',
          durationMs: sw.elapsedMilliseconds,
          url: requestedUrl,
        );
      }
      if (hops >= maxRedirectHops) {
        // Reported through the generic handler below, as dart:io's own
        // redirect-limit failure was.
        throw RedirectException('Redirect limit exceeded', const []);
      }

      // Consume the redirect body so the connection can be reused.
      await response.drain<void>().timeout(fetchTimeout);
      currentUri = redirectUri;
      response = await send(currentUri);
    }

    // Check content length
    final contentLength = response.contentLength;
    if (contentLength > maxHttpContentLength) {
      return WebFetchOutput(
        bytes: contentLength,
        code: response.statusCode,
        codeText: 'content_too_large',
        result:
            'Content exceeds maximum size of ${maxHttpContentLength ~/ (1024 * 1024)}MB',
        durationMs: sw.elapsedMilliseconds,
        url: requestedUrl,
      );
    }

    // Read response body. The declared length can be absent or wrong, so the
    // limit is enforced again while streaming; BytesBuilder avoids recopying
    // the accumulated data for every chunk.
    final body = BytesBuilder(copy: false);
    await response.forEach((chunk) {
      if (body.length + chunk.length > maxHttpContentLength) {
        throw Exception('Response body exceeds maximum size');
      }
      body.add(chunk);
    }).timeout(fetchTimeout);
    final bytes = body.takeBytes();

    final contentType = response.headers.contentType?.mimeType ?? 'text/html';

    // Handle binary content
    if (isBinaryContentType(contentType)) {
      return WebFetchOutput(
        bytes: bytes.length,
        code: response.statusCode,
        codeText: response.reasonPhrase,
        result: 'Binary content ($contentType, ${bytes.length} bytes)',
        durationMs: sw.elapsedMilliseconds,
        url: requestedUrl,
      );
    }

    // Decode text
    var text = utf8.decode(bytes, allowMalformed: true);

    // Convert HTML to markdown
    if (contentType.contains('html')) {
      text = htmlToMarkdown(text);
    }

    // Truncate if needed
    if (text.length > maxMarkdownLength) {
      text =
          '${text.substring(0, maxMarkdownLength)}\n\n[Content truncated due to length...]';
    }

    // Cache the text exactly as it is processed, and only for successful
    // responses: a cache hit is always reported as 200.
    final status = response.statusCode;
    if (status >= 200 && status < 300) {
      _cache.put(requestedUrl, text);
    }

    // Apply prompt
    final result = await _processContent(text, input.prompt, uri, applyPrompt);

    sw.stop();
    return WebFetchOutput(
      bytes: bytes.length,
      code: status,
      codeText: response.reasonPhrase,
      result: result,
      durationMs: sw.elapsedMilliseconds,
      url: requestedUrl,
    );
  } on TimeoutException {
    return WebFetchOutput(
      bytes: 0,
      code: 0,
      codeText: 'timeout',
      result: 'Request timed out after ${fetchTimeout.inSeconds}s',
      durationMs: sw.elapsedMilliseconds,
      url: requestedUrl,
    );
  } catch (e) {
    return WebFetchOutput(
      bytes: 0,
      code: 0,
      codeText: 'error',
      result: 'Fetch error: $e',
      durationMs: sw.elapsedMilliseconds,
      url: requestedUrl,
    );
  } finally {
    // Force-close a client created here so a response that was abandoned
    // (early return, timeout, size limit) does not keep its connection open.
    if (httpClient == null) client.close(force: true);
  }
}