parse method

Future<Map<String, Object>> parse({
  required Data scrapedData,
  required Config config,
  Uri? proxyUrl,
  Map<String, String>? cookies,
  bool debug = false,
  bool concurrentParsing = false,
})

Entrypoint

Implementation

/// Parses [scrapedData] using the parsers that [config] registers for its URL.
///
/// Asks `fetchTarget` for the [UrlTarget] belonging to `scrapedData.url`
/// among `config.urlTargets`, collects the parsers stored in `config.parsers`
/// under that target's name, and hands the parsers grouped under `_root` to
/// `_distributeParsers` with [scrapedData] as the parent data.
///
/// [proxyUrl] and [cookies] are forwarded to `_distributeParsers` unchanged,
/// and [concurrentParsing] is forwarded as its `concurrent` argument. [debug]
/// is passed to every `printLog` call and to `_distributeParsers`.
///
/// Returns the map produced by `_distributeParsers`, with a `'url'` entry
/// (the string form of `scrapedData.url`) added when none is present.
///
/// Throws a [WebScraperError] when `fetchTarget` returns `null`.
Future<Map<String, Object>> parse({
  required Data scrapedData,
  required Config config,
  Uri? proxyUrl,
  Map<String, String>? cookies,
  bool debug = false,
  bool concurrentParsing = false,
}) async {
  // Time the whole run so the duration can be reported at the end.
  final timer = Stopwatch()..start();

  printLog('Parser: Fetching target...', debug, color: LogColor.blue);

  // Resolve the target for this URL; without one there is nothing to parse.
  final UrlTarget? matchedTarget =
      fetchTarget(config.urlTargets, scrapedData.url);
  if (matchedTarget == null) {
    printLog('Parser: Target not found!', debug, color: LogColor.red);
    throw WebScraperError('Unsupported URL');
  }
  printLog('Parser: Target found!', debug, color: LogColor.green);

  // Copy the parsers registered under the target's name (empty if none).
  final List<Parser> targetParsers = [...?config.parsers[matchedTarget.name]];

  // Group the parsers by parent, then pick the ones filed under `_root`.
  final Map<String, List<Parser>> childrenByParent =
      _buildParentToChildrenMap(targetParsers);
  final List<Parser> topLevelParsers = [...?childrenByParent['_root']];

  // `extractedData` is declared outside this function; seed it with the URL
  // before parsing begins.
  extractedData['url'] = scrapedData.url;

  // Run the root parsers against the scraped data.
  final Map<String, Object> result = await _distributeParsers(
    parentToChildren: childrenByParent,
    parsers: topLevelParsers,
    parentData: scrapedData,
    proxyUrl: proxyUrl,
    cookies: cookies,
    debug: debug,
    concurrent: concurrentParsing,
  );

  // Guarantee a 'url' entry without overwriting one a parser already set.
  result.putIfAbsent('url', () => scrapedData.url.toString());

  // Report how long the run took.
  timer.stop();
  printLog(
    'Parsing took ${timer.elapsedMilliseconds} ms.',
    debug,
    color: LogColor.green,
  );

  return result;
}