parse method
Entry point for the parsing phase. It matches the scraped URL to a configured UrlTarget, runs that target's parsers starting from the '_root' level, and returns the extracted values as a Map<String, Object> that always contains the source 'url'. If the URL is not supported by the configuration, a WebScraperError is thrown.
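A minimal call-site sketch (hedged: `parser`, `scrapedData`, and `config` are placeholders for objects built elsewhere, not part of this page; `parse` is shown as an instance method because the implementation below uses instance state such as `extractedData`):

  // Illustrative call site; the surrounding objects are assumed.
  final Map<String, Object> result = await parser.parse(
    scrapedData: scrapedData,     // Data produced by the scraping step
    config: config,               // Config holding urlTargets and parsers
    debug: true,                  // emit printLog progress messages
    concurrentParsing: false,     // run parsers sequentially
  );
  print(result['url']);           // the source URL is always present in the result

If the URL does not match any configured UrlTarget, the call throws WebScraperError('Unsupported URL') instead of returning.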
Implementation
Future<Map<String, Object>> parse({
  required Data scrapedData,
  required Config config,
  Uri? proxyUrl,
  Map<String, String>? cookies,
  bool debug = false,
  bool concurrentParsing = false,
}) async {
  // Start the stopwatch to measure total parsing time.
  final Stopwatch stopwatch = Stopwatch()..start();

  printLog('Parser: Fetching target...', debug, color: LogColor.blue);

  // Fetch the target that matches the scraped URL.
  final UrlTarget? target = fetchTarget(config.urlTargets, scrapedData.url);
  if (target == null) {
    printLog('Parser: Target not found!', debug, color: LogColor.red);
    throw WebScraperError('Unsupported URL');
  } else {
    printLog('Parser: Target found!', debug, color: LogColor.green);
  }

  // Retrieve all parsers configured for the target.
  final List<Parser> allParsers = config.parsers[target.name]?.toList() ?? [];

  // Build a parent-to-children map.
  final Map<String, List<Parser>> parentToChildren =
      _buildParentToChildrenMap(allParsers);

  // Identify root parsers (those keyed under '_root').
  final List<Parser> rootParsers = parentToChildren['_root']?.toList() ?? [];

  // Initialize extractedData with the URL.
  extractedData['url'] = scrapedData.url;

  // Start parsing from the root parsers.
  final Map<String, Object> parsedData = await _distributeParsers(
    parentToChildren: parentToChildren,
    parsers: rootParsers,
    parentData: scrapedData,
    proxyUrl: proxyUrl,
    cookies: cookies,
    debug: debug,
    concurrent: concurrentParsing,
  );

  // Ensure 'url' is present in parsedData.
  parsedData.putIfAbsent('url', () => scrapedData.url.toString());

  // Stop the stopwatch and log the parsing time.
  stopwatch.stop();
  printLog(
    'Parsing took ${stopwatch.elapsedMilliseconds} ms.',
    debug,
    color: LogColor.green,
  );

  return parsedData;
}
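For context, a hedged sketch of what the parent-to-children grouping could look like. This is not the library's actual helper; it assumes each Parser exposes a `parent` identifier naming the parser it depends on, with top-level parsers keyed under '_root':

  // Assumption: Parser.parent names the parser this one depends on
  // ('_root' for top-level parsers). The real field and semantics may differ.
  Map<String, List<Parser>> _buildParentToChildrenMap(List<Parser> parsers) {
    final Map<String, List<Parser>> parentToChildren = {};
    for (final Parser parser in parsers) {
      parentToChildren.putIfAbsent(parser.parent, () => []).add(parser);
    }
    return parentToChildren;
  }

Grouping parsers this way lets _distributeParsers start from the '_root' entries and hand each parser's output to its children without rescanning the full parser list at every level.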