scrapingHandler static method

Future scrapingHandler(
  String url,
  Config config, {
  String? html,
})

Implementation

/// Scrapes [url] using the target and parsers declared in [config].
///
/// Steps, in order:
///  1. Builds the request headers: a user agent produced by
///     `generateUserAgent`, then any entries from `config.headers` (which
///     override the user agent on key collision).
///  2. Resolves the [Target] for [url] via `fetchTarget`.
///  3. Cleans [url] with the target's cleaner.
///  4. Chooses the document to parse: the cleaned URL itself when the target
///     does not need HTML, otherwise the caller-supplied [html] or a freshly
///     fetched page.
///  5. Runs the target's parsers over the document.
///
/// Completes with the parsed `Map<String, dynamic>`, always carrying a
/// `'target'` entry and a `'url'` entry (the cleaned URL is used only when
/// the parsers did not produce a `'url'` themselves).
///
/// Failures are *returned*, not thrown, as a [SpiderError] with code 500:
///  * no target matches [url];
///  * the page had to be fetched and `getRequest` returned `null`;
///  * [config] declares no parsers for the matched target.
static Future scrapingHandler(
  String url,
  Config config, {
  String? html,
}) async {
  // Request headers: the generated user agent first, so that user-supplied
  // headers can override it.
  final headers = <String, String>{
    HttpHeaders.userAgentHeader: generateUserAgent(config.userAgent),
    ...?config.headers,
  };

  // Bail out early when no configured target matches this URL.
  final Target? target = fetchTarget(config.targets, url);
  if (target == null) {
    return SpiderError(
      500,
      "This URL is not supported, Please try another URL",
    );
  }

  // Optional proxy; stays null when none is configured.
  final String? proxy = config.proxy;

  // Clean the URL before it is used for fetching or parsing.
  url = runCleaner(url, target.cleaner);

  String dom;
  if (!target.needsHtml) {
    // The target parses the URL itself, so the URL stands in for the document.
    dom = url;
  } else if (config.forceFetch || html == null || html.isEmpty) {
    // Fetch the page when fetching is forced or no usable HTML was supplied.
    final String? data = await getRequest(url, proxy, headers);
    if (data == null) {
      return SpiderError(500, 'Unable to find data from url.');
    }
    dom = data;
  } else {
    // Reuse the HTML handed in by the caller.
    dom = html;
  }

  // A target without a parser list used to crash on a null assertion; report
  // it the same way as every other failure in this function.
  final targetParsers = config.parsers[target.name];
  if (targetParsers == null) {
    return SpiderError(
      500,
      'No parsers are configured for target "${target.name}".',
    );
  }

  // Work on a copy so the list stored in the config is not handed out.
  final allParsers = <Parser>[for (final p in targetParsers) p];

  // Parsers attached to the '_root' parent are the entry points.
  final List<Parser> rootParsers = childFinder('_root', allParsers);

  // Reduce the document before parsing (also applied when dom is the URL).
  dom = htmlMinify(url, dom, target);

  // Run the parsers and tag the result with its origin.
  final Map<String, dynamic> result =
      await parsingHandler(allParsers, rootParsers, dom);
  if (!result.containsKey('url')) {
    result['url'] = url;
  }
  result['target'] = target.name;
  return result;
}