run function

Future<int> run(
  List<String> arguments,
  Stdout stdout
)

Parses command-line arguments and runs the crawl.

Provide the dart:io Stdout as the second argument for normal operation, or pass a mock for testing.
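
For example, a thin command-line entry point might look like the sketch below. The package:linkcheck/run.dart import path is an assumption; point it at wherever run is actually exported.

import 'dart:io' as io;

import 'package:linkcheck/run.dart'; // assumed export path for run()

Future<void> main(List<String> args) async {
  // Normal operation: hand run() the real dart:io stdout. The returned value
  // becomes the process exit code (0 = clean, 1 = warnings or reported
  // redirects, 2 = broken links or an unreadable input/skip file).
  io.exitCode = await run(args, io.stdout);
}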

Implementation

Future<int> run(List<String> arguments, Stdout stdout) async {
  // Redirect output to injected [stdout] for better testing.
  void print(Object message) => stdout.writeln(message);

  final parser = ArgParser(allowTrailingOptions: true)
    ..addFlag(helpFlag,
        abbr: 'h', negatable: false, help: 'Prints this usage help.')
    ..addFlag(versionFlag, abbr: 'v', negatable: false, help: 'Prints version.')
    ..addFlag(externalFlag,
        abbr: 'e',
        negatable: false,
        help: 'Check external (remote) links, too. By '
            'default, the tool only checks internal links.')
    ..addFlag(redirectFlag,
        help: 'Also report all links that point at a redirected URL.')
    ..addFlag(anchorFlag,
        help: 'Report links that point at a missing anchor.', defaultsTo: true)
    ..addSeparator('Advanced')
    ..addOption(inputFlag,
        abbr: 'i',
        help: 'Get list of URLs from the given text file (one URL per line).')
    ..addOption(skipFlag,
        help: 'Get list of URLs to skip from the given text file (one RegExp '
            'pattern per line).')
    ..addMultiOption(hostsFlag,
        splitCommas: true,
        help: 'Paths to check. By default, the crawler '
            "doesn't parse HTML on sites with a different path than the seed "
            'URIs. If your site spans multiple domains and you want to check '
            'HTML everywhere, use this. Provide as a glob, e.g. '
            'http://example.com/subdirectory/**.')
    ..addFlag(ansiFlag,
        help: 'Use ANSI terminal capabilities for nicer output. Turn this off '
            'if the output is broken.',
        defaultsTo: true)
    ..addFlag(connectionFailuresAsWarnings,
        help: 'Report connection failures as warnings rather than errors.')
    ..addFlag(debugFlag,
        abbr: 'd', negatable: false, help: 'Debug mode (very verbose).');

  final argResults = parser.parse(arguments);

  if (argResults[helpFlag] == true) {
    print('Linkcheck will crawl the given site and check links.\n');
    print('usage: linkcheck [switches] [url]\n');
    print(parser.usage);
    return 0;
  }

  if (argResults[versionFlag] == true) {
    print('linkcheck version $version');
    return 0;
  }

  final ansiTerm = argResults[ansiFlag] == true && stdout.hasTerminal;
  final reportConnectionFailuresAsWarnings =
      argResults[connectionFailuresAsWarnings] == true;
  final verbose = argResults[debugFlag] == true;
  final shouldCheckExternal = argResults[externalFlag] == true;
  final showRedirects = argResults[redirectFlag] == true;
  final shouldCheckAnchors = argResults[anchorFlag] == true;
  final inputFile = argResults[inputFlag] as String?;
  final skipFile = argResults[skipFlag] as String?;

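  // Seed URLs come from the positional arguments; more can be added below
  // from the optional input file.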
  var urls = argResults.rest.toList();
  var skipper = UrlSkipper.empty();

  if (inputFile != null) {
    final file = File(inputFile);
    try {
      urls.addAll(file.readAsLinesSync().where((url) => url.isNotEmpty));
    } on FileSystemException catch (e) {
      print("Can't read input file '$inputFile': $e");
      return 2;
    }
  }

  if (skipFile != null) {
    final file = File(skipFile);
    try {
      skipper = UrlSkipper(file.path, file.readAsLinesSync());
    } on FileSystemException catch (e) {
      print("Can't read skip file '$skipFile': $e");
      return 2;
    }
  }

  urls = urls.map(_sanitizeSeedUrl).toList();

  if (urls.isEmpty) {
    print('No URL given, checking $defaultUrl');
    urls.add(defaultUrl);
  } else if (verbose) {
    print('Reading URLs:');
    urls.forEach(print);
  }

  // TODO: exit gracefully if provided URL isn't a parseable URI
  final uris = urls.map((url) => Uri.parse(url)).toList(growable: false);
  Set<String> hosts;
  if ((argResults[hostsFlag] as Iterable<String>).isNotEmpty) {
    hosts = Set<String>.from(argResults[hostsFlag] as Iterable<String>);
  } else {
    // No host globs provided. Derive a default glob from each seed URI
    // (e.g. http://example.com/**).
    hosts = uris.map((uri) {
      var url = uri.toString();
      if (uri.path.isEmpty) return '$url/**';
      if (uri.path == '/') return '$url**';
      if (url.endsWith('/')) url = url.substring(0, url.length - 1);
      return '$url**';
    }).toSet();
  }

  // Start the actual crawl and await the result.
  final result = await crawl(uris, hosts, shouldCheckExternal, skipper, verbose,
      ansiTerm, ProcessSignal.sigint.watch(), stdout);

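  // Count destinations that were tried and turned out broken. When connection
  // failures are downgraded to warnings, they are excluded from this count.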
  final broken = result.destinations
      .where((destination) =>
          destination.wasTried &&
          destination.isBroken &&
          (!reportConnectionFailuresAsWarnings || !destination.didNotConnect))
      .length;

  final withWarning = result.links
      .where((link) =>
          link.hasWarning(shouldCheckAnchors) ||
          reportConnectionFailuresAsWarnings && link.destination.didNotConnect)
      .length;

  final withInfo = result.links.where((link) => link.hasInfo).length;

  final withRedirects =
      result.links.where((link) => link.destination.isRedirected).length;

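  // If there is nothing to report, print only the summary statistics;
  // otherwise print the detailed report for writers first, then the stats.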
  if (broken == 0 &&
      withWarning == 0 &&
      withInfo == 0 &&
      (!showRedirects || withRedirects == 0)) {
    printStats(result, broken, withWarning, withInfo, withRedirects,
        showRedirects, ansiTerm, stdout);
  } else {
    if (ansiTerm) {
      Console.write('\r');
      Console.eraseLine(3);
      print('Done crawling.                   ');
    }

    reportForWriters(
        result, ansiTerm, shouldCheckAnchors, showRedirects, stdout);

    printStats(result, broken, withWarning, withInfo, withRedirects,
        showRedirects, ansiTerm, stdout);
  }
  print('');

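  // Exit code: 2 when broken links were found, 1 for warnings (or redirects
  // when they are being reported), 0 otherwise.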
  if (broken > 0) return 2;
  if (withWarning > 0 || (showRedirects && withRedirects > 0)) return 1;
  return 0;
}
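
For testing, the Stdout parameter can be swapped for a fake. The sketch below is hypothetical (not part of linkcheck) and only implements the members the code above touches for a simple invocation: writeln (via the local print helper) and hasTerminal. Every other Stdout member falls through to noSuchMethod and throws if used. It also assumes versionFlag corresponds to the usual --version switch and that run is exported from package:linkcheck/run.dart.

import 'dart:io';

import 'package:linkcheck/run.dart'; // assumed export path for run()

// Hypothetical fake: captures writeln output and reports no terminal,
// keeping ANSI output disabled.
class FakeStdout implements Stdout {
  final StringBuffer buffer = StringBuffer();

  @override
  void writeln([Object? object = '']) => buffer.writeln(object);

  @override
  bool get hasTerminal => false;

  // Any other Stdout member is left unimplemented and will throw if invoked.
  @override
  dynamic noSuchMethod(Invocation invocation) =>
      throw UnimplementedError('FakeStdout does not implement '
          '${invocation.memberName}');
}

Future<void> main() async {
  final fake = FakeStdout();
  final code = await run(['--version'], fake);
  print('exit code: $code'); // expected: 0
  print(fake.buffer); // expected to contain the "linkcheck version ..." line
}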