robots_txt 2.2.0+1

robots_txt: ^2.2.0+1

A complete, dependency-less and fully documented `robots.txt` ruleset parser.
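
A minimal sketch of typical usage, assuming the `robots.txt` contents are already in hand (the user-agent name `MyCrawler` is purely illustrative). The complete example below expands on this.

import 'package:robots_txt/robots_txt.dart';

void main() {
  // Parse a ruleset directly from a string.
  final robots = Robots.parse('''
User-agent: *
Disallow: /private/
''');

  // Check whether a given user-agent may access a path.
  print(robots.verifyCanAccess('/private/page.html', userAgent: 'MyCrawler')); // false
  print(robots.verifyCanAccess('/public/page.html', userAgent: 'MyCrawler')); // true
}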

example/example.dart

import 'dart:convert';
import 'dart:io';

import 'package:robots_txt/robots_txt.dart';

Future<void> main() async {
  // Get the contents of the `robots.txt` file.
  final contents = await fetchFileContents(host: 'github.com');
  // Parse the contents.
  final robots = Robots.parse(contents);

  // Print the rulesets.
  for (final ruleset in robots.rulesets) {
    // Print the user-agent this ruleset applies to.
    print('User-agent: ${ruleset.userAgent}');

    if (ruleset.allows.isNotEmpty) {
      print('Allowed:');
    }
    // Print the regular expressions that match paths allowed by this
    // ruleset.
    for (final rule in ruleset.allows) {
      print('  - ${rule.pattern}');
    }

    if (ruleset.disallows.isNotEmpty) {
      print('Disallowed:');
    }
    // Print the regular expressions that match paths disallowed by this
    // ruleset.
    for (final rule in ruleset.disallows) {
      print('  - ${rule.pattern}');
    }
  }

  const userAgent = 'WordCollector';

  // False: it cannot.
  print(
    "Can '$userAgent' access /gist/? ${robots.verifyCanAccess('/gist/', userAgent: userAgent)}",
  );
  // True: it can.
  print(
    "Can '$userAgent' access /wordcollector/robots_txt/? ${robots.verifyCanAccess('/wordcollector/robots_txt/', userAgent: userAgent)}",
  );

  // Validating an invalid file will throw a `FormatException`.
  try {
    Robots.validate('This is an obviously invalid robots.txt file.');
  } on FormatException {
    print('As expected, the first file is flagged as invalid.');
  }

  // Validating a valid file will not throw anything.
  try {
    Robots.validate('''
User-agent: *
Crawl-delay: 10
Disallow: /
Allow: /file.txt

Host: https://hosting.example.com/
Sitemap: https://example.com/sitemap.xml
''');
    print('As expected, the second file is not flagged as invalid.');
  } on FormatException {
    print('Welp, this was not supposed to happen.');
  }

  late final String contentsFromBefore;

  // Validating a file with unsupported fields.
  try {
    Robots.validate(
      contentsFromBefore = '''
User-agent: *
Some-field: abcd.txt
''',
    );
  } on FormatException {
    print(
      'This file is invalid on the grounds that it contains fields we did not '
      'expect it to have.',
    );
    print(
      "Let's fix that by including the custom field in the call to validate().",
    );
    try {
      Robots.validate(contentsFromBefore, allowedFieldNames: {'Some-field'});
      print('Aha! Now there are no issues.');
    } on FormatException {
      print('Welp, this also was not supposed to happen.');
    }
  }
}

Future<String> fetchFileContents({required String host}) async {
  final client = HttpClient();

  final contents = await client
      .get(host, 80, '/robots.txt')
      .then((request) => request.close())
      .then((response) => response.transform(utf8.decoder).join());

  client.close();

  return contents;
}
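
Note that the helper above issues a plain-HTTP request on port 80; hosts such as github.com typically redirect such requests to HTTPS. A sketch of an HTTPS variant using `HttpClient.getUrl` with `Uri.https` (the name `fetchFileContentsSecure` is hypothetical):

import 'dart:convert';
import 'dart:io';

Future<String> fetchFileContentsSecure({required String host}) async {
  final client = HttpClient();
  try {
    // Request robots.txt over HTTPS (port 443) directly, avoiding the
    // HTTP-to-HTTPS redirect that many hosts issue.
    final request = await client.getUrl(Uri.https(host, '/robots.txt'));
    final response = await request.close();
    // The response body is fully consumed before the client is closed.
    return await response.transform(utf8.decoder).join();
  } finally {
    client.close();
  }
}

Wrapping the request in try/finally ensures the client is closed even if the request fails partway through.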

Publisher

vxern.dev (verified publisher)



License

unknown

Dependencies

meta
