parse function

Article? parse(
  String html, {
  ParserType parser = ParserType.jsdom,
  String? baseUri,
  bool debug = false,
  ReadabilityLogger? logger,
  int maxElemsToParse = 0,
  int numTopCandidates = 5,
  int charThreshold = 500,
  List<String> classesToPreserve = const [],
  bool keepClasses = false,
  String Function(DomElement)? serializer,
  bool enableJSONLD = true,
  RegExp? allowedVideoRegex,
  double linkDensityModifier = 0,
})

Parse HTML content and extract the main article.

This is the primary function for extracting readable content from HTML.

Example:

// Basic usage with defaults (uses JSDOMParser)
final article = parse(htmlString);
print(article?.title);
print(article?.content);

// With options
final article = parse(
  htmlString,
  parser: ParserType.html,
  baseUri: 'https://example.com',
  charThreshold: 1000,
);

// With custom logger
final article = parse(
  htmlString,
  logger: (msg) => print('[Readability] $msg'),
);

Parameters:

  • html: The HTML content to parse
  • parser: Which parser to use (default: ParserType.jsdom)
  • baseUri: Base URI for resolving relative URLs (only used with the jsdom parser; ignored when parser is ParserType.html)
  • logger: Custom callback for debug messages (overrides debug flag)
  • All other parameters map to ReadabilityOptions fields

Returns the extracted Article or null if no readable content was found.

Implementation

/// Extracts the main article from [html].
///
/// Bundles the tuning parameters into a [ReadabilityOptions], builds a
/// document with the backend selected by [parser], and runs [Readability]
/// over that document.
///
/// [baseUri] is handed to [JSDOMParser] only; the [ParserType.html] branch
/// never receives it. Every other named parameter is forwarded unchanged to
/// the [ReadabilityOptions] argument of the same name.
///
/// Returns whatever [Readability.parse] produces, which may be `null`.
Article? parse(
  String html, {
  ParserType parser = ParserType.jsdom,
  String? baseUri,
  bool debug = false,
  ReadabilityLogger? logger,
  int maxElemsToParse = 0,
  int numTopCandidates = 5,
  int charThreshold = 500,
  List<String> classesToPreserve = const [],
  bool keepClasses = false,
  String Function(DomElement)? serializer,
  bool enableJSONLD = true,
  RegExp? allowedVideoRegex,
  double linkDensityModifier = 0,
}) {
  // Build the options before touching the markup, so the order of operations
  // is: options, then document parsing, then the Readability instance.
  final settings = ReadabilityOptions(
    debug: debug,
    logger: logger,
    maxElemsToParse: maxElemsToParse,
    numTopCandidates: numTopCandidates,
    charThreshold: charThreshold,
    classesToPreserve: classesToPreserve,
    keepClasses: keepClasses,
    serializer: serializer,
    enableJSONLD: enableJSONLD,
    allowedVideoRegex: allowedVideoRegex,
    linkDensityModifier: linkDensityModifier,
  );

  // Each arm parses the markup with its own backend and wraps the resulting
  // document in the matching adapter before handing it to Readability.
  final extractor = switch (parser) {
    ParserType.jsdom => Readability(
        JsdomDomDocument(JSDOMParser().parse(html, baseUri)),
        settings,
      ),
    ParserType.html => Readability(
        HtmlDomDocument(html_parser.parse(html)),
        settings,
      ),
  };

  return extractor.parse();
}