removeClutter static method

String removeClutter(String html)

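Removes clutter from an HTML string and returns the cleaned HTML. It strips `<script>` and `<style>` blocks, navigation-type elements (`nav`, `header`, `footer`, `aside`, `menu`), and elements whose `class` or `id` attribute contains an ad-, social-, or navigation-related keyword (for example `ad`, `banner`, `popup`, `share`, `sidebar`, `breadcrumb`).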

Implementation

/// Strips non-content markup from [html] and returns the cleaned string.
///
/// Three regex passes run in order:
///
/// 1. `<script>` and `<style>` elements, including their contents.
/// 2. `<nav>`, `<header>`, `<footer>`, `<aside>` and `<menu>` elements.
/// 3. Any element whose `class` or `id` value contains a clutter keyword
///    (`ad`, `banner`, `sidebar`, ...) as a whole token or as a segment
///    delimited by whitespace, `-` or `_`, optionally pluralised with `s`.
///    `ad`, `ads`, `top-ad` and `ad_box` match; `heading`, `loading` and
///    `canvas` do not.
///
/// All matching is case-insensitive. An empty [html] is returned unchanged.
///
/// Limitations: this is regex-based, not a real HTML parser. Each removal
/// ends at the first closing tag with the same name as the opening tag, so
/// an element that nests another element of the same name is only removed
/// up to the inner closing tag. Void/self-closing elements (no closing tag)
/// are not removed by the keyword pass, and camelCase names such as
/// `adContainer` are not recognised as containing the keyword `ad`.
static String removeClutter(String html) {
  if (html.isEmpty) {
    return html;
  }

  // Pass 1: script and style elements. The tag name must be followed by
  // whitespace or `>` so that longer names sharing the prefix are left alone.
  var cleaned = html.replaceAll(
    RegExp(
      r'<(script|style)(?:\s[^>]*)?>.*?</\1\s*>',
      caseSensitive: false,
      dotAll: true,
    ),
    '',
  );

  // Pass 2: structural navigation elements (same tag-name rule as above).
  cleaned = cleaned.replaceAll(
    RegExp(
      r'<(nav|header|footer|aside|menu)(?:\s[^>]*)?>.*?</\1\s*>',
      caseSensitive: false,
      dotAll: true,
    ),
    '',
  );

  // Pass 3: elements flagged by an ad/social/navigation keyword in their
  // class or id attribute.
  const clutterKeywords = [
    'ad', 'advertisement', 'banner', 'popup', 'modal',
    'social', 'share', 'comment', 'sidebar', 'widget',
    'navigation', 'nav', 'menu', 'breadcrumb',
  ];

  // One alternation regex instead of one regex (and one full pass) per
  // keyword. Pieces, in order:
  //   - opening tag name (captured as \1), followed by whitespace so the
  //     whole name is captured and `class`/`id` must be a real attribute
  //     name (not the tail of `data-id` etc.);
  //   - quoted attribute value in which the keyword is delimited by the
  //     quote, whitespace, `-` or `_` on both sides;
  //   - everything up to the closing tag with the SAME name as the opening
  //     tag, so child elements do not terminate the match early.
  // \x22 is `"` and \x27 is `'`.
  final clutterElement = RegExp(
    r'<([a-z][a-z0-9-]*)\s(?:[^>]*?\s)?(?:class|id)\s*=\s*[\x22\x27]' +
        r'(?:[^\x22\x27]*[\s_-])?(?:' +
        clutterKeywords.join('|') +
        r')s?(?:[\s_-][^\x22\x27]*)?[\x22\x27][^>]*>' +
        r'.*?</\1\s*>',
    caseSensitive: false,
    dotAll: true,
  );

  return cleaned.replaceAll(clutterElement, '');
}