normalizePunctuationHeavyText function

String normalizePunctuationHeavyText(
  1. String text
)

Removes short lines that consist mostly or entirely of punctuation; all other lines are returned unchanged.

Implementation

/// Removes short lines of [text] that are mostly or entirely punctuation.
///
/// [text] is split on `'\n'` and every line is judged on its own:
///
/// * Empty lines and whitespace-only lines are kept.
/// * Lines longer than [_punctuationFilterShortLineMaxLength] code units are
///   kept without being inspected.
/// * Lines containing no letters or digits, but at least one other
///   non-whitespace code unit, are dropped.
/// * Lines whose punctuation share
///   `punctuation / (punctuation + alphanumeric)` is greater than
///   [_punctuationHeavyRatioThreshold] are dropped.
///
/// Any code unit that is neither a letter, a digit (as decided by [isLetter]
/// and [isDigit]) nor ASCII whitespace counts as punctuation.
///
/// Returns the surviving lines joined with `'\n'`, in their original order.
String normalizePunctuationHeavyText(String text) {
  final List<String> lines = text.split('\n');
  final List<String> filtered = <String>[];

  for (final String line in lines) {
    // Empty lines and long lines are never filtered, so decide before
    // spending time on counting characters.
    if (line.isEmpty || line.length > _punctuationFilterShortLineMaxLength) {
      filtered.add(line);
      continue;
    }

    int punctuation = 0;
    int alphanumeric = 0;
    for (int i = 0; i < line.length; i++) {
      final int code = line.codeUnitAt(i);
      if (isLetter(code) || isDigit(code)) {
        alphanumeric++;
        continue;
      }
      // Space, tab, vertical tab, form feed and carriage return are
      // whitespace, not punctuation. Without this, a trailing '\r' from CRLF
      // input (or a leading tab) would be counted as punctuation and could
      // get an otherwise blank or ordinary line dropped.
      final bool isWhitespace =
          code == 0x20 || (code >= 0x09 && code <= 0x0D);
      if (!isWhitespace) {
        punctuation++;
      }
    }

    final int counted = punctuation + alphanumeric;
    if (counted == 0) {
      // Whitespace-only line: keep it like an empty line, and avoid
      // evaluating 0 / 0 (NaN) in the ratio below.
      filtered.add(line);
      continue;
    }

    // A line with no letters or digits is dropped regardless of the
    // threshold value; otherwise the punctuation share decides.
    if (alphanumeric == 0 ||
        punctuation / counted > _punctuationHeavyRatioThreshold) {
      continue;
    }

    filtered.add(line);
  }

  return filtered.join('\n');
}