postProcessText function

String postProcessText(
  String text, {
  bool applyDictionary = true,
})

Applies final normalization passes to OCR text output.

Implementation

/// Runs the final normalization passes over OCR [text] and returns the result.
///
/// Empty input is returned unchanged. Otherwise the text is split on `'\n'`,
/// every line is sent through the per-line normalizers in a fixed sequence,
/// and the resulting lines are merged, re-joined with `'\n'`, and passed
/// through the whole-text normalizers.
///
/// [applyDictionary] is forwarded to the normalizers that accept it; when it
/// is `false`, the near-miss correction and concatenated-word splitting steps
/// are skipped as well.
String postProcessText(String text, {bool applyDictionary = true}) {
  if (text.isEmpty) {
    return text;
  }

  // Per-line pipeline. The calls run in this exact sequence; note that
  // normalizeStructuredNumericFieldValue is applied twice, once before and
  // once after the intermediate numeric/code/price passes.
  String cleanLine(String raw) {
    String out = resolveILAmbiguity(raw);
    out = normalizeWordCaseCoherence(out);
    out = normalizeLineCase(out);
    out = normalizeNameLikeLineTitleCase(out);
    out = normalizeStructuredFieldLine(out, applyDictionary: applyDictionary);
    out = normalizeRepeatedCommaSuffix(out);
    out = normalizeTrailingSingleUpperTokenSplit(out);
    out = normalizeRegionPostalCodeSpacing(out);
    out = normalizeStructuredNumericFieldValue(out);
    out = normalizeNumericGaps(out);
    out = normalizeDigitSegments(out);
    out = normalizeDateSeparators(out);
    out = normalizeCodeLikeTokens(out);
    out = normalizeStandaloneDecimalLikeToken(out);
    out = normalizeShortUppercaseDictionaryWords(out);
    out = normalizePriceLikeTableRow(out);
    out = normalizeStructuredNumericFieldValue(out);
    out = normalizeBracketAsLetterNoise(out);
    out = normalizeFragmentedLine(out, applyDictionary: applyDictionary);
    if (!applyDictionary) {
      return out;
    }
    // Dictionary-backed fixes only run when the caller opted in.
    out = correctNearMissDictionaryWords(out);
    return splitConcatenatedDictionaryWords(out);
  }

  final List<String> cleanedLines = <String>[
    for (final String raw in text.split('\n')) cleanLine(raw),
  ];

  // List-level passes operate on the cleaned lines before they are re-joined.
  final List<String> denoised = normalizeShortNoisyLines(
    mergeNoiseLines(cleanedLines),
  );

  // Whole-text passes, applied to the re-joined text one after another.
  String body = denoised.join('\n');
  body = normalizePunctuationHeavyText(body);
  body = normalizeLetterConfusions(body);
  body = normalizePunctuationSpacing(body);
  body = normalizeTrailingSingleUpperTokenSplit(body);
  body = normalizeStandaloneUpperDigitTokenSplit(body);

  // Final price-row pass, applied line by line on the finished text.
  final List<String> finalRows = <String>[
    for (final String row in body.split('\n')) normalizePriceLikeTableRow(row),
  ];
  return finalRows.join('\n');
}