postProcessText function
Applies final normalization passes to OCR text output.
Implementation
/// Applies final normalization passes to OCR text output.
///
/// An empty [text] is returned unchanged. Otherwise the text is split on
/// `'\n'`, every line is run through the per-line normalizers, the lines are
/// passed through [mergeNoiseLines] and [normalizeShortNoisyLines], and the
/// rejoined text goes through the whole-text passes. A last per-line
/// [normalizePriceLikeTableRow] pass produces the returned string.
///
/// When [applyDictionary] is `false`, [correctNearMissDictionaryWords] and
/// [splitConcatenatedDictionaryWords] are skipped; the flag is also forwarded
/// to [normalizeStructuredFieldLine] and [normalizeFragmentedLine].
String postProcessText(String text, {bool applyDictionary = true}) {
  if (text.isEmpty) {
    return text;
  }

  // Runs the per-line passes on a single line. Each pass receives the output
  // of the one before it, so the call sequence below must not be reordered.
  String cleanLine(String raw) {
    String current = resolveILAmbiguity(raw);
    current = normalizeWordCaseCoherence(current);
    current = normalizeLineCase(current);
    current = normalizeNameLikeLineTitleCase(current);
    current = normalizeStructuredFieldLine(
      current,
      applyDictionary: applyDictionary,
    );
    current = normalizeRepeatedCommaSuffix(current);
    current = normalizeTrailingSingleUpperTokenSplit(current);
    current = normalizeRegionPostalCodeSpacing(current);
    current = normalizeStructuredNumericFieldValue(current);
    current = normalizeNumericGaps(current);
    current = normalizeDigitSegments(current);
    current = normalizeDateSeparators(current);
    current = normalizeCodeLikeTokens(current);
    current = normalizeStandaloneDecimalLikeToken(current);
    current = normalizeShortUppercaseDictionaryWords(current);
    current = normalizePriceLikeTableRow(current);
    // NOTE(review): second call to normalizeStructuredNumericFieldValue in
    // this sequence; presumably intentional so it sees the output of the
    // passes in between - confirm before removing.
    current = normalizeStructuredNumericFieldValue(current);
    current = normalizeBracketAsLetterNoise(current);
    current = normalizeFragmentedLine(
      current,
      applyDictionary: applyDictionary,
    );
    if (!applyDictionary) {
      return current;
    }
    current = correctNearMissDictionaryWords(current);
    return splitConcatenatedDictionaryWords(current);
  }

  // Line-list passes operate on the full set of cleaned lines.
  final List<String> cleanedLines = text.split('\n').map(cleanLine).toList();
  final List<String> denoisedLines = normalizeShortNoisyLines(
    mergeNoiseLines(cleanedLines),
  );

  // Whole-text passes operate on the rejoined string.
  String result = denoisedLines.join('\n');
  result = normalizePunctuationHeavyText(result);
  result = normalizeLetterConfusions(result);
  result = normalizePunctuationSpacing(result);
  result = normalizeTrailingSingleUpperTokenSplit(result);
  result = normalizeStandaloneUpperDigitTokenSplit(result);

  // The price-row normalizer is applied once more, line by line, to the
  // output of the whole-text passes.
  return result.split('\n').map(normalizePriceLikeTableRow).join('\n');
}