normalizeStructuredNumericFieldValue function

String normalizeStructuredNumericFieldValue(String line)

Normalizes numeric-like values in simple structured field lines.

This targets lines such as `Date: zoz5-06-15` and `Amount: ], z5o.75`, where the label is alphabetic and the value has a date-like or decimal-like numeric shape that is polluted only by OCR digit lookalikes.

Implementation

/// Repairs OCR digit lookalikes in the value part of a `Label: value` line.
///
/// [line] is handed back untouched when any of the following holds:
///
/// * it is empty;
/// * it does not match "one to three alphabetic words, a colon, a value";
/// * the captured label or value is empty;
/// * the label has more than [_structuredFieldMaxLabelWords] words;
/// * the value is neither date-like nor decimal-like;
/// * the normalized value is identical to the captured value.
///
/// Otherwise the result is rebuilt as `'<label>: <normalized value>'` from the
/// captured groups rather than by editing [line] in place.
String normalizeStructuredNumericFieldValue(String line) {
  if (line.isEmpty) {
    return line;
  }

  final RegExp fieldShape = RegExp(
    r'^\s*([A-Za-z]+(?:\s+[A-Za-z]+){0,2})\s*:\s*(.+?)\s*$',
  );
  final Match? field = fieldShape.firstMatch(line);
  if (field == null) {
    return line;
  }

  // NOTE(review): assumes the two group constants select the label and value
  // captures of the pattern above — confirm against their declarations.
  final String label = field.group(_structuredFieldLabelGroup) ?? '';
  if (label.isEmpty) {
    return line;
  }
  final String value = field.group(_structuredFieldValueGroup) ?? '';
  if (value.isEmpty) {
    return line;
  }

  // Count the whitespace-separated words that make up the label.
  int wordsInLabel = 0;
  for (final String word in label.split(RegExp(r'\s+'))) {
    if (word.isNotEmpty) {
      wordsInLabel++;
    }
  }
  if (wordsInLabel == 0 || wordsInLabel > _structuredFieldMaxLabelWords) {
    return line;
  }

  // Date-like and decimal-like values take the exact same normalization path.
  // NOTE(review): decimals also pass through normalizeDateSeparators — confirm
  // that this is intended and not a copy of the date branch.
  String? repaired;
  if (_looksDateLikeStructuredNumericValue(value) ||
      _looksDecimalStructuredNumericValue(value)) {
    repaired = normalizeDateSeparators(_mapDigitLookalikesInValue(value));
  }

  // Nothing recognized, or nothing changed: keep the caller's line verbatim.
  if (repaired == null || repaired == value) {
    return line;
  }

  return '$label: $repaired';
}