normalizeNumericGaps function
Repairs noisy separators and spacing in numeric expressions.
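Handles cases like "1 . 23" -> "1.23", and removes all whitespace from lines that contain only digits and whitespace when every digit group is a single digit (e.g. "1 2 3 4" -> "1234"); multi-digit groups such as "4004 5005 6006" keep their spaces.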
Implementation
/// Repairs separator and spacing noise in the numeric parts of [line].
///
/// The work happens in three steps:
///
/// 1. Every character that is a key of `digitNonAlnumMap` and sits directly
///    next to a digit (on either side) is replaced by its mapped value.
/// 2. If [line] holds nothing but digits and whitespace (space, tab, LF, CR),
///    all whitespace is stripped when `_allSingleDigitGroups` accepts the
///    mapped text; otherwise the mapped text is returned untouched.
/// 3. For any other line, each "digit, whitespace run, ASCII letter" sequence
///    that is immediately followed by a digit is rewritten as the digit, a
///    `.`, and the letter's `digitConfusionMap` entry (or the letter itself
///    when it has no entry).
///
/// An empty [line] is returned unchanged.
String normalizeNumericGaps(String line) {
  if (line.isEmpty) {
    return line;
  }

  // The four whitespace code units this function treats as gaps.
  bool isGapUnit(int unit) =>
      unit == spaceCodeUnit ||
      unit == tabCodeUnit ||
      unit == lineFeedCodeUnit ||
      unit == carriageReturnCodeUnit;

  // True as soon as one code unit is neither a digit nor a gap character.
  final containsOtherSymbols =
      line.codeUnits.any((unit) => !isDigit(unit) && !isGapUnit(unit));

  // Pass 1: swap mapped characters that touch a digit; copy the rest as-is.
  final remapped = StringBuffer();
  final lastIndex = line.length - 1;
  for (var pos = 0; pos <= lastIndex; pos++) {
    final symbol = line[pos];
    final touchesDigit = digitNonAlnumMap.containsKey(symbol) &&
        ((pos > 0 && isDigit(line.codeUnitAt(pos - 1))) ||
            (pos < lastIndex && isDigit(line.codeUnitAt(pos + 1))));
    remapped.write(touchesDigit ? digitNonAlnumMap[symbol] : symbol);
  }
  final mappedLine = remapped.toString();

  if (!containsOtherSymbols) {
    // Digits-and-whitespace-only line. Whitespace is collapsed only when the
    // helper reports that every digit group is a single digit, which suggests
    // a fragmented number (e.g. "1 2 3 4" → "1234"). Multi-digit groups
    // (e.g. "4004 5005 6006") are distinct numbers and keep their spaces.
    return _allSingleDigitGroups(mappedLine)
        ? mappedLine.replaceAll(RegExp(r'\s+'), '')
        : mappedLine;
  }

  // Pass 2: a letter stranded between digits after a whitespace gap is
  // joined to the preceding digit with a '.', using the letter's
  // digitConfusionMap replacement when one exists.
  final strandedLetter = RegExp(r'(\d)\s+([A-Za-z])(?=\d)');
  return mappedLine.replaceAllMapped(strandedLetter, (Match hit) {
    final leadingDigit = hit.group(regexGroupFirst) ?? '';
    final letter = hit.group(regexGroupSecond) ?? '';
    final replacement = digitConfusionMap[letter] ?? letter;
    return '$leadingDigit.$replacement';
  });
}