normalizePriceLikeTableRow function
Repairs receipt-style quantity/price rows with noisy decimal separators.
OCR can merge quantity and price tokens into forms like 3.12-99, render
decimal prices with a hyphen as in 7-50, or emit comma decimals like 57,48.
When the line looks like an item row or an uppercase summary row, this
function normalizes those price-like tokens while leaving short code rows
such as SKU B1 7-50 untouched.
Implementation
/// Rewrites the price-like tokens of a receipt-style table row.
///
/// The [line] is returned untouched when it is empty, contains a `:`, holds
/// no price-like token, or is classified as neither an item row nor a
/// summary row.
///
/// * An item row starts with a purely alphabetic word of at least three
///   letters that is not entirely uppercase, and carries either a merged
///   quantity/price token or a standalone price token together with at least
///   one all-digit token.
/// * A summary row has exactly `_receiptSummaryTokenCount` tokens, starts
///   with a word of five or more uppercase ASCII letters, and carries a
///   standalone price token. A summary row whose first word is shorter than
///   `_receiptSummaryMinLabelLength` is returned unchanged.
///
/// What counts as a "standalone" or "merged" price token, and how each one
/// is rewritten, is decided by the private helpers this function calls.
/// Whitespace runs between tokens are carried over verbatim.
String normalizePriceLikeTableRow(String line) {
  if (line.isEmpty || line.contains(':')) {
    return line;
  }

  // Alternating runs of non-whitespace and whitespace, so that joining the
  // segments again reproduces the original spacing exactly.
  final List<String> segments = <String>[
    for (final Match run in RegExp(r'\S+|\s+').allMatches(line))
      run.group(0) ?? '',
  ];

  // Positions inside [segments] that hold an actual token.
  final List<int> wordSlots = <int>[
    for (int slot = 0; slot < segments.length; slot++)
      if (segments[slot].trim().isNotEmpty) slot,
  ];
  if (wordSlots.isEmpty) {
    return line;
  }

  // Classification pass: each token is counted under the first category it
  // matches, with the standalone-price check taking precedence.
  final RegExp digitsOnly = RegExp(r'^\d+$');
  bool sawStandalonePrice = false;
  bool sawMergedPrice = false;
  int plainNumberCount = 0;
  for (final int slot in wordSlots) {
    final String word = segments[slot];
    if (_isStandalonePriceToken(word)) {
      sawStandalonePrice = true;
    } else if (_isMergedQuantityPriceToken(word)) {
      sawMergedPrice = true;
    } else if (digitsOnly.hasMatch(word)) {
      plainNumberCount++;
    }
  }
  if (!(sawStandalonePrice || sawMergedPrice)) {
    return line;
  }

  final String leadWord = segments[wordSlots.first];
  final bool leadIsMixedCaseWord =
      RegExp(r'^[A-Za-z]{3,}$').hasMatch(leadWord) &&
      leadWord.toUpperCase() != leadWord;
  final bool isItemRow =
      leadIsMixedCaseWord &&
      (sawMergedPrice || (sawStandalonePrice && plainNumberCount > 0));
  final bool isSummaryRow =
      wordSlots.length == _receiptSummaryTokenCount &&
      RegExp(r'^[A-Z]{5,}$').hasMatch(leadWord) &&
      sawStandalonePrice;
  if (!(isItemRow || isSummaryRow)) {
    return line;
  }

  // Rewrite pass: here the merged check is tried before the standalone one.
  for (final int slot in wordSlots) {
    final String word = segments[slot];
    if (_isMergedQuantityPriceToken(word)) {
      segments[slot] = _normalizeMergedQuantityPriceToken(word);
    } else if (_isStandalonePriceToken(word)) {
      segments[slot] = _normalizeStandalonePriceToken(word);
    }
  }

  // Summary rows with a too-short label are rejected wholesale; the edits
  // made to [segments] above are simply discarded.
  if (isSummaryRow && leadWord.length < _receiptSummaryMinLabelLength) {
    return line;
  }

  // An all-lowercase item name of sufficient length is passed through
  // [toTitleCaseWord].
  final bool leadIsLowercaseName =
      leadWord.length >= _receiptRowMinNameLength &&
      leadWord == leadWord.toLowerCase();
  if (isItemRow && leadIsLowercaseName) {
    segments[wordSlots.first] = toTitleCaseWord(leadWord);
  }

  return segments.join();
}