normalizePriceLikeTableRow function

String normalizePriceLikeTableRow(String line)

Repairs receipt-style quantity/price rows with noisy decimal separators.

OCR can merge quantity and price tokens into forms like 3.12-99, render decimal prices with a hyphen as in 7-50, or emit comma decimals like 57,48. When the line looks like an item row or an uppercase summary row, those price-like tokens are normalized; short code rows such as SKU B1 7-50 are left untouched.

Implementation

/// Repairs receipt-style quantity/price rows with noisy decimal separators.
///
/// OCR can merge quantity and price tokens into forms like `3.12-99`, render
/// decimal prices as `7-50`, or emit comma decimals like `57,48`. When [line]
/// looks like an item row or an uppercase summary row, its price-like tokens
/// are normalized; short code rows such as `SKU B1 7-50` are left untouched.
///
/// Returns [line] unchanged when it is empty, contains a `:`, has no
/// price-like token, or is classified as neither an item row nor a summary
/// row. Otherwise returns the row with price-like tokens rewritten and the
/// original whitespace runs preserved.
String normalizePriceLikeTableRow(String line) {
  if (line.isEmpty || line.contains(':')) {
    return line;
  }

  // Split into alternating runs of non-whitespace and whitespace so that
  // joining the pieces back together reproduces the original spacing.
  final List<String> pieces = RegExp(r'\S+|\s+')
      .allMatches(line)
      .map((Match match) => match.group(0) ?? '')
      .toList();

  // Positions in [pieces] that hold real tokens rather than whitespace runs.
  final List<int> tokenIndexes = <int>[
    for (int i = 0; i < pieces.length; i++)
      if (pieces[i].trim().isNotEmpty) i,
  ];
  if (tokenIndexes.isEmpty) {
    return line;
  }

  // Built once per call instead of once per token.
  final RegExp wholeNumber = RegExp(r'^\d+$');

  // Classification pass: a token counts as standalone price first, merged
  // quantity/price second, plain integer quantity last.
  bool hasStandalonePriceToken = false;
  bool hasMergedQuantityPriceToken = false;
  int quantityTokenCount = 0;
  for (final int tokenIndex in tokenIndexes) {
    final String token = pieces[tokenIndex];
    if (_isStandalonePriceToken(token)) {
      hasStandalonePriceToken = true;
    } else if (_isMergedQuantityPriceToken(token)) {
      hasMergedQuantityPriceToken = true;
    } else if (wholeNumber.hasMatch(token)) {
      quantityTokenCount++;
    }
  }

  if (!hasStandalonePriceToken && !hasMergedQuantityPriceToken) {
    return line;
  }

  final String firstToken = pieces[tokenIndexes.first];

  // Item row: alphabetic, not-all-uppercase leading word, plus either a
  // merged quantity/price token or a standalone price next to a quantity.
  final bool itemRow =
      RegExp(r'^[A-Za-z]{3,}$').hasMatch(firstToken) &&
      firstToken != firstToken.toUpperCase() &&
      (hasMergedQuantityPriceToken ||
          (hasStandalonePriceToken && quantityTokenCount > 0));

  // Summary row: expected token count, all-uppercase label of sufficient
  // length, and a standalone price. The label-length requirement is checked
  // here, before any rewriting, so a too-short label bails out without
  // normalizing tokens whose result would be discarded. This is safe because
  // an all-uppercase label can never also satisfy [itemRow].
  final bool summaryRow =
      tokenIndexes.length == _receiptSummaryTokenCount &&
      RegExp(r'^[A-Z]{5,}$').hasMatch(firstToken) &&
      firstToken.length >= _receiptSummaryMinLabelLength &&
      hasStandalonePriceToken;

  if (!itemRow && !summaryRow) {
    return line;
  }

  // Rewrite pass: the merged form is tested before the standalone form here
  // (the reverse of the classification pass); that order is intentional to
  // keep and must not be swapped without reviewing both helpers.
  for (final int tokenIndex in tokenIndexes) {
    final String token = pieces[tokenIndex];
    if (_isMergedQuantityPriceToken(token)) {
      pieces[tokenIndex] = _normalizeMergedQuantityPriceToken(token);
    } else if (_isStandalonePriceToken(token)) {
      pieces[tokenIndex] = _normalizeStandalonePriceToken(token);
    }
  }

  // An all-lowercase item name that is long enough gets title-cased.
  if (itemRow &&
      firstToken.length >= _receiptRowMinNameLength &&
      firstToken == firstToken.toLowerCase()) {
    pieces[tokenIndexes.first] = toTitleCaseWord(firstToken);
  }

  return pieces.join();
}