findTextLineRects function

List<IntRect> findTextLineRects(
  1. Artifact image
)

Finds text line rows using horizontal projection (pixel count per row).

Scans the image row-by-row and groups contiguous non-empty rows into text line regions. Returns IntRect regions for each text line, sorted top to bottom.

For clean digital text, the gaps between lines consist of rows that contain no set pixels.

Implementation

/// Locates text lines in [image] from its horizontal projection.
///
/// Counts the set cells in every row, then groups each maximal run of
/// consecutive rows whose count exceeds `_minPixelCount` into one rectangle.
/// Every rectangle starts at x = 0 and spans the full image width; the
/// rectangles are produced in top-to-bottom order.
///
/// Returns an empty list when [image] is empty.
List<IntRect> findTextLineRects(Artifact image) {
  if (image.isEmpty) {
    return const [];
  }

  final int cols = image.cols;
  final int rows = image.rows;

  // Horizontal projection: the number of set cells found in each row.
  final List<int> inkPerRow = List<int>.generate(rows, (int row) {
    int count = 0;
    for (int col = 0; col < cols; col++) {
      if (image.cellGet(col, row)) {
        count++;
      }
    }
    return count;
  });

  // A row counts as content when its total exceeds _minPixelCount.
  // NOTE(review): _minPixelCount is declared outside this function; the
  // original note here describes it as a zero threshold (a row is blank only
  // when it has no pixels at all) - confirm against its declaration.
  // Rationale carried over from that note: the adaptive ratio used for
  // columns would incorrectly split lines with descenders (g, p, y) or
  // dots (i, j).
  final List<IntRect> textLines = [];
  int row = 0;
  while (row < rows) {
    // Step over blank rows between text lines.
    if (inkPerRow[row] <= _minPixelCount) {
      row++;
      continue;
    }

    // Consume the whole run of content rows; `row` ends one past the run,
    // which also closes a run that reaches the bottom edge of the image.
    final int top = row;
    while (row < rows && inkPerRow[row] > _minPixelCount) {
      row++;
    }
    textLines.add(IntRect.fromLTWH(0, top, cols, row - top));
  }

  return textLines;
}