findCharacterRects function
Finds character segments within a text line using vertical projection (pixel count per column).
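For clean digital text with separated characters, each character occupies a contiguous range of columns, and neighbouring characters are separated by gaps of empty columns (columns whose pixel count is zero or falls below a small adaptive threshold).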
Multi-part characters (i, j, :, ;, !, %, =, ") naturally stay grouped because their components share the same column range.
When attemptSplitting is true, segments significantly wider than the
median width are checked for valleys and split if touching characters
are detected.
Returns IntRect regions for each character segment, sorted left to right.
Implementation
/// Segments a text-line [image] into character regions by vertical projection.
///
/// The number of set cells in every column is counted, and each maximal run
/// of columns whose count reaches an adaptive "empty" threshold becomes one
/// full-height [IntRect]. The threshold is a fraction (`_emptyColumnRatio`)
/// of the densest column's count, but never less than 1.
///
/// When [attemptSplitting] is true and at least `_minSegmentsForMedian`
/// segments were found, the segments are handed to `_splitTouchingSegments`
/// and its result is returned instead of the raw runs.
///
/// Returns an empty list when [image] reports itself as empty. The raw runs
/// are produced in increasing column order.
List<IntRect> findCharacterRects(
  Artifact image, {
  bool attemptSplitting = true,
}) {
  if (image.isEmpty) {
    return const [];
  }

  final int columnCount = image.cols;
  final int rowCount = image.rows;

  // Vertical projection: count the set cells in each column and remember
  // the largest count seen along the way.
  final List<int> projection = List<int>.filled(columnCount, 0);
  int peak = 0;
  for (int col = 0; col < columnCount; col++) {
    int inked = 0;
    for (int row = 0; row < rowCount; row++) {
      if (image.cellGet(col, row)) {
        inked++;
      }
    }
    projection[col] = inked;
    if (inked > peak) {
      peak = inked;
    }
  }

  // Columns below this count are treated as gaps, so a few stray
  // anti-aliasing pixels between characters do not bridge two runs.
  final int minInk = max(1, (peak * _emptyColumnRatio).floor());

  // Collect maximal runs of non-empty columns. The loop visits one extra
  // virtual column at `columnCount`, which always counts as empty, so a run
  // that reaches the right edge is closed by the same branch as any other.
  final List<IntRect> runs = [];
  int? runStart;
  for (int col = 0; col <= columnCount; col++) {
    final bool occupied = col < columnCount && projection[col] >= minInk;
    if (occupied) {
      runStart ??= col;
    } else if (runStart != null) {
      runs.add(IntRect.fromLTWH(runStart, 0, col - runStart, rowCount));
      runStart = null;
    }
  }

  // Splitting only happens when requested and when enough runs were found.
  if (attemptSplitting && runs.length >= _minSegmentsForMedian) {
    return _splitTouchingSegments(image, runs);
  }
  return runs;
}