calculateSpaceThreshold method
Calculates an appropriate threshold for deciding whether a gap should be considered a space.
This method analyzes the distribution of gaps between artifacts to determine a suitable threshold for identifying spaces.
Returns: An integer giving the minimum width a gap must have to be considered a space. When the list of gaps is empty, the configured minimum space width is returned.
Implementation
/// Calculates the minimum gap width that should be treated as a space.
///
/// [gaps] holds the measured gap widths. The list is not modified: a sorted
/// copy is analysed, so callers keep their original ordering and may pass
/// unmodifiable lists.
///
/// The threshold is selected as follows:
///
/// 1. If [gaps] is empty, `_minSpaceWidth` is returned.
/// 2. If `tryJumpThreshold` reports a positive value when capped at
///    `max(_minSpaceWidth, averageWidth * _maxJumpThresholdWidthRatio)`, the
///    result is the largest of that value, `_minSpaceWidth`, and
///    `averageWidth * _spaceMinWidthRatio`.
/// 3. Otherwise, if there are fewer than `_minGapsForJumpCap` gaps and an
///    uncapped `tryJumpThreshold` call reports a positive value, the result is
///    the larger of that value and `_minSpaceWidth`.
/// 4. Otherwise the result is derived from the gap at index `length ~/ 3` of
///    the sorted gaps, scaled by `_spaceMedianMultiplier`, and floored by both
///    `_minSpaceWidth` and `averageWidth * _spaceMinWidthRatio`.
///
/// Returns the minimum gap width to be considered a space; never less than
/// `_minSpaceWidth`.
int calculateSpaceThreshold(List<int> gaps) {
  if (gaps.isEmpty) {
    return _minSpaceWidth;
  }

  // Work on a sorted copy so the caller's list is neither reordered nor
  // required to be modifiable.
  final List<int> sortedGaps = [...gaps]..sort();

  // Cap the jump threshold at a reasonable multiple of average character width.
  // Multi-scale gap distributions (e.g., monospace text with 1-, 2-, and 6-space
  // gaps) can cause the algorithm to pick the top-end jump, producing an
  // unreasonably high threshold that misses smaller but valid spaces.
  final int maxReasonable = max(
    _minSpaceWidth,
    (averageWidth * _maxJumpThresholdWidthRatio).round(),
  );

  // A positive result is treated as "a usable jump was found within the cap".
  final int jumpThreshold = tryJumpThreshold(sortedGaps, maxReasonable);
  if (jumpThreshold > 0) {
    final int minFromWidth = (averageWidth * _spaceMinWidthRatio).round();
    return max(_minSpaceWidth, max(jumpThreshold, minFromWidth));
  }

  // If the uncapped best jump is valid but exceeded the cap, and we have very
  // few gaps (so the percentile-based fallback below may be unreliable),
  // accept the uncapped jump.
  // NOTE(review): unlike the capped branch, this path does not apply the
  // width-based floor; presumably intentional -- confirm.
  if (sortedGaps.length < _minGapsForJumpCap) {
    final int uncapped = tryJumpThreshold(sortedGaps, _maxIntValue);
    if (uncapped > 0) {
      return max(_minSpaceWidth, uncapped);
    }
  }

  // Use a lower percentile (~33rd) instead of the median to avoid contamination
  // from word-space gaps that inflate the base value in gradual distributions.
  final int baseGap = sortedGaps[sortedGaps.length ~/ 3];
  final int thresholdFromGaps = (baseGap * _spaceMedianMultiplier).round();
  final int thresholdFromWidth = (averageWidth * _spaceMinWidthRatio).round();
  return max(_minSpaceWidth, max(thresholdFromGaps, thresholdFromWidth));
}