extractText method
Extract plain text from the page.
Equivalent to PyMuPDF's textpage.extractText().
Implementation
/// Extracts the plain text of the page in approximate reading order.
///
/// Intended as the counterpart of PyMuPDF's `textpage.extractText()`.
///
/// Runs with empty text are ignored. The remaining runs are ordered top to
/// bottom by `y` and grouped into lines: a run starts a new line when its `y`
/// differs from the previous run's `y` by more than 5.0. Within each line the
/// runs are ordered left to right by `x`, and a single space is inserted
/// between two neighbouring runs when the horizontal gap between them exceeds
/// a quarter of the following run's font size and that run does not already
/// start with a space.
///
/// Returns the lines joined by `'\n'` and terminated by a trailing `'\n'`, or
/// an empty string when there is no non-empty run.
String extractText() {
  const lineThreshold = 5.0;

  // Drop empty runs first so that "no runs" and "only empty runs" both yield
  // an empty result, then order the rest top to bottom.
  final runs = List<_TextRun>.from(_textRuns)
    ..removeWhere((run) => run.text.isEmpty)
    ..sort((a, b) {
      final dy = a.y.compareTo(b.y);
      return dy != 0 ? dy : a.x.compareTo(b.x);
    });
  if (runs.isEmpty) return '';

  // Group runs into lines BEFORE ordering by x. Sorting globally by exact y
  // would emit runs of one visual line out of horizontal order whenever
  // their baselines differ by a fraction of a unit. Each run is compared
  // against the previous run (not against the first run of the line).
  final lines = <List<_TextRun>>[];
  var lastY = double.negativeInfinity;
  for (final run in runs) {
    // The isEmpty guard keeps a NaN y on the very first run from falling
    // through to `lines.last` on an empty list.
    if (lines.isEmpty || (run.y - lastY).abs() > lineThreshold) {
      lines.add([run]);
    } else {
      lines.last.add(run);
    }
    lastY = run.y;
  }

  final buffer = StringBuffer();
  for (final line in lines) {
    // Left to right within the line; y breaks ties so the order is
    // deterministic even though List.sort is not guaranteed to be stable.
    line.sort((a, b) {
      final dx = a.x.compareTo(b.x);
      return dx != 0 ? dx : a.y.compareTo(b.y);
    });

    if (buffer.isNotEmpty) buffer.write('\n');

    var isFirst = true;
    var lastX = 0.0;
    for (final run in line) {
      final text = run.text;
      if (!isFirst) {
        // A gap wider than a quarter of the font size is treated as a word
        // break, unless the run already carries its own leading space.
        final gap = run.x - lastX;
        final spaceWidth = run.fontSize * 0.25;
        if (gap > spaceWidth && !text.startsWith(' ')) {
          buffer.write(' ');
        }
      }
      buffer.write(text);
      isFirst = false;
      lastX = run.x + run.approxWidth;
    }
  }

  // PyMuPDF always appends a trailing newline
  buffer.write('\n');
  return buffer.toString();
}