extractImages method
Extrai imagens do PDF com suporte a faixa de páginas.
Se includeUnusedXObjects for true, considera todos os XObjects da
página, sem checar se foram usados no content stream.
Implementation
/// Extracts image metadata from the PDF, optionally limited to a page range.
///
/// [fromPage] and [toPage] are 1-based, inclusive page indexes; a `null`
/// bound leaves that side of the range open. When [includeUnusedXObjects] is
/// `true`, every XObject found in a page's resources is considered; otherwise
/// only the XObject names extracted from the page content are allowed at the
/// top level.
///
/// Returns an empty list when the trailer has no root object, when the root
/// object is not a dictionary, or when the requested range cannot match any
/// page (for example `toPage < 1` or `fromPage > toPage`).
List<PdfImageInfo> extractImages({
  int? fromPage,
  int? toPage,
  bool includeUnusedXObjects = false,
}) {
  _ensureXrefParsed();
  final trailer = _trailerInfo ??
      PdfParserXref.readTrailerInfoFromReader(reader, xrefOffset);
  final rootObjId = trailer.rootObj;
  if (rootObjId == null) return const <PdfImageInfo>[];

  final rootObj = _getObjectNoStream(rootObjId) ?? _getObject(rootObjId);
  final rootDict = rootObj?.value;
  if (rootDict is! PdfDictToken) return const <PdfImageInfo>[];

  // An inverted or non-positive range cannot match any 1-based page index.
  // Bail out before walking the page tree or running the repair scans, whose
  // results would all be discarded by the range filter anyway. A fresh list
  // is returned because this path previously yielded the regular (growable)
  // result list.
  final emptyRange = toPage != null &&
      (toPage < 1 || (fromPage != null && fromPage > toPage));
  if (emptyRange) return <PdfImageInfo>[];

  final pagesRef =
      PdfParserObjects.asRef(rootDict.values[PdfNameTokens.pages]);
  var pageRefs = pagesRef != null
      ? _collectPageRefs(pagesRef, maxPages: toPage)
      : <PdfRefToken>[];
  if ((_repairAttempted || pageRefs.isEmpty) && _allowRepair) {
    pageRefs = _collectPageRefsByScan(maxPages: toPage);
  }

  // Zero-based index of the first requested page; values below 1 mean
  // "start at the first page".
  final startIndex = fromPage == null || fromPage < 1 ? 0 : fromPage - 1;

  // Deduplicate by image object id across pages, matching mutool info
  // behavior.
  final images = <PdfImageInfo>[];
  final seenImageRefs = <int>{};
  final pageEnd =
      toPage == null || toPage > pageRefs.length ? pageRefs.length : toPage;
  for (var i = startIndex; i < pageEnd; i++) {
    final pageRef = pageRefs[i];
    final pageObj =
        _getObjectNoStream(pageRef.obj) ?? _getObject(pageRef.obj);
    final pageDict = pageObj?.value;
    if (pageDict is! PdfDictToken) continue;

    final resDict = _resolvePageResources(pageDict);
    // `null` means "no filtering": every top-level XObject is accepted.
    final usedXObjects = includeUnusedXObjects
        ? null
        : _extractXObjectNamesFromContent(pageDict).toSet();
    _collectImagesFromResources(
      resources: resDict,
      pageIndex: i + 1,
      pageRef: PdfIndirectRef(pageRef.obj, pageRef.gen),
      out: images,
      seenImageRefs: seenImageRefs,
      allowedTopLevelXObjects: usedXObjects,
    );
  }

  // Last resort fallback for damaged structures where resources cannot be
  // resolved. Mapping by index is best-effort and less reliable than
  // resource traversal.
  if (images.isEmpty && _allowRepair && pageRefs.isNotEmpty) {
    final scanned = _collectImagesByScan();
    // With exactly one scanned image per page, pair them positionally;
    // otherwise attribute every image to the first page reference.
    final onePerPage = scanned.length == pageRefs.length;
    final firstPageRef = pageRefs.first;
    final scanEnd =
        toPage == null || toPage > scanned.length ? scanned.length : toPage;
    for (var i = startIndex; i < scanEnd; i++) {
      final owner = onePerPage ? pageRefs[i] : firstPageRef;
      final img = scanned[i];
      images.add(PdfImageInfo(
        pageIndex: i + 1,
        pageRef: PdfIndirectRef(owner.obj, owner.gen),
        imageRef: img.imageRef,
        width: img.width,
        height: img.height,
        bitsPerComponent: img.bitsPerComponent,
        colorSpace: img.colorSpace,
        filter: img.filter,
      ));
    }
  }
  return images;
}