Merge pull request #14358 from Snuffleupagus/checkLastPage-improvements

Improve `PDFDocument.checkLastPage`/`Catalog.getAllPageDicts` for documents with corrupt XRef tables (PR 14311, 14335 follow-up)
This commit is contained in:
Tim van der Meij 2021-12-11 13:07:54 +01:00 committed by GitHub
commit a6dd39b645
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 15 deletions

View File

@ -1225,7 +1225,7 @@ class Catalog {
* Eagerly fetches the entire /Pages-tree; should ONLY be used as a fallback. * Eagerly fetches the entire /Pages-tree; should ONLY be used as a fallback.
* @returns {Map} * @returns {Map}
*/ */
getAllPageDicts() { getAllPageDicts(recoveryMode = false) {
const queue = [{ currentNode: this.toplevelPagesDict, posInKids: 0 }]; const queue = [{ currentNode: this.toplevelPagesDict, posInKids: 0 }];
const visitedNodes = new RefSet(); const visitedNodes = new RefSet();
const map = new Map(); const map = new Map();
@ -1234,8 +1234,8 @@ class Catalog {
function addPageDict(pageDict, pageRef) { function addPageDict(pageDict, pageRef) {
map.set(pageIndex++, [pageDict, pageRef]); map.set(pageIndex++, [pageDict, pageRef]);
} }
function addPageError(msg) { function addPageError(error) {
map.set(pageIndex++, [new FormatError(msg), null]); map.set(pageIndex++, [error, null]);
} }
while (queue.length > 0) { while (queue.length > 0) {
@ -1249,12 +1249,16 @@ class Catalog {
if (ex instanceof MissingDataException) { if (ex instanceof MissingDataException) {
throw ex; throw ex;
} }
if (ex instanceof XRefEntryException) { if (ex instanceof XRefEntryException && !recoveryMode) {
throw ex; throw ex;
} }
addPageError(ex);
break;
} }
if (!Array.isArray(kids)) { if (!Array.isArray(kids)) {
addPageError("Page dictionary kids object is not an array."); addPageError(
new FormatError("Page dictionary kids object is not an array.")
);
break; break;
} }
@ -1272,13 +1276,17 @@ class Catalog {
if (ex instanceof MissingDataException) { if (ex instanceof MissingDataException) {
throw ex; throw ex;
} }
if (ex instanceof XRefEntryException) { if (ex instanceof XRefEntryException && !recoveryMode) {
throw ex; throw ex;
} }
addPageError(ex);
break;
} }
// Prevent circular references in the /Pages tree. // Prevent circular references in the /Pages tree.
if (visitedNodes.has(kidObj)) { if (visitedNodes.has(kidObj)) {
addPageError("Pages tree contains circular reference."); addPageError(
new FormatError("Pages tree contains circular reference.")
);
break; break;
} }
visitedNodes.put(kidObj); visitedNodes.put(kidObj);
@ -1290,7 +1298,9 @@ class Catalog {
} }
if (!(obj instanceof Dict)) { if (!(obj instanceof Dict)) {
addPageError( addPageError(
"Page dictionary kid reference points to wrong type of object." new FormatError(
"Page dictionary kid reference points to wrong type of object."
)
); );
break; break;
} }

View File

@ -1344,7 +1344,7 @@ class PDFDocument {
// Clear out the various caches to ensure that we haven't stored any // Clear out the various caches to ensure that we haven't stored any
// inconsistent and/or incorrect state, since that could easily break // inconsistent and/or incorrect state, since that could easily break
// subsequent `this.getPage` calls. // subsequent `this.getPage` calls.
this._pagePromises.clear(); this._pagePromises.delete(0);
await this.cleanup(); await this.cleanup();
throw new XRefParseException(); throw new XRefParseException();
@ -1380,20 +1380,25 @@ class PDFDocument {
} }
await this.getPage(numPages - 1); await this.getPage(numPages - 1);
} catch (reason) { } catch (reason) {
warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`);
// Clear out the various caches to ensure that we haven't stored any // Clear out the various caches to ensure that we haven't stored any
// inconsistent and/or incorrect state, since that could easily break // inconsistent and/or incorrect state, since that could easily break
// subsequent `this.getPage` calls. // subsequent `this.getPage` calls.
this._pagePromises.delete(numPages - 1);
await this.cleanup(); await this.cleanup();
if (reason instanceof XRefEntryException && !recoveryMode) {
throw new XRefParseException();
}
warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`);
let pagesTree; let pagesTree;
try { try {
pagesTree = await pdfManager.ensureCatalog("getAllPageDicts"); pagesTree = await pdfManager.ensureCatalog("getAllPageDicts", [
recoveryMode,
]);
} catch (reasonAll) { } catch (reasonAll) {
if (reasonAll instanceof XRefEntryException) { if (reasonAll instanceof XRefEntryException && !recoveryMode) {
if (!recoveryMode) { throw new XRefParseException();
throw new XRefParseException();
}
} }
catalog.setActualNumPages(1); catalog.setActualNumPages(1);
return; return;