Merge pull request #14358 from Snuffleupagus/checkLastPage-improvements

Improve `PDFDocument.checkLastPage`/`Catalog.getAllPageDicts` for documents with corrupt XRef tables (follow-up to PRs 14311 and 14335)
This commit is contained in:
Tim van der Meij 2021-12-11 13:07:54 +01:00 committed by GitHub
commit a6dd39b645
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 15 deletions

View File

@ -1225,7 +1225,7 @@ class Catalog {
* Eagerly fetches the entire /Pages-tree; should ONLY be used as a fallback.
* @returns {Map}
*/
getAllPageDicts() {
getAllPageDicts(recoveryMode = false) {
const queue = [{ currentNode: this.toplevelPagesDict, posInKids: 0 }];
const visitedNodes = new RefSet();
const map = new Map();
@ -1234,8 +1234,8 @@ class Catalog {
function addPageDict(pageDict, pageRef) {
map.set(pageIndex++, [pageDict, pageRef]);
}
function addPageError(msg) {
map.set(pageIndex++, [new FormatError(msg), null]);
function addPageError(error) {
map.set(pageIndex++, [error, null]);
}
while (queue.length > 0) {
@ -1249,12 +1249,16 @@ class Catalog {
if (ex instanceof MissingDataException) {
throw ex;
}
if (ex instanceof XRefEntryException) {
if (ex instanceof XRefEntryException && !recoveryMode) {
throw ex;
}
addPageError(ex);
break;
}
if (!Array.isArray(kids)) {
addPageError("Page dictionary kids object is not an array.");
addPageError(
new FormatError("Page dictionary kids object is not an array.")
);
break;
}
@ -1272,13 +1276,17 @@ class Catalog {
if (ex instanceof MissingDataException) {
throw ex;
}
if (ex instanceof XRefEntryException) {
if (ex instanceof XRefEntryException && !recoveryMode) {
throw ex;
}
addPageError(ex);
break;
}
// Prevent circular references in the /Pages tree.
if (visitedNodes.has(kidObj)) {
addPageError("Pages tree contains circular reference.");
addPageError(
new FormatError("Pages tree contains circular reference.")
);
break;
}
visitedNodes.put(kidObj);
@ -1290,7 +1298,9 @@ class Catalog {
}
if (!(obj instanceof Dict)) {
addPageError(
"Page dictionary kid reference points to wrong type of object."
new FormatError(
"Page dictionary kid reference points to wrong type of object."
)
);
break;
}

View File

@ -1344,7 +1344,7 @@ class PDFDocument {
// Clear out the various caches to ensure that we haven't stored any
// inconsistent and/or incorrect state, since that could easily break
// subsequent `this.getPage` calls.
this._pagePromises.clear();
this._pagePromises.delete(0);
await this.cleanup();
throw new XRefParseException();
@ -1380,20 +1380,25 @@ class PDFDocument {
}
await this.getPage(numPages - 1);
} catch (reason) {
warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`);
// Clear out the various caches to ensure that we haven't stored any
// inconsistent and/or incorrect state, since that could easily break
// subsequent `this.getPage` calls.
this._pagePromises.delete(numPages - 1);
await this.cleanup();
if (reason instanceof XRefEntryException && !recoveryMode) {
throw new XRefParseException();
}
warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`);
let pagesTree;
try {
pagesTree = await pdfManager.ensureCatalog("getAllPageDicts");
pagesTree = await pdfManager.ensureCatalog("getAllPageDicts", [
recoveryMode,
]);
} catch (reasonAll) {
if (reasonAll instanceof XRefEntryException) {
if (!recoveryMode) {
throw new XRefParseException();
}
if (reasonAll instanceof XRefEntryException && !recoveryMode) {
throw new XRefParseException();
}
catalog.setActualNumPages(1);
return;