Merge pull request #14358 from Snuffleupagus/checkLastPage-improvements
Improve `PDFDocument.checkLastPage`/`Catalog.getAllPageDicts` for documents with corrupt XRef tables (PR 14311, 14335 follow-up)
This commit is contained in:
commit
a6dd39b645
@ -1225,7 +1225,7 @@ class Catalog {
|
|||||||
* Eagerly fetches the entire /Pages-tree; should ONLY be used as a fallback.
|
* Eagerly fetches the entire /Pages-tree; should ONLY be used as a fallback.
|
||||||
* @returns {Map}
|
* @returns {Map}
|
||||||
*/
|
*/
|
||||||
getAllPageDicts() {
|
getAllPageDicts(recoveryMode = false) {
|
||||||
const queue = [{ currentNode: this.toplevelPagesDict, posInKids: 0 }];
|
const queue = [{ currentNode: this.toplevelPagesDict, posInKids: 0 }];
|
||||||
const visitedNodes = new RefSet();
|
const visitedNodes = new RefSet();
|
||||||
const map = new Map();
|
const map = new Map();
|
||||||
@ -1234,8 +1234,8 @@ class Catalog {
|
|||||||
function addPageDict(pageDict, pageRef) {
|
function addPageDict(pageDict, pageRef) {
|
||||||
map.set(pageIndex++, [pageDict, pageRef]);
|
map.set(pageIndex++, [pageDict, pageRef]);
|
||||||
}
|
}
|
||||||
function addPageError(msg) {
|
function addPageError(error) {
|
||||||
map.set(pageIndex++, [new FormatError(msg), null]);
|
map.set(pageIndex++, [error, null]);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (queue.length > 0) {
|
while (queue.length > 0) {
|
||||||
@ -1249,12 +1249,16 @@ class Catalog {
|
|||||||
if (ex instanceof MissingDataException) {
|
if (ex instanceof MissingDataException) {
|
||||||
throw ex;
|
throw ex;
|
||||||
}
|
}
|
||||||
if (ex instanceof XRefEntryException) {
|
if (ex instanceof XRefEntryException && !recoveryMode) {
|
||||||
throw ex;
|
throw ex;
|
||||||
}
|
}
|
||||||
|
addPageError(ex);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (!Array.isArray(kids)) {
|
if (!Array.isArray(kids)) {
|
||||||
addPageError("Page dictionary kids object is not an array.");
|
addPageError(
|
||||||
|
new FormatError("Page dictionary kids object is not an array.")
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1272,13 +1276,17 @@ class Catalog {
|
|||||||
if (ex instanceof MissingDataException) {
|
if (ex instanceof MissingDataException) {
|
||||||
throw ex;
|
throw ex;
|
||||||
}
|
}
|
||||||
if (ex instanceof XRefEntryException) {
|
if (ex instanceof XRefEntryException && !recoveryMode) {
|
||||||
throw ex;
|
throw ex;
|
||||||
}
|
}
|
||||||
|
addPageError(ex);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
// Prevent circular references in the /Pages tree.
|
// Prevent circular references in the /Pages tree.
|
||||||
if (visitedNodes.has(kidObj)) {
|
if (visitedNodes.has(kidObj)) {
|
||||||
addPageError("Pages tree contains circular reference.");
|
addPageError(
|
||||||
|
new FormatError("Pages tree contains circular reference.")
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
visitedNodes.put(kidObj);
|
visitedNodes.put(kidObj);
|
||||||
@ -1290,7 +1298,9 @@ class Catalog {
|
|||||||
}
|
}
|
||||||
if (!(obj instanceof Dict)) {
|
if (!(obj instanceof Dict)) {
|
||||||
addPageError(
|
addPageError(
|
||||||
"Page dictionary kid reference points to wrong type of object."
|
new FormatError(
|
||||||
|
"Page dictionary kid reference points to wrong type of object."
|
||||||
|
)
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1344,7 +1344,7 @@ class PDFDocument {
|
|||||||
// Clear out the various caches to ensure that we haven't stored any
|
// Clear out the various caches to ensure that we haven't stored any
|
||||||
// inconsistent and/or incorrect state, since that could easily break
|
// inconsistent and/or incorrect state, since that could easily break
|
||||||
// subsequent `this.getPage` calls.
|
// subsequent `this.getPage` calls.
|
||||||
this._pagePromises.clear();
|
this._pagePromises.delete(0);
|
||||||
await this.cleanup();
|
await this.cleanup();
|
||||||
|
|
||||||
throw new XRefParseException();
|
throw new XRefParseException();
|
||||||
@ -1380,20 +1380,25 @@ class PDFDocument {
|
|||||||
}
|
}
|
||||||
await this.getPage(numPages - 1);
|
await this.getPage(numPages - 1);
|
||||||
} catch (reason) {
|
} catch (reason) {
|
||||||
warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`);
|
|
||||||
// Clear out the various caches to ensure that we haven't stored any
|
// Clear out the various caches to ensure that we haven't stored any
|
||||||
// inconsistent and/or incorrect state, since that could easily break
|
// inconsistent and/or incorrect state, since that could easily break
|
||||||
// subsequent `this.getPage` calls.
|
// subsequent `this.getPage` calls.
|
||||||
|
this._pagePromises.delete(numPages - 1);
|
||||||
await this.cleanup();
|
await this.cleanup();
|
||||||
|
|
||||||
|
if (reason instanceof XRefEntryException && !recoveryMode) {
|
||||||
|
throw new XRefParseException();
|
||||||
|
}
|
||||||
|
warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`);
|
||||||
|
|
||||||
let pagesTree;
|
let pagesTree;
|
||||||
try {
|
try {
|
||||||
pagesTree = await pdfManager.ensureCatalog("getAllPageDicts");
|
pagesTree = await pdfManager.ensureCatalog("getAllPageDicts", [
|
||||||
|
recoveryMode,
|
||||||
|
]);
|
||||||
} catch (reasonAll) {
|
} catch (reasonAll) {
|
||||||
if (reasonAll instanceof XRefEntryException) {
|
if (reasonAll instanceof XRefEntryException && !recoveryMode) {
|
||||||
if (!recoveryMode) {
|
throw new XRefParseException();
|
||||||
throw new XRefParseException();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
catalog.setActualNumPages(1);
|
catalog.setActualNumPages(1);
|
||||||
return;
|
return;
|
||||||
|
Loading…
Reference in New Issue
Block a user