From 47f9eef584390b1a5e7a5e923fbfe425e9560e35 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 10 Dec 2021 11:45:09 +0100 Subject: [PATCH] Improve `PDFDocument.checkLastPage` for documents with corrupt XRef tables (PR 14311, 14335 follow-up) Rather than trying, and failing, to fetch the entire /Pages-tree for documents with corrupt XRef tables, let's fall back to indexing all objects *before* trying to invoke the `Catalog.getAllPageDicts` method. --- src/core/document.js | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 0706eb888..5d7768a52 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -1344,7 +1344,7 @@ class PDFDocument { // Clear out the various caches to ensure that we haven't stored any // inconsistent and/or incorrect state, since that could easily break // subsequent `this.getPage` calls. - this._pagePromises.clear(); + this._pagePromises.delete(0); await this.cleanup(); throw new XRefParseException(); @@ -1380,20 +1380,23 @@ class PDFDocument { } await this.getPage(numPages - 1); } catch (reason) { - warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`); // Clear out the various caches to ensure that we haven't stored any // inconsistent and/or incorrect state, since that could easily break // subsequent `this.getPage` calls. + this._pagePromises.delete(numPages - 1); await this.cleanup(); + if (reason instanceof XRefEntryException && !recoveryMode) { + throw new XRefParseException(); + } + warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`); + let pagesTree; try { pagesTree = await pdfManager.ensureCatalog("getAllPageDicts"); } catch (reasonAll) { - if (reasonAll instanceof XRefEntryException) { - if (!recoveryMode) { - throw new XRefParseException(); - } + if (reasonAll instanceof XRefEntryException && !recoveryMode) { + throw new XRefParseException(); } catalog.setActualNumPages(1); return;