diff --git a/src/core/xref.js b/src/core/xref.js index 7c2c2172a..acd3ec3a4 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -107,14 +107,26 @@ class XRef { } warn(`XRef.parse - Invalid "Root" reference: "${ex}".`); } - if (root instanceof Dict && root.has("Pages")) { - this.root = root; - } else { - if (!recoveryMode) { - throw new XRefParseException(); + if (root instanceof Dict) { + try { + const pages = root.get("Pages"); + if (pages instanceof Dict) { + this.root = root; + return; + } + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + warn(`XRef.parse - Invalid "Pages" reference: "${ex}".`); } - throw new FormatError("Invalid root reference"); } + + if (!recoveryMode) { + throw new XRefParseException(); + } + // Even recovery failed, there's nothing more we can do here. + throw new InvalidPDFException("Invalid Root reference."); } processXRefTable(parser) { @@ -417,6 +429,7 @@ class XRef { // Clear out any existing entries, since they may be bogus. this.entries.length = 0; + this._cacheMap.clear(); const stream = this.stream; stream.pos = 0; diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 3a2532584..94309d9e1 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -445,7 +445,7 @@ describe("api", function () { await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); }); - it("creates pdf doc from PDF file with bad XRef table", async function () { + it("creates pdf doc from PDF file with bad XRef entry", async function () { // A corrupt PDF file, where the XRef table have (some) bogus entries. const loadingTask = getDocument( buildGetDocumentParams("PDFBOX-4352-0.pdf", { @@ -468,6 +468,26 @@ describe("api", function () { await loadingTask.destroy(); }); + it("creates pdf doc from PDF file with bad XRef header", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("GHOSTSCRIPT-698804-1-fuzzed.pdf") + ); + expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); + + const pdfDocument = await loadingTask.promise; + expect(pdfDocument.numPages).toEqual(1); + + const page = await pdfDocument.getPage(1); + expect(page instanceof PDFPageProxy).toEqual(true); + + const opList = await page.getOperatorList(); + expect(opList.fnArray.length).toEqual(0); + expect(opList.argsArray.length).toEqual(0); + expect(opList.lastChunk).toEqual(true); + + await loadingTask.destroy(); + }); + it("creates pdf doc from PDF file with bad XRef byteWidths", async function () { // A corrupt PDF file, where the XRef /W-array have (some) bogus entries. const loadingTask = getDocument( @@ -488,6 +508,25 @@ describe("api", function () { await loadingTask.destroy(); }); + it("creates pdf doc from PDF file with inaccessible /Pages tree", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("poppler-395-0-fuzzed.pdf") + ); + expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); + + try { + await loadingTask.promise; + + // Shouldn't get here. + expect(false).toEqual(true); + } catch (reason) { + expect(reason instanceof InvalidPDFException).toEqual(true); + expect(reason.message).toEqual("Invalid Root reference."); + } + + await loadingTask.destroy(); + }); + it("creates pdf doc from PDF files, with bad /Pages tree /Count", async function () { const loadingTask1 = getDocument( buildGetDocumentParams("poppler-67295-0.pdf") @@ -495,30 +534,23 @@ describe("api", function () { const loadingTask2 = getDocument( buildGetDocumentParams("poppler-85140-0.pdf") ); - const loadingTask3 = getDocument( - buildGetDocumentParams("poppler-395-0-fuzzed.pdf") - ); - const loadingTask4 = getDocument( - buildGetDocumentParams("GHOSTSCRIPT-698804-1-fuzzed.pdf") - ); expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true); expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true); - expect(loadingTask3 instanceof PDFDocumentLoadingTask).toEqual(true); - expect(loadingTask4 instanceof PDFDocumentLoadingTask).toEqual(true); const pdfDocument1 = await loadingTask1.promise; const pdfDocument2 = await loadingTask2.promise; - const pdfDocument3 = await loadingTask3.promise; - const pdfDocument4 = await loadingTask4.promise; expect(pdfDocument1.numPages).toEqual(1); expect(pdfDocument2.numPages).toEqual(1); - expect(pdfDocument3.numPages).toEqual(1); - expect(pdfDocument4.numPages).toEqual(1); - const pageA = await pdfDocument1.getPage(1); - expect(pageA instanceof PDFPageProxy).toEqual(true); + const page = await pdfDocument1.getPage(1); + expect(page instanceof PDFPageProxy).toEqual(true); + + const opList = await page.getOperatorList(); + expect(opList.fnArray.length).toBeGreaterThan(5); + expect(opList.argsArray.length).toBeGreaterThan(5); + expect(opList.lastChunk).toEqual(true); try { await pdfDocument2.getPage(1); @@ -529,28 +561,6 @@ describe("api", function () { expect(reason instanceof UnknownErrorException).toEqual(true); expect(reason.message).toEqual("Bad (uncompressed) XRef entry: 3R"); } - try { - await pdfDocument3.getPage(1); - - // Shouldn't get here. - expect(false).toEqual(true); - } catch (reason) { - expect(reason instanceof UnknownErrorException).toEqual(true); - expect(reason.message).toEqual( - "Page dictionary kid reference points to wrong type of object." - ); - } - try { - await pdfDocument4.getPage(1); - - // Shouldn't get here. - expect(false).toEqual(true); - } catch (reason) { - expect(reason instanceof UnknownErrorException).toEqual(true); - expect(reason.message).toEqual( - "Page dictionary kid reference points to wrong type of object." - ); - } await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); });