diff --git a/src/core/catalog.js b/src/core/catalog.js index 9bad3bd47..a5355ad57 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -1091,15 +1091,14 @@ class Catalog { const visitedNodes = new RefSet(); const xref = this.xref, pageKidsCountCache = this.pageKidsCountCache; - let count, - currentPageIndex = 0; + let currentPageIndex = 0; function next() { while (nodesToVisit.length) { const currentNode = nodesToVisit.pop(); if (isRef(currentNode)) { - count = pageKidsCountCache.get(currentNode); + const count = pageKidsCountCache.get(currentNode); // Skip nodes where the page can't be. if (count > 0 && currentPageIndex + count < pageIndex) { currentPageIndex += count; @@ -1146,7 +1145,14 @@ class Catalog { return; } - count = currentNode.get("Count"); + let count; + try { + count = currentNode.get("Count"); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + } if (Number.isInteger(count) && count >= 0 && !skipCount) { // Cache the Kids count, since it can reduce redundant lookups in // documents where all nodes are found at *one* level of the tree. @@ -1161,13 +1167,28 @@ class Catalog { } } - const kids = currentNode.get("Kids"); + let kids; + try { + kids = currentNode.get("Kids"); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + } if (!Array.isArray(kids)) { // Prevent errors in corrupt PDF documents that violate the // specification by *inlining* Page dicts directly in the Kids // array, rather than using indirect objects (fixes issue9540.pdf). + let type; + try { + type = currentNode.get("Type"); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + } if ( - isName(currentNode.get("Type"), "Page") || + isName(type, "Page") || (!currentNode.has("Type") && currentNode.has("Contents")) ) { if (currentPageIndex === pageIndex) { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index d8681a1e7..d8f7c6a11 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -494,3 +494,5 @@ !poppler-85140-0.pdf !poppler-91414-0-53.pdf !poppler-91414-0-54.pdf +!poppler-742-0-fuzzed.pdf +!poppler-937-0-fuzzed.pdf diff --git a/test/pdfs/poppler-742-0-fuzzed.pdf b/test/pdfs/poppler-742-0-fuzzed.pdf new file mode 100644 index 000000000..cc9758b35 Binary files /dev/null and b/test/pdfs/poppler-742-0-fuzzed.pdf differ diff --git a/test/pdfs/poppler-937-0-fuzzed.pdf b/test/pdfs/poppler-937-0-fuzzed.pdf new file mode 100644 index 000000000..fe47fd57d Binary files /dev/null and b/test/pdfs/poppler-937-0-fuzzed.pdf differ diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 6b8c1c4f2..33226b46a 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -545,6 +545,48 @@ describe("api", function () { await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); }); + + it("creates pdf doc from PDF files, with bad /Pages tree /Kids entries", async function () { + const loadingTask1 = getDocument( + buildGetDocumentParams("poppler-742-0-fuzzed.pdf") + ); + const loadingTask2 = getDocument( + buildGetDocumentParams("poppler-937-0-fuzzed.pdf") + ); + expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true); + expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true); + + const pdfDocument1 = await loadingTask1.promise; + const pdfDocument2 = await loadingTask2.promise; + + expect(pdfDocument1.numPages).toEqual(1); + expect(pdfDocument2.numPages).toEqual(1); + + try { + await pdfDocument1.getPage(1); + + // Shouldn't get here. + expect(false).toEqual(true); + } catch (reason) { + expect(reason instanceof UnknownErrorException).toEqual(true); + expect(reason.message).toEqual( + "Page dictionary kids object is not an array." + ); + } + try { + await pdfDocument2.getPage(1); + + // Shouldn't get here. + expect(false).toEqual(true); + } catch (reason) { + expect(reason instanceof UnknownErrorException).toEqual(true); + expect(reason.message).toEqual( + "Page dictionary kids object is not an array." + ); + } + + await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); + }); }); describe("PDFWorker", function () {