From d431ae069d1d431f33fceb0847cf39f103019c34 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 12 Mar 2018 14:00:37 +0100 Subject: [PATCH] Attempt to handle corrupt PDF documents that inline Page dictionaries in a Kids array (issue 9540) According to the specification, see https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf#G6.1942297, the contents of a Kids array should be indirect objects. --- src/core/obj.js | 40 ++++++++++++++++++++++++++---------- test/pdfs/issue9540.pdf.link | 1 + test/test_manifest.json | 7 +++++++ web/pdf_link_service.js | 3 +++ 4 files changed, 40 insertions(+), 11 deletions(-) create mode 100644 test/pdfs/issue9540.pdf.link diff --git a/src/core/obj.js b/src/core/obj.js index c7058689e..e155d0c24 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -501,20 +501,35 @@ var Catalog = (function CatalogClosure() { } count = currentNode.get('Count'); - // Cache the Kids count, since it can reduce redundant lookups in long - // documents where all nodes are found at *one* level of the tree. - var objId = currentNode.objId; - if (objId && !pageKidsCountCache.has(objId)) { - pageKidsCountCache.put(objId, count); - } - // Skip nodes where the page can't be. - if (currentPageIndex + count <= pageIndex) { - currentPageIndex += count; - continue; + if (Number.isInteger(count) && count >= 0) { + // Cache the Kids count, since it can reduce redundant lookups in + // documents where all nodes are found at *one* level of the tree. + var objId = currentNode.objId; + if (objId && !pageKidsCountCache.has(objId)) { + pageKidsCountCache.put(objId, count); + } + // Skip nodes where the page can't be. + if (currentPageIndex + count <= pageIndex) { + currentPageIndex += count; + continue; + } } var kids = currentNode.get('Kids'); if (!Array.isArray(kids)) { + // Prevent errors in corrupt PDF documents that violate the + // specification by *inlining* Page dicts directly in the Kids + // array, rather than using indirect objects (fixes issue9540.pdf). + if (isName(currentNode.get('Type'), 'Page') || + (!currentNode.has('Type') && currentNode.has('Contents'))) { + if (currentPageIndex === pageIndex) { + capability.resolve([currentNode, null]); + return; + } + currentPageIndex++; + continue; + } + capability.reject(new FormatError( 'page dictionary kids object is not an array')); return; @@ -574,11 +589,14 @@ var Catalog = (function CatalogClosure() { if (!isRef(kid)) { throw new FormatError('kid must be a Ref.'); } - if (kid.num === kidRef.num) { + if (isRefsEqual(kid, kidRef)) { found = true; break; } kidPromises.push(xref.fetchAsync(kid).then(function (kid) { + if (!isDict(kid)) { + throw new FormatError('kid node must be a Dict.'); + } if (kid.has('Count')) { var count = kid.get('Count'); total += count; diff --git a/test/pdfs/issue9540.pdf.link b/test/pdfs/issue9540.pdf.link new file mode 100644 index 000000000..80f03cae8 --- /dev/null +++ b/test/pdfs/issue9540.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/1793688/Problem.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 0906ffd9a..a17b05e82 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -875,6 +875,13 @@ "lastPage": 1, "type": "eq" }, + { "id": "issue9540", + "file": "pdfs/issue9540.pdf", + "md5": "7de7979270c9136bdd737428185fbbed", + "rounds": 1, + "link": true, + "type": "eq" + }, { "id": "txt2pdf", "file": "pdfs/txt2pdf.pdf", "md5": "02cefa0f5e8d96313bb05163b2f88c8c", diff --git a/web/pdf_link_service.js b/web/pdf_link_service.js index 91df46320..2e4662f0e 100644 --- a/web/pdf_link_service.js +++ b/web/pdf_link_service.js @@ -354,6 +354,9 @@ class PDFLinkService { * @param {Object} pageRef - reference to the page. */ cachePageRef(pageNum, pageRef) { + if (!pageRef) { + return; + } let refStr = pageRef.num + ' ' + pageRef.gen + ' R'; this._pagesRefCache[refStr] = pageNum; }