Attempt to handle corrupt PDF documents that inline Page dictionaries in a Kids array (issue 9540)

According to the specification (see https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf#G6.1942297), the entries of a Kids array should be indirect references to objects, not inline dictionaries.
This commit is contained in:
Jonas Jenwald 2018-03-12 14:00:37 +01:00
parent 6662985a20
commit d431ae069d
4 changed files with 40 additions and 11 deletions

View File

@ -501,20 +501,35 @@ var Catalog = (function CatalogClosure() {
}
count = currentNode.get('Count');
// Cache the Kids count, since it can reduce redundant lookups in long
// documents where all nodes are found at *one* level of the tree.
var objId = currentNode.objId;
if (objId && !pageKidsCountCache.has(objId)) {
pageKidsCountCache.put(objId, count);
}
// Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
continue;
if (Number.isInteger(count) && count >= 0) {
// Cache the Kids count, since it can reduce redundant lookups in
// documents where all nodes are found at *one* level of the tree.
var objId = currentNode.objId;
if (objId && !pageKidsCountCache.has(objId)) {
pageKidsCountCache.put(objId, count);
}
// Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
continue;
}
}
var kids = currentNode.get('Kids');
if (!Array.isArray(kids)) {
// Prevent errors in corrupt PDF documents that violate the
// specification by *inlining* Page dicts directly in the Kids
// array, rather than using indirect objects (fixes issue9540.pdf).
if (isName(currentNode.get('Type'), 'Page') ||
(!currentNode.has('Type') && currentNode.has('Contents'))) {
if (currentPageIndex === pageIndex) {
capability.resolve([currentNode, null]);
return;
}
currentPageIndex++;
continue;
}
capability.reject(new FormatError(
'page dictionary kids object is not an array'));
return;
@ -574,11 +589,14 @@ var Catalog = (function CatalogClosure() {
if (!isRef(kid)) {
throw new FormatError('kid must be a Ref.');
}
if (kid.num === kidRef.num) {
if (isRefsEqual(kid, kidRef)) {
found = true;
break;
}
kidPromises.push(xref.fetchAsync(kid).then(function (kid) {
if (!isDict(kid)) {
throw new FormatError('kid node must be a Dict.');
}
if (kid.has('Count')) {
var count = kid.get('Count');
total += count;

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/1793688/Problem.pdf

View File

@ -875,6 +875,13 @@
"lastPage": 1,
"type": "eq"
},
{ "id": "issue9540",
"file": "pdfs/issue9540.pdf",
"md5": "7de7979270c9136bdd737428185fbbed",
"rounds": 1,
"link": true,
"type": "eq"
},
{ "id": "txt2pdf",
"file": "pdfs/txt2pdf.pdf",
"md5": "02cefa0f5e8d96313bb05163b2f88c8c",

View File

@ -354,6 +354,9 @@ class PDFLinkService {
* @param {Object} pageRef - reference to the page.
*/
cachePageRef(pageNum, pageRef) {
if (!pageRef) {
return;
}
let refStr = pageRef.num + ' ' + pageRef.gen + ' R';
this._pagesRefCache[refStr] = pageNum;
}