Attempt to handle corrupt PDF documents that inline Page dictionaries in a Kids array (issue 9540)
According to the specification (see https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf#G6.1942297), the entries of a Kids array should be indirect references to objects, not inlined dictionaries.
This commit is contained in:
parent
6662985a20
commit
d431ae069d
@ -501,20 +501,35 @@ var Catalog = (function CatalogClosure() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
count = currentNode.get('Count');
|
count = currentNode.get('Count');
|
||||||
// Cache the Kids count, since it can reduce redundant lookups in long
|
if (Number.isInteger(count) && count >= 0) {
|
||||||
// documents where all nodes are found at *one* level of the tree.
|
// Cache the Kids count, since it can reduce redundant lookups in
|
||||||
var objId = currentNode.objId;
|
// documents where all nodes are found at *one* level of the tree.
|
||||||
if (objId && !pageKidsCountCache.has(objId)) {
|
var objId = currentNode.objId;
|
||||||
pageKidsCountCache.put(objId, count);
|
if (objId && !pageKidsCountCache.has(objId)) {
|
||||||
}
|
pageKidsCountCache.put(objId, count);
|
||||||
// Skip nodes where the page can't be.
|
}
|
||||||
if (currentPageIndex + count <= pageIndex) {
|
// Skip nodes where the page can't be.
|
||||||
currentPageIndex += count;
|
if (currentPageIndex + count <= pageIndex) {
|
||||||
continue;
|
currentPageIndex += count;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var kids = currentNode.get('Kids');
|
var kids = currentNode.get('Kids');
|
||||||
if (!Array.isArray(kids)) {
|
if (!Array.isArray(kids)) {
|
||||||
|
// Prevent errors in corrupt PDF documents that violate the
|
||||||
|
// specification by *inlining* Page dicts directly in the Kids
|
||||||
|
// array, rather than using indirect objects (fixes issue9540.pdf).
|
||||||
|
if (isName(currentNode.get('Type'), 'Page') ||
|
||||||
|
(!currentNode.has('Type') && currentNode.has('Contents'))) {
|
||||||
|
if (currentPageIndex === pageIndex) {
|
||||||
|
capability.resolve([currentNode, null]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
currentPageIndex++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
capability.reject(new FormatError(
|
capability.reject(new FormatError(
|
||||||
'page dictionary kids object is not an array'));
|
'page dictionary kids object is not an array'));
|
||||||
return;
|
return;
|
||||||
@ -574,11 +589,14 @@ var Catalog = (function CatalogClosure() {
|
|||||||
if (!isRef(kid)) {
|
if (!isRef(kid)) {
|
||||||
throw new FormatError('kid must be a Ref.');
|
throw new FormatError('kid must be a Ref.');
|
||||||
}
|
}
|
||||||
if (kid.num === kidRef.num) {
|
if (isRefsEqual(kid, kidRef)) {
|
||||||
found = true;
|
found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
kidPromises.push(xref.fetchAsync(kid).then(function (kid) {
|
kidPromises.push(xref.fetchAsync(kid).then(function (kid) {
|
||||||
|
if (!isDict(kid)) {
|
||||||
|
throw new FormatError('kid node must be a Dict.');
|
||||||
|
}
|
||||||
if (kid.has('Count')) {
|
if (kid.has('Count')) {
|
||||||
var count = kid.get('Count');
|
var count = kid.get('Count');
|
||||||
total += count;
|
total += count;
|
||||||
|
1
test/pdfs/issue9540.pdf.link
Normal file
1
test/pdfs/issue9540.pdf.link
Normal file
@ -0,0 +1 @@
|
|||||||
|
https://github.com/mozilla/pdf.js/files/1793688/Problem.pdf
|
@ -875,6 +875,13 @@
|
|||||||
"lastPage": 1,
|
"lastPage": 1,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue9540",
|
||||||
|
"file": "pdfs/issue9540.pdf",
|
||||||
|
"md5": "7de7979270c9136bdd737428185fbbed",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": true,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
{ "id": "txt2pdf",
|
{ "id": "txt2pdf",
|
||||||
"file": "pdfs/txt2pdf.pdf",
|
"file": "pdfs/txt2pdf.pdf",
|
||||||
"md5": "02cefa0f5e8d96313bb05163b2f88c8c",
|
"md5": "02cefa0f5e8d96313bb05163b2f88c8c",
|
||||||
|
@ -354,6 +354,9 @@ class PDFLinkService {
|
|||||||
* @param {Object} pageRef - reference to the page.
|
* @param {Object} pageRef - reference to the page.
|
||||||
*/
|
*/
|
||||||
cachePageRef(pageNum, pageRef) {
|
cachePageRef(pageNum, pageRef) {
|
||||||
|
if (!pageRef) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
let refStr = pageRef.num + ' ' + pageRef.gen + ' R';
|
let refStr = pageRef.num + ' ' + pageRef.gen + ' R';
|
||||||
this._pagesRefCache[refStr] = pageNum;
|
this._pagesRefCache[refStr] = pageNum;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user