Always check all Kids nodes, in Catalog.getPageDict, to avoid getting stuck in an empty node further down in the Pages tree (issue 8088)

As discussed on IRC, we need to check all nodes at the *bottom* of the tree to ensure that we find the correct `Page` dict.
Furthermore, this patch gets rid of the caching present in a previous version, since it's not clear whether that really helps.

Note that this patch purposely adds an `eq` test, using a reduced test-case, so that we can be sure that the algorithm actually finds the correct `Page` dict for each `pageIndex`.

Fixes #8088.
This commit is contained in:
Jonas Jenwald 2017-02-23 13:28:50 +01:00
parent cada411af4
commit 1ce295541c
4 changed files with 182 additions and 25 deletions

View File

@ -452,7 +452,6 @@ var Catalog = (function CatalogClosure() {
var nodesToVisit = [this.catDict.getRaw('Pages')];
var currentPageIndex = 0;
var xref = this.xref;
var checkAllKids = false;
function next() {
while (nodesToVisit.length) {
@ -476,16 +475,10 @@ var Catalog = (function CatalogClosure() {
}
// Must be a child page dictionary.
assert(
isDict(currentNode),
'page dictionary kid reference points to wrong type of object'
);
assert(isDict(currentNode),
'page dictionary kid reference points to wrong type of object');
var count = currentNode.get('Count');
// If the current node doesn't have any children, avoid getting stuck
// in an empty node further down in the tree (see issue5644.pdf).
if (count === 0) {
checkAllKids = true;
}
// Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
@ -494,23 +487,14 @@ var Catalog = (function CatalogClosure() {
var kids = currentNode.get('Kids');
assert(isArray(kids), 'page dictionary kids object is not an array');
if (!checkAllKids && count === kids.length) {
// Nodes that don't have the page have been skipped and this is the
// bottom of the tree which means the page requested must be a
// descendant of this pages node. Ideally we would just resolve the
// promise with the page ref here, but there is the case where more
// pages nodes could link to a single page (see issue 3666 pdf). To
// handle this push it back on the queue so if it is a pages node it
// will be descended into.
nodesToVisit = [kids[pageIndex - currentPageIndex]];
currentPageIndex = pageIndex;
continue;
} else {
// Always check all `Kids` nodes, to avoid getting stuck in an empty
// node further down in the tree (see issue5644.pdf, issue8088.pdf),
// and to ensure that we actually find the correct `Page` dict.
for (var last = kids.length - 1; last >= 0; last--) {
nodesToVisit.push(kids[last]);
}
}
}
capability.reject('Page index ' + pageIndex + ' not found.');
}
next();

View File

@ -46,6 +46,7 @@
!issue7872.pdf
!issue7901.pdf
!issue8061.pdf
!issue8088.pdf
!bad-PageLabels.pdf
!filled-background.pdf
!ArabicCIDTrueType.pdf

163
test/pdfs/issue8088.pdf Normal file
View File

@ -0,0 +1,163 @@
%PDF-1.7
%âãÏÓ
1 0 obj
<<
/Parent 2 0 R
/Resources
<<
/Font
<<
/F1 3 0 R
>>
>>
/MediaBox [0 0 200 50]
/Type /Page
/Contents 4 0 R
>>
endobj
4 0 obj
<<
/Length 50
>>
stream
BT
10 20 TD
/F1 20 Tf
(Issue 8088 - Page 2) Tj
ET
endstream
endobj
5 0 obj
<<
/Parent 6 0 R
/Resources
<<
/Font
<<
/F1 3 0 R
>>
>>
/MediaBox [0 0 200 50]
/Type /Page
/Contents 7 0 R
>>
endobj
7 0 obj
<<
/Length 50
>>
stream
BT
10 20 TD
/F1 20 Tf
(Issue 8088 - Page 3) Tj
ET
endstream
endobj
8 0 obj
<<
/Parent 9 0 R
/Kids []
/Type /Pages
/Count 0
>>
endobj
2 0 obj
<<
/Parent 9 0 R
/Kids [10 0 R 1 0 R]
/Type /Pages
/Count 2
>>
endobj
9 0 obj
<<
/Parent 11 0 R
/Kids [8 0 R 2 0 R]
/Type /Pages
/Count 2
>>
endobj
6 0 obj
<<
/Parent 11 0 R
/Kids [5 0 R]
/Type /Pages
/Count 1
>>
endobj
11 0 obj
<<
/Kids [9 0 R 6 0 R]
/Type /Pages
/Count 3
>>
endobj
12 0 obj
<<
/Pages 11 0 R
/Type /Catalog
>>
endobj
10 0 obj
<<
/Parent 2 0 R
/Resources
<<
/Font
<<
/F1 3 0 R
>>
>>
/MediaBox [0 0 200 50]
/Type /Page
/Contents 13 0 R
>>
endobj
13 0 obj
<<
/Length 50
>>
stream
BT
10 20 TD
/F1 20 Tf
(Issue 8088 - Page 1) Tj
ET
endstream
endobj
3 0 obj
<<
/BaseFont /Times-Roman
/Subtype /Type1
/Encoding /WinAnsiEncoding
/Type /Font
>>
endobj xref
0 14
0000000000 65535 f
0000000015 00000 n
0000000547 00000 n
0000001135 00000 n
0000000144 00000 n
0000000247 00000 n
0000000707 00000 n
0000000376 00000 n
0000000479 00000 n
0000000627 00000 n
0000000900 00000 n
0000000781 00000 n
0000000847 00000 n
0000001031 00000 n
trailer
<<
/Root 12 0 R
/Size 14
>>
startxref
1235
%%EOF

View File

@ -1318,6 +1318,15 @@
"lastPage": 6,
"type": "eq"
},
{ "id": "issue8088",
"file": "pdfs/issue8088.pdf",
"md5": "5bbc33c7433799487518eb0d8094348c",
"rounds": 1,
"link": false,
"firstPage": 1,
"lastPage": 3,
"type": "eq"
},
{ "id": "bug866395",
"file": "pdfs/bug866395.pdf",
"md5": "f03bc77e84637241980b09a0a220f575",