Always parse the entire startXRefQueue in XRef.readXRef (issue 15833)

Previously we'd abort all parsing as soon as an Error was encountered, even though multiple `startXRefQueue`-entries may be available and continued parsing could thus eventually find usable data.

Note that in the referenced PDF document the `startxref`-operator, at the end of the file, points to a position in the middle of an arbitrary `stream`, which is why parsing breaks.
This commit is contained in:
Jonas Jenwald 2022-12-15 13:35:39 +01:00
parent 8587ce6afd
commit 26135b0313
3 changed files with 19 additions and 12 deletions

View File

@ -665,8 +665,8 @@ class XRef {
// circular dependency between tables (fixes bug1393476.pdf).
const startXRefParsedCache = new Set();
try {
while (this.startXRefQueue.length) {
while (this.startXRefQueue.length) {
try {
const startXRef = this.startXRefQueue[0];
if (startXRefParsedCache.has(startXRef)) {
@ -734,20 +734,18 @@ class XRef {
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
this.startXRefQueue.push(obj.num);
}
this.startXRefQueue.shift();
} catch (e) {
if (e instanceof MissingDataException) {
throw e;
}
info("(while reading XRef): " + e);
}
return this.topDict;
} catch (e) {
if (e instanceof MissingDataException) {
throw e;
}
info("(while reading XRef): " + e);
this.startXRefQueue.shift();
}
if (this.topDict) {
return this.topDict;
}
if (recoveryMode) {
return undefined;
}

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/10236962/issue15833.pdf

View File

@ -3522,6 +3522,14 @@
"37R": false
}
},
{ "id": "issue15833",
"file": "pdfs/issue15833.pdf",
"md5": "9562d027b980ea2779dcfb1669f9cf7e",
"link": true,
"rounds": 1,
"lastPage": 1,
"type": "eq"
},
{ "id": "issue11242",
"file": "pdfs/issue11242_reduced.pdf",
"md5": "ba50b6ee537f3e815ccfe0c99e598e05",