Always parse the entire startXRefQueue in XRef.readXRef (issue 15833)

Previously, all parsing was aborted as soon as an Error was encountered, even though multiple `startXRefQueue` entries may be available and continued parsing could therefore still find usable data. Note that in the referenced PDF document the `startxref` operator, at the end of the file, points to a position in the middle of an arbitrary `stream`, which is why parsing breaks.
2022-12-15 13:35:39 +01:00 · 2022-12-15 13:35:39 +01:00 · 26135b0313
commit 26135b0313
parent 8587ce6afd
3 changed files with 19 additions and 12 deletions
--- a/src/core/xref.js
+++ b/src/core/xref.js
@ -665,8 +665,8 @@ class XRef {
    // circular dependency between tables (fixes bug1393476.pdf).
    const startXRefParsedCache = new Set();

-    try {
-      while (this.startXRefQueue.length) {
+    while (this.startXRefQueue.length) {
+      try {
        const startXRef = this.startXRefQueue[0];

        if (startXRefParsedCache.has(startXRef)) {
@ -734,20 +734,18 @@ class XRef {
          // This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
          this.startXRefQueue.push(obj.num);
        }
-
-        this.startXRefQueue.shift();
+      } catch (e) {
+        if (e instanceof MissingDataException) {
+          throw e;
+        }
+        info("(while reading XRef): " + e);
      }
-
-      return this.topDict;
-    } catch (e) {
-      if (e instanceof MissingDataException) {
-        throw e;
-      }
-      info("(while reading XRef): " + e);
-
      this.startXRefQueue.shift();
    }

+    if (this.topDict) {
+      return this.topDict;
+    }
    if (recoveryMode) {
      return undefined;
    }
--- a/test/pdfs/issue15833.pdf.link
+++ b/test/pdfs/issue15833.pdf.link
@ -0,0 +1 @@
+https://github.com/mozilla/pdf.js/files/10236962/issue15833.pdf
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@ -3522,6 +3522,14 @@
         "37R": false
       }
    },
+    {  "id": "issue15833",
+       "file": "pdfs/issue15833.pdf",
+       "md5": "9562d027b980ea2779dcfb1669f9cf7e",
+       "link": true,
+       "rounds": 1,
+       "lastPage": 1,
+       "type": "eq"
+    },
    {  "id": "issue11242",
       "file": "pdfs/issue11242_reduced.pdf",
       "md5": "ba50b6ee537f3e815ccfe0c99e598e05",
				`@ -0,0 +1 @@`
				`https://github.com/mozilla/pdf.js/files/10236962/issue15833.pdf`