From afcc99a86d62e1c4d21b5bda27d71fe2ab36c01c Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 13 Nov 2021 13:06:21 +0100 Subject: [PATCH] When parsing corrupt documents without any trailer-dictionary, fall back to the "top"-dictionary (issue 14269) There's obviously no guarantee that this will work in general, if the document is sufficiently corrupt, but it should hopefully be better than just throwing `InvalidPDFException` as currently happens. Please note that, as is often the case with corrupt documents, it's somewhat difficult to know if we're rendering the document "correctly" with this patch[1]. In this case even Adobe Reader cannot open the document, which is always a good sign that it's *really* corrupt; however, we're at least able to render *something* with this patch. --- [1] Whatever "correct" even means when dealing with corrupt PDF documents, where oftentimes different PDF viewers won't agree completely. --- src/core/xref.js | 6 ++++++ test/pdfs/issue14269.pdf.link | 1 + test/test_manifest.json | 8 ++++++++ 3 files changed, 15 insertions(+) create mode 100644 test/pdfs/issue14269.pdf.link diff --git a/src/core/xref.js b/src/core/xref.js index 401fb0024..35ea6e7d9 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -590,6 +590,10 @@ class XRef { if (trailerDict) { return trailerDict; } + // No trailer dictionary found, taking the "top"-dictionary (if exists). 
+ if (this.topDict) { + return this.topDict; + } // nothing helps throw new InvalidPDFException("Invalid PDF structure."); } @@ -680,6 +684,8 @@ class XRef { throw e; } info("(while reading XRef): " + e); + + this.startXRefQueue.shift(); } if (recoveryMode) { diff --git a/test/pdfs/issue14269.pdf.link b/test/pdfs/issue14269.pdf.link new file mode 100644 index 000000000..02d7011a1 --- /dev/null +++ b/test/pdfs/issue14269.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/7529789/test.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index d06be788a..1bd10bd55 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -91,6 +91,14 @@ "rounds": 1, "type": "eq" }, + { "id": "issue14269", + "file": "pdfs/issue14269.pdf", + "md5": "f34abf77a418f54e13fbcd03b063432e", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "eq" + }, { "id": "issue11549", "file": "pdfs/issue11549_reduced.pdf", "md5": "a1ea636f413e02e10dbdf379ab4a99ae",