From 3ce8fe79270e9a58ec17eb9e837f2cfdbc048322 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 4 Mar 2019 23:17:21 +0100 Subject: [PATCH] Handle corrupt ASCII85Decode inline images with whitespace "inside" of the EOD marker (issue 10614) There are a number of things wrong with the PDF document, since its inline images are first of all *a lot* larger than the 4 KB limit (as mandated by the specification, see https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf#G7.1852045). Furthermore, the actual ASCII85Decode data is interspersed with *a lot* of needless whitespace, in particular also "inside" of the EOD (end-of-data) marker, which thus completely breaks the detection. Note that this patch should be safe, since the specification (see https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf#G6.1940130) explicitly mentions that *all* whitespace should be ignored. --- src/core/parser.js | 18 +++++++++++++----- test/pdfs/issue10614.pdf.link | 1 + test/test_manifest.json | 8 ++++++++ 3 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 test/pdfs/issue10614.pdf.link diff --git a/src/core/parser.js b/src/core/parser.js index c01001fca..8eb8a578c 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -336,14 +336,22 @@ var Parser = (function ParserClosure() { * Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream. * @returns {number} The inline stream length. */ - findASCII85DecodeInlineStreamEnd: - function Parser_findASCII85DecodeInlineStreamEnd(stream) { + findASCII85DecodeInlineStreamEnd(stream) { var TILDE = 0x7E, GT = 0x3E; var startPos = stream.pos, ch, length; while ((ch = stream.getByte()) !== -1) { - if (ch === TILDE && stream.peekByte() === GT) { - stream.skip(); - break; + if (ch === TILDE) { + ch = stream.peekByte(); + // Handle corrupt PDF documents which contains whitespace "inside" of + // the EOD marker (fixes issue10614.pdf). 
+ while (isSpace(ch)) { + stream.skip(); + ch = stream.peekByte(); + } + if (ch === GT) { + stream.skip(); + break; + } } } length = stream.pos - startPos; diff --git a/test/pdfs/issue10614.pdf.link b/test/pdfs/issue10614.pdf.link new file mode 100644 index 000000000..203d534fa --- /dev/null +++ b/test/pdfs/issue10614.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/2927954/FOX.ALEXANDER.F.VS.FOX.BARBARA.E.2015-004684-FC-04.Doc-46-Memorandum-of-Law.Fla.11th.Cir.Ct.May.31.2016.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 5baf33905..b5d8649f0 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1465,6 +1465,14 @@ "link": false, "type": "text" }, + { "id": "issue10614", + "file": "pdfs/issue10614.pdf", + "md5": "c41da60ce9af100cb78e1c2a6ba18232", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "eq" + }, { "id": "issue6071", "file": "pdfs/issue6071.pdf", "md5": "2e08526d8e7c9ba4269fc12ef488d3eb",