From 28d2ada59ca2a19c0206f06aeb29fefb5309ec28 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 26 Jun 2020 12:36:28 +0200 Subject: [PATCH] Attempt to detect inline images which contain "EI" sequence in the actual image data (issue 11124) This should reduce the possibility of accidentally truncating some inline images, while *not* causing the "EI" detection to become significantly slower.[1] There's obviously a possibility that these added checks are not sufficient to catch *every* single case of "EI" sequences within the actual inline image data, but without specific test-cases I decided against over-engineering the solution here. *Please note:* The interpolation issues are somewhat orthogonal to the main issue here, which is the truncated image, and it's already tracked elsewhere. --- [1] I've looked at the issue a few times, and this is the first approach that I was able to come up with that didn't cause *unacceptable* performance regressions in e.g. issue 2618. --- src/core/parser.js | 48 +++++++++++++++++++++++++++++++++++++--- test/pdfs/.gitignore | 1 + test/pdfs/issue11124.pdf | 33 +++++++++++++++++++++++++++ test/test_manifest.json | 6 +++++ 4 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 test/pdfs/issue11124.pdf diff --git a/src/core/parser.js b/src/core/parser.js index 06c8bd855..25ec213f0 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -203,10 +203,11 @@ class Parser { I = 0x49, SPACE = 0x20, LF = 0xa, - CR = 0xd; - const n = 10, + CR = 0xd, NUL = 0x0; - const startPos = stream.pos; + const lexer = this.lexer, + startPos = stream.pos, + n = 10; let state = 0, ch, maybeEIPos; @@ -243,6 +244,25 @@ class Parser { break; } } + + if (state !== 2) { + continue; + } + // Check that the "EI" sequence isn't part of the image data, since + // that would cause the image to be truncated (fixes issue11124.pdf). + if (lexer.knownCommands) { + const nextObj = lexer.peekObj(); + if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) { + // Not a valid command, i.e. the inline image data *itself* + // contains an "EI" sequence. Resetting the state. + state = 0; + } + } else { + warn( + "findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined." + ); + } + if (state === 2) { break; // Finished! } @@ -1276,6 +1296,28 @@ class Lexer { return Cmd.get(str); } + peekObj() { + const streamPos = this.stream.pos, + currentChar = this.currentChar, + beginInlineImagePos = this.beginInlineImagePos; + + let nextObj; + try { + nextObj = this.getObj(); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + warn(`peekObj: ${ex}`); + } + // Ensure that we reset *all* relevant `Lexer`-instance state. + this.stream.pos = streamPos; + this.currentChar = currentChar; + this.beginInlineImagePos = beginInlineImagePos; + + return nextObj; + } + skipToNextLine() { let ch = this.currentChar; while (ch >= 0) { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 4d463ddc9..c8cca34e8 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -254,6 +254,7 @@ !issue6336.pdf !issue6387.pdf !issue6410.pdf +!issue11124.pdf !issue8586.pdf !jbig2_symbol_offset.pdf !gradientfill.pdf diff --git a/test/pdfs/issue11124.pdf b/test/pdfs/issue11124.pdf new file mode 100644 index 000000000..3ad8b25f3 --- /dev/null +++ b/test/pdfs/issue11124.pdf @@ -0,0 +1,33 @@ +%PDF-1.3 +%âãÏÓ +1 0 obj<> +endobj +2 0 obj<> +endobj +3 0 obj<> +endobj +4 0 obj<>>> +endobj +5 0 obj<> +stream +100 0 0 100 0 0 cm +BI /W 4 /H 4 /CS /RGB /BPC 8 +ID +00000z0z00zzz00z0zzz0zzzEI aazazaazzzaazazzzazzz +EI + +endstream +endobj +xref +0 6 +0000000000 65535 f +0000000015 00000 n +0000000059 00000 n +0000000156 00000 n +0000000207 00000 n +0000000341 00000 n +trailer +<]/Info 2 0 R/Root 1 0 R/Size 6>> +startxref +492 +%%EOF diff --git a/test/test_manifest.json b/test/test_manifest.json index f724bff8c..24f2570f8 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -3147,6 +3147,12 @@ "type": "text", "about": "Invisible (and broken) TrueType font used for text-selection." }, + { "id": "issue11124", + "file": "pdfs/issue11124.pdf", + "md5": "9bde831515dc6b8bb2c7c00c8189aca9", + "rounds": 1, + "type": "eq" + }, { "id": "issue11768", "file": "pdfs/issue11768_reduced.pdf", "md5": "0cafde97d78bb6883531a325a996a5ef",