From e211c25f069f1eba005a9c2f37c5eea13107c256 Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Sat, 11 Jul 2015 12:15:43 +0200 Subject: [PATCH] Improve robustness of stream parser (invalid length) When the parser finds a stream, it retrieves the Length from the stream dictionary and advances the lexer to the offset as specified in Length. If this Length is incorrect, the lexer could end up anywhere. When the lexer gets in an invalid state, it could throw errors. For example, in issue 6108, the lexer ends up inside the stream data. This stream has the ASCIIHexDecode filter, so all characters are made up from ASCII characters, and the lexer interprets it as a command token. Tokens cannot be longer than 127 bytes, so eventually 128 bytes are consumed and the lexer throws "Command token too long" error. Another possible error is "Illegal character: 41" when the lexer happens to end up at a ')' due to the length mismatch. These problems are solved by catching lexer errors and recovering the parser via the existing stream length detection branch. --- src/core/parser.js | 17 ++++++++++++++--- test/pdfs/.gitignore | 1 + test/pdfs/issue6108.pdf | 28 ++++++++++++++++++++++++++++ test/test_manifest.json | 6 ++++++ 4 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 test/pdfs/issue6108.pdf diff --git a/src/core/parser.js b/src/core/parser.js index 3fc9034f7..6c98b085a 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -53,6 +53,16 @@ var Parser = (function ParserClosure() { this.buf2 = this.lexer.getObj(); } }, + tryShift: function Parser_tryShift() { + try { + this.shift(); + return true; + } catch (e) { + // Upon failure, the caller should reset this.lexer.pos to a known good + // state and call this.shift() twice to reset the buffers. + return false; + } + }, getObj: function Parser_getObj(cipherTransform) { var buf1 = this.buf1; this.shift(); @@ -426,9 +436,10 @@ var Parser = (function ParserClosure() { stream.pos = pos + length; lexer.nextChar(); - this.shift(); // '>>' - this.shift(); // 'stream' - if (!isCmd(this.buf1, 'endstream')) { + // Shift '>>' and check whether the new object marks the end of the stream + if (this.tryShift() && isCmd(this.buf2, 'endstream')) { + this.shift(); // 'stream' + } else { // bad stream length, scanning for endstream stream.pos = pos; var SCAN_BLOCK_SIZE = 2048; diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index d428da4f4..a79dc29b0 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -146,3 +146,4 @@ !issue6068.pdf !issue6081.pdf !issue6069.pdf +!issue6108.pdf diff --git a/test/pdfs/issue6108.pdf b/test/pdfs/issue6108.pdf new file mode 100644 index 000000000..ccf374caa --- /dev/null +++ b/test/pdfs/issue6108.pdf @@ -0,0 +1,28 @@ +%PDF-1.0 +1 0 obj +<> +endobj +2 0 obj +<> +endobj +3 0 obj +<>>>>>/Contents 4 0 R>> +endobj +4 0 obj +<> +stream +42542F4631203132205466203230203230205464202841534349494865784465636F64652066696C7465722C20736D616C6C204C656E6774682C2073697A652064696666206973206D6F7265207468616E203133352062797465732920546A204554> +endstream +endobj +xref +0 5 +0000000000 65535 f +0000000009 00000 n +0000000054 00000 n +0000000127 00000 n +0000000254 00000 n +trailer +<> +startxref +519 +%%EOF diff --git a/test/test_manifest.json b/test/test_manifest.json index c303e9f6b..43d01e4f7 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -2284,5 +2284,11 @@ "md5": "d0ad8871f4116bca8e39513ffa8b7d8e", "rounds": 1, "type": "load" + }, + { "id": "issue6108", + "file": "pdfs/issue6108.pdf", + "md5": "8961cb55149495989a80bf0487e0f076", + "rounds": 1, + "type": "load" } ]