Improve robustness of stream parser (invalid length)

When the parser finds a stream, it retrieves the Length from the stream
dictionary and advances the lexer to the offset as specified in Length.
If this Length is incorrect, the lexer could end up anywhere.

When the lexer gets into an invalid state, it can throw errors. For
example, in issue 6108, the lexer ends up inside the stream data. This
stream uses the ASCIIHexDecode filter, so its data consists entirely of
ASCII characters, and the lexer interprets the data as a command token.
Tokens cannot be longer than 127 bytes, so once 128 bytes have been
consumed the lexer throws a "Command token too long" error.

Another possible error is "Illegal character: 41" when the lexer happens
to end up at a ')' due to the length mismatch.

These problems are solved by catching lexer errors and recovering the
parser via the existing stream length detection branch.
This commit is contained in:
Rob Wu 2015-07-11 12:15:43 +02:00
parent 7d4303b7c4
commit e211c25f06
4 changed files with 49 additions and 3 deletions

View File

@ -53,6 +53,16 @@ var Parser = (function ParserClosure() {
this.buf2 = this.lexer.getObj();
}
},
tryShift: function Parser_tryShift() {
try {
this.shift();
return true;
} catch (e) {
// Upon failure, the caller should reset this.lexer.pos to a known good
// state and call this.shift() twice to reset the buffers.
return false;
}
},
getObj: function Parser_getObj(cipherTransform) {
var buf1 = this.buf1;
this.shift();
@ -426,9 +436,10 @@ var Parser = (function ParserClosure() {
stream.pos = pos + length;
lexer.nextChar();
this.shift(); // '>>'
this.shift(); // 'stream'
if (!isCmd(this.buf1, 'endstream')) {
// Shift '>>' and check whether the new object marks the end of the stream
if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
this.shift(); // 'stream'
} else {
// bad stream length, scanning for endstream
stream.pos = pos;
var SCAN_BLOCK_SIZE = 2048;

View File

@ -146,3 +146,4 @@
!issue6068.pdf
!issue6081.pdf
!issue6069.pdf
!issue6108.pdf

28
test/pdfs/issue6108.pdf Normal file
View File

@ -0,0 +1,28 @@
%PDF-1.0
1 0 obj
<</Type/Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Type/Pages/Count 1/Kids[3 0 R]/MediaBox [0 0 400 50]>>
endobj
3 0 obj
<</Type/Page/Parent 2 0 R/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Arial>>>>>>/Contents 4 0 R>>
endobj
4 0 obj
<</Length 9/Filter/ASCIIHexDecode>>
stream
42542F4631203132205466203230203230205464202841534349494865784465636F64652066696C7465722C20736D616C6C204C656E6774682C2073697A652064696666206973206D6F7265207468616E203133352062797465732920546A204554>
endstream
endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000054 00000 n
0000000127 00000 n
0000000254 00000 n
trailer
<</Root 1 0 R/Size 5>>
startxref
519
%%EOF

View File

@ -2284,5 +2284,11 @@
"md5": "d0ad8871f4116bca8e39513ffa8b7d8e",
"rounds": 1,
"type": "load"
},
{ "id": "issue6108",
"file": "pdfs/issue6108.pdf",
"md5": "8961cb55149495989a80bf0487e0f076",
"rounds": 1,
"type": "load"
}
]