From d1974eae34bce9e275f1a9c3505012513d7aa45e Mon Sep 17 00:00:00 2001
From: Jonas Jenwald
Date: Thu, 11 Sep 2014 16:33:49 +0200
Subject: [PATCH 1/2] Add peekByte method to Stream, DecodeStream and ChunkedStream

peekByte only rewinds the position when a byte was actually read, so
peeking at EOF (a -1 result) does not move the stream position backwards.

---
Note: the post-image blob hashes on the `index` lines for chunked_stream.js
and stream.js predate the EOF guard in peekByte and were not regenerated;
they are informational only for a plain `git apply`.

 src/core/chunked_stream.js | 10 ++++++++++
 src/core/parser.js         |  2 +-
 src/core/stream.js         | 20 +++++++++++++++++++-
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/core/chunked_stream.js b/src/core/chunked_stream.js
index 7c2aa5bd4..9578c79da 100644
--- a/src/core/chunked_stream.js
+++ b/src/core/chunked_stream.js
@@ -195,6 +195,16 @@ var ChunkedStream = (function ChunkedStreamClosure() {
       return bytes.subarray(pos, end);
     },
 
+    peekByte: function ChunkedStream_peekByte() {
+      var peekedByte = this.getByte();
+      // Rewind only if a byte was read. NOTE(review): assumes `getByte`
+      // returns -1 at EOF without advancing `pos` -- confirm.
+      if (peekedByte !== -1) {
+        this.pos--;
+      }
+      return peekedByte;
+    },
+
     peekBytes: function ChunkedStream_peekBytes(length) {
       var bytes = this.getBytes(length);
       this.pos -= bytes.length;
diff --git a/src/core/parser.js b/src/core/parser.js
index 8b83c5da4..4af57fb26 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -478,7 +478,7 @@ var Lexer = (function LexerClosure() {
       return (this.currentChar = this.stream.getByte());
     },
     peekChar: function Lexer_peekChar() {
-      return this.stream.peekBytes(1)[0];
+      return this.stream.peekByte();
     },
     getNumber: function Lexer_getNumber() {
       var ch = this.currentChar;
diff --git a/src/core/stream.js b/src/core/stream.js
index 482091ed7..bd244735c 100644
--- a/src/core/stream.js
+++ b/src/core/stream.js
@@ -73,6 +73,15 @@ var Stream = (function StreamClosure() {
       this.pos = end;
       return bytes.subarray(pos, end);
     },
+    peekByte: function Stream_peekByte() {
+      var peekedByte = this.getByte();
+      // Rewind only if a byte was read. NOTE(review): assumes `getByte`
+      // returns -1 at EOF without advancing `pos` -- confirm.
+      if (peekedByte !== -1) {
+        this.pos--;
+      }
+      return peekedByte;
+    },
     peekBytes: function Stream_peekBytes(length) {
       var bytes = this.getBytes(length);
       this.pos -= bytes.length;
@@ -202,6 +211,15 @@ var DecodeStream = (function DecodeStreamClosure() {
       this.pos = end;
       return this.buffer.subarray(pos, end);
     },
+    peekByte: function DecodeStream_peekByte() {
+      var peekedByte = this.getByte();
+      // Rewind only if a byte was read. NOTE(review): assumes `getByte`
+      // returns -1 at EOF without advancing `pos` -- confirm.
+      if (peekedByte !== -1) {
+        this.pos--;
+      }
+      return peekedByte;
+    },
     peekBytes: function DecodeStream_peekBytes(length) {
       var bytes = this.getBytes(length);
       this.pos -= bytes.length;
@@ -527,7 +545,7 @@ var FlateStream = (function FlateStreamClosure() {
       var end = bufferLength + blockLen;
       this.bufferLength = end;
       if (blockLen === 0) {
-        if (str.peekBytes(1).length === 0) {
+        if (str.peekByte() === -1) {
           this.eof = true;
         }
       } else {

From 2003d83ea6cfad16b147c86adfe555a998523e4c Mon Sep 17 00:00:00 2001
From: Jonas Jenwald
Date: Thu, 11 Sep 2014 01:49:14 +0200
Subject: [PATCH 2/2] Fix loading of inline JPEG images

---
 src/core/parser.js | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/core/parser.js b/src/core/parser.js
index 4af57fb26..73eb75622 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -372,6 +372,22 @@ var Parser = (function ParserClosure() {
         return new LZWStream(stream, maybeLength, earlyChange);
       }
       if (name === 'DCTDecode' || name === 'DCT') {
+        // According to the specification: for inline images, the ID operator
+        // shall be followed by a single whitespace character (unless it uses
+        // ASCII85Decode or ASCIIHexDecode filters).
+        // In practice this only seems to be followed for inline JPEG images,
+        // and generally ignoring the first byte of the stream if it is a
+        // whitespace char can even *cause* issues (e.g. in the CCITTFaxDecode
+        // filters used in issue2984.pdf).
+        // Hence when the first byte of the stream of an inline JPEG image is
+        // a whitespace character, we thus simply skip over it.
+        if (isCmd(this.buf1, 'ID')) {
+          var firstByte = stream.peekByte();
+          if (firstByte === 0x0A /* LF */ || firstByte === 0x0D /* CR */ ||
+              firstByte === 0x20 /* SPACE */) {
+            stream.skip();
+          }
+        }
         xrefStreamStats[StreamType.DCT] = true;
         return new JpegStream(stream, maybeLength, stream.dict, this.xref);
       }