diff --git a/src/core/parser.js b/src/core/parser.js index 371eed242..8a0c92e58 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -26,6 +26,7 @@ import { MissingDataException, ParserEOFException, } from "./core_utils.js"; +import { NullStream, Stream } from "./stream.js"; import { Ascii85Stream } from "./ascii_85_stream.js"; import { AsciiHexStream } from "./ascii_hex_stream.js"; import { CCITTFaxStream } from "./ccitt_stream.js"; @@ -34,7 +35,6 @@ import { Jbig2Stream } from "./jbig2_stream.js"; import { JpegStream } from "./jpeg_stream.js"; import { JpxStream } from "./jpx_stream.js"; import { LZWStream } from "./lzw_stream.js"; -import { NullStream } from "./stream.js"; import { PredictorStream } from "./predictor_stream.js"; import { RunLengthStream } from "./run_length_stream.js"; @@ -190,9 +190,9 @@ class Parser { LF = 0xa, CR = 0xd, NUL = 0x0; - const lexer = this.lexer, + const { knownCommands } = this.lexer, startPos = stream.pos, - n = 10; + n = 15; let state = 0, ch, maybeEIPos; @@ -209,7 +209,12 @@ class Parser { maybeEIPos = stream.pos; // Let's check that the next `n` bytes are ASCII... just to be sure. const followingBytes = stream.peekBytes(n); - for (let i = 0, ii = followingBytes.length; i < ii; i++) { + + const ii = followingBytes.length; + if (ii === 0) { + break; // The end of the stream was reached, nothing to check. + } + for (let i = 0; i < ii; i++) { ch = followingBytes[i]; if (ch === NUL && followingBytes[i + 1] !== NUL) { // NUL bytes are not supposed to occur *outside* of inline @@ -235,19 +240,47 @@ class Parser { if (state !== 2) { continue; } - // Check that the "EI" sequence isn't part of the image data, since - // that would cause the image to be truncated (fixes issue11124.pdf). - if (lexer.knownCommands) { - const nextObj = lexer.peekObj(); - if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) { - // Not a valid command, i.e. the inline image data *itself* - // contains an "EI" sequence. Resetting the state. - state = 0; - } - } else { + if (!knownCommands) { warn( "findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined." ); + continue; + } + // Check that the "EI" sequence isn't part of the image data, since + // that would cause the image to be truncated (fixes issue11124.pdf). + const tmpLexer = new Lexer( + new Stream(followingBytes.slice()), + knownCommands + ); + // Reduce the number of (potential) warning messages. + tmpLexer._hexStringWarn = () => {}; + let numArgs = 0; + + while (true) { + const nextObj = tmpLexer.getObj(); + + if (nextObj === EOF) { + state = 0; // No valid command found, resetting the state. + break; + } + if (nextObj instanceof Cmd) { + const knownCommand = knownCommands[nextObj.cmd]; + if (!knownCommand) { + // Not a valid command, i.e. the inline image data *itself* + // contains an "EI" sequence. Resetting the state. + state = 0; + break; + } else if ( + knownCommand.variableArgs + ? numArgs <= knownCommand.numArgs + : numArgs === knownCommand.numArgs + ) { + break; // Valid command found. + } + numArgs = 0; + continue; + } + numArgs++; } if (state === 2) { @@ -1284,28 +1317,6 @@ class Lexer { return Cmd.get(str); } - peekObj() { - const streamPos = this.stream.pos, - currentChar = this.currentChar, - beginInlineImagePos = this.beginInlineImagePos; - - let nextObj; - try { - nextObj = this.getObj(); - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - warn(`peekObj: ${ex}`); - } - // Ensure that we reset *all* relevant `Lexer`-instance state. - this.stream.pos = streamPos; - this.currentChar = currentChar; - this.beginInlineImagePos = beginInlineImagePos; - - return nextObj; - } - skipToNextLine() { let ch = this.currentChar; while (ch >= 0) { diff --git a/test/pdfs/issue16454.pdf.link b/test/pdfs/issue16454.pdf.link new file mode 100644 index 000000000..228782c9f --- /dev/null +++ b/test/pdfs/issue16454.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/11537582/Pages.62.73.from.0560-22_WSP.Plan_July.2022_Version.1.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index f461a447a..bb17c1f18 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -5963,6 +5963,14 @@ "link": true, "type": "eq" }, + { "id": "issue16454", + "file": "pdfs/issue16454.pdf", + "md5": "82fe0c54a96667472ce999be7a789199", + "rounds": 1, + "lastPage": 1, + "link": true, + "type": "eq" + }, { "id": "decodeACSuccessive", "file": "pdfs/decodeACSuccessive.pdf", "md5": "7749c032624fe27ab8e8d7d5e9a4a93f",