Merge pull request #16461 from Snuffleupagus/issue-16454

Improve "EI" detection in inline images (PR 12028 follow-up, issue 16454)
This commit is contained in:
Jonas Jenwald 2023-05-23 22:23:22 +02:00 committed by GitHub
commit a6f9505a39
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 56 additions and 36 deletions

View File

@ -26,6 +26,7 @@ import {
MissingDataException,
ParserEOFException,
} from "./core_utils.js";
import { NullStream, Stream } from "./stream.js";
import { Ascii85Stream } from "./ascii_85_stream.js";
import { AsciiHexStream } from "./ascii_hex_stream.js";
import { CCITTFaxStream } from "./ccitt_stream.js";
@ -34,7 +35,6 @@ import { Jbig2Stream } from "./jbig2_stream.js";
import { JpegStream } from "./jpeg_stream.js";
import { JpxStream } from "./jpx_stream.js";
import { LZWStream } from "./lzw_stream.js";
import { NullStream } from "./stream.js";
import { PredictorStream } from "./predictor_stream.js";
import { RunLengthStream } from "./run_length_stream.js";
@ -190,9 +190,9 @@ class Parser {
LF = 0xa,
CR = 0xd,
NUL = 0x0;
const lexer = this.lexer,
const { knownCommands } = this.lexer,
startPos = stream.pos,
n = 10;
n = 15;
let state = 0,
ch,
maybeEIPos;
@ -209,7 +209,12 @@ class Parser {
maybeEIPos = stream.pos;
// Let's check that the next `n` bytes are ASCII... just to be sure.
const followingBytes = stream.peekBytes(n);
for (let i = 0, ii = followingBytes.length; i < ii; i++) {
const ii = followingBytes.length;
if (ii === 0) {
break; // The end of the stream was reached, nothing to check.
}
for (let i = 0; i < ii; i++) {
ch = followingBytes[i];
if (ch === NUL && followingBytes[i + 1] !== NUL) {
// NUL bytes are not supposed to occur *outside* of inline
@ -235,19 +240,47 @@ class Parser {
if (state !== 2) {
continue;
}
// Check that the "EI" sequence isn't part of the image data, since
// that would cause the image to be truncated (fixes issue11124.pdf).
if (lexer.knownCommands) {
const nextObj = lexer.peekObj();
if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
// Not a valid command, i.e. the inline image data *itself*
// contains an "EI" sequence. Resetting the state.
state = 0;
}
} else {
if (!knownCommands) {
warn(
"findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
);
continue;
}
// Check that the "EI" sequence isn't part of the image data, since
// that would cause the image to be truncated (fixes issue11124.pdf).
const tmpLexer = new Lexer(
new Stream(followingBytes.slice()),
knownCommands
);
// Reduce the number of (potential) warning messages.
tmpLexer._hexStringWarn = () => {};
let numArgs = 0;
while (true) {
const nextObj = tmpLexer.getObj();
if (nextObj === EOF) {
state = 0; // No valid command found, resetting the state.
break;
}
if (nextObj instanceof Cmd) {
const knownCommand = knownCommands[nextObj.cmd];
if (!knownCommand) {
// Not a valid command, i.e. the inline image data *itself*
// contains an "EI" sequence. Resetting the state.
state = 0;
break;
} else if (
knownCommand.variableArgs
? numArgs <= knownCommand.numArgs
: numArgs === knownCommand.numArgs
) {
break; // Valid command found.
}
numArgs = 0;
continue;
}
numArgs++;
}
if (state === 2) {
@ -1284,28 +1317,6 @@ class Lexer {
return Cmd.get(str);
}
peekObj() {
const streamPos = this.stream.pos,
currentChar = this.currentChar,
beginInlineImagePos = this.beginInlineImagePos;
let nextObj;
try {
nextObj = this.getObj();
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`peekObj: ${ex}`);
}
// Ensure that we reset *all* relevant `Lexer`-instance state.
this.stream.pos = streamPos;
this.currentChar = currentChar;
this.beginInlineImagePos = beginInlineImagePos;
return nextObj;
}
skipToNextLine() {
let ch = this.currentChar;
while (ch >= 0) {

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/11537582/Pages.62.73.from.0560-22_WSP.Plan_July.2022_Version.1.pdf

View File

@ -5972,6 +5972,14 @@
"link": true,
"type": "eq"
},
{ "id": "issue16454",
"file": "pdfs/issue16454.pdf",
"md5": "82fe0c54a96667472ce999be7a789199",
"rounds": 1,
"lastPage": 1,
"link": true,
"type": "eq"
},
{ "id": "decodeACSuccessive",
"file": "pdfs/decodeACSuccessive.pdf",
"md5": "7749c032624fe27ab8e8d7d5e9a4a93f",