Merge pull request #16461 from Snuffleupagus/issue-16454
Improve "EI" detection in inline images (PR 12028 follow-up, issue 16454)
This commit is contained in:
commit
a6f9505a39
@ -26,6 +26,7 @@ import {
|
||||
MissingDataException,
|
||||
ParserEOFException,
|
||||
} from "./core_utils.js";
|
||||
import { NullStream, Stream } from "./stream.js";
|
||||
import { Ascii85Stream } from "./ascii_85_stream.js";
|
||||
import { AsciiHexStream } from "./ascii_hex_stream.js";
|
||||
import { CCITTFaxStream } from "./ccitt_stream.js";
|
||||
@ -34,7 +35,6 @@ import { Jbig2Stream } from "./jbig2_stream.js";
|
||||
import { JpegStream } from "./jpeg_stream.js";
|
||||
import { JpxStream } from "./jpx_stream.js";
|
||||
import { LZWStream } from "./lzw_stream.js";
|
||||
import { NullStream } from "./stream.js";
|
||||
import { PredictorStream } from "./predictor_stream.js";
|
||||
import { RunLengthStream } from "./run_length_stream.js";
|
||||
|
||||
@ -190,9 +190,9 @@ class Parser {
|
||||
LF = 0xa,
|
||||
CR = 0xd,
|
||||
NUL = 0x0;
|
||||
const lexer = this.lexer,
|
||||
const { knownCommands } = this.lexer,
|
||||
startPos = stream.pos,
|
||||
n = 10;
|
||||
n = 15;
|
||||
let state = 0,
|
||||
ch,
|
||||
maybeEIPos;
|
||||
@ -209,7 +209,12 @@ class Parser {
|
||||
maybeEIPos = stream.pos;
|
||||
// Let's check that the next `n` bytes are ASCII... just to be sure.
|
||||
const followingBytes = stream.peekBytes(n);
|
||||
for (let i = 0, ii = followingBytes.length; i < ii; i++) {
|
||||
|
||||
const ii = followingBytes.length;
|
||||
if (ii === 0) {
|
||||
break; // The end of the stream was reached, nothing to check.
|
||||
}
|
||||
for (let i = 0; i < ii; i++) {
|
||||
ch = followingBytes[i];
|
||||
if (ch === NUL && followingBytes[i + 1] !== NUL) {
|
||||
// NUL bytes are not supposed to occur *outside* of inline
|
||||
@ -235,19 +240,47 @@ class Parser {
|
||||
if (state !== 2) {
|
||||
continue;
|
||||
}
|
||||
// Check that the "EI" sequence isn't part of the image data, since
|
||||
// that would cause the image to be truncated (fixes issue11124.pdf).
|
||||
if (lexer.knownCommands) {
|
||||
const nextObj = lexer.peekObj();
|
||||
if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
|
||||
// Not a valid command, i.e. the inline image data *itself*
|
||||
// contains an "EI" sequence. Resetting the state.
|
||||
state = 0;
|
||||
}
|
||||
} else {
|
||||
if (!knownCommands) {
|
||||
warn(
|
||||
"findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
|
||||
);
|
||||
continue;
|
||||
}
|
||||
// Check that the "EI" sequence isn't part of the image data, since
|
||||
// that would cause the image to be truncated (fixes issue11124.pdf).
|
||||
const tmpLexer = new Lexer(
|
||||
new Stream(followingBytes.slice()),
|
||||
knownCommands
|
||||
);
|
||||
// Reduce the number of (potential) warning messages.
|
||||
tmpLexer._hexStringWarn = () => {};
|
||||
let numArgs = 0;
|
||||
|
||||
while (true) {
|
||||
const nextObj = tmpLexer.getObj();
|
||||
|
||||
if (nextObj === EOF) {
|
||||
state = 0; // No valid command found, resetting the state.
|
||||
break;
|
||||
}
|
||||
if (nextObj instanceof Cmd) {
|
||||
const knownCommand = knownCommands[nextObj.cmd];
|
||||
if (!knownCommand) {
|
||||
// Not a valid command, i.e. the inline image data *itself*
|
||||
// contains an "EI" sequence. Resetting the state.
|
||||
state = 0;
|
||||
break;
|
||||
} else if (
|
||||
knownCommand.variableArgs
|
||||
? numArgs <= knownCommand.numArgs
|
||||
: numArgs === knownCommand.numArgs
|
||||
) {
|
||||
break; // Valid command found.
|
||||
}
|
||||
numArgs = 0;
|
||||
continue;
|
||||
}
|
||||
numArgs++;
|
||||
}
|
||||
|
||||
if (state === 2) {
|
||||
@ -1284,28 +1317,6 @@ class Lexer {
|
||||
return Cmd.get(str);
|
||||
}
|
||||
|
||||
peekObj() {
|
||||
const streamPos = this.stream.pos,
|
||||
currentChar = this.currentChar,
|
||||
beginInlineImagePos = this.beginInlineImagePos;
|
||||
|
||||
let nextObj;
|
||||
try {
|
||||
nextObj = this.getObj();
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
warn(`peekObj: ${ex}`);
|
||||
}
|
||||
// Ensure that we reset *all* relevant `Lexer`-instance state.
|
||||
this.stream.pos = streamPos;
|
||||
this.currentChar = currentChar;
|
||||
this.beginInlineImagePos = beginInlineImagePos;
|
||||
|
||||
return nextObj;
|
||||
}
|
||||
|
||||
skipToNextLine() {
|
||||
let ch = this.currentChar;
|
||||
while (ch >= 0) {
|
||||
|
1
test/pdfs/issue16454.pdf.link
Normal file
1
test/pdfs/issue16454.pdf.link
Normal file
@ -0,0 +1 @@
|
||||
https://github.com/mozilla/pdf.js/files/11537582/Pages.62.73.from.0560-22_WSP.Plan_July.2022_Version.1.pdf
|
@ -5972,6 +5972,14 @@
|
||||
"link": true,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue16454",
|
||||
"file": "pdfs/issue16454.pdf",
|
||||
"md5": "82fe0c54a96667472ce999be7a789199",
|
||||
"rounds": 1,
|
||||
"lastPage": 1,
|
||||
"link": true,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "decodeACSuccessive",
|
||||
"file": "pdfs/decodeACSuccessive.pdf",
|
||||
"md5": "7749c032624fe27ab8e8d7d5e9a4a93f",
|
||||
|
Loading…
Reference in New Issue
Block a user