Merge pull request #16461 from Snuffleupagus/issue-16454
Improve "EI" detection in inline images (PR 12028 follow-up, issue 16454)
This commit is contained in:
commit
a6f9505a39
@ -26,6 +26,7 @@ import {
|
|||||||
MissingDataException,
|
MissingDataException,
|
||||||
ParserEOFException,
|
ParserEOFException,
|
||||||
} from "./core_utils.js";
|
} from "./core_utils.js";
|
||||||
|
import { NullStream, Stream } from "./stream.js";
|
||||||
import { Ascii85Stream } from "./ascii_85_stream.js";
|
import { Ascii85Stream } from "./ascii_85_stream.js";
|
||||||
import { AsciiHexStream } from "./ascii_hex_stream.js";
|
import { AsciiHexStream } from "./ascii_hex_stream.js";
|
||||||
import { CCITTFaxStream } from "./ccitt_stream.js";
|
import { CCITTFaxStream } from "./ccitt_stream.js";
|
||||||
@ -34,7 +35,6 @@ import { Jbig2Stream } from "./jbig2_stream.js";
|
|||||||
import { JpegStream } from "./jpeg_stream.js";
|
import { JpegStream } from "./jpeg_stream.js";
|
||||||
import { JpxStream } from "./jpx_stream.js";
|
import { JpxStream } from "./jpx_stream.js";
|
||||||
import { LZWStream } from "./lzw_stream.js";
|
import { LZWStream } from "./lzw_stream.js";
|
||||||
import { NullStream } from "./stream.js";
|
|
||||||
import { PredictorStream } from "./predictor_stream.js";
|
import { PredictorStream } from "./predictor_stream.js";
|
||||||
import { RunLengthStream } from "./run_length_stream.js";
|
import { RunLengthStream } from "./run_length_stream.js";
|
||||||
|
|
||||||
@ -190,9 +190,9 @@ class Parser {
|
|||||||
LF = 0xa,
|
LF = 0xa,
|
||||||
CR = 0xd,
|
CR = 0xd,
|
||||||
NUL = 0x0;
|
NUL = 0x0;
|
||||||
const lexer = this.lexer,
|
const { knownCommands } = this.lexer,
|
||||||
startPos = stream.pos,
|
startPos = stream.pos,
|
||||||
n = 10;
|
n = 15;
|
||||||
let state = 0,
|
let state = 0,
|
||||||
ch,
|
ch,
|
||||||
maybeEIPos;
|
maybeEIPos;
|
||||||
@ -209,7 +209,12 @@ class Parser {
|
|||||||
maybeEIPos = stream.pos;
|
maybeEIPos = stream.pos;
|
||||||
// Let's check that the next `n` bytes are ASCII... just to be sure.
|
// Let's check that the next `n` bytes are ASCII... just to be sure.
|
||||||
const followingBytes = stream.peekBytes(n);
|
const followingBytes = stream.peekBytes(n);
|
||||||
for (let i = 0, ii = followingBytes.length; i < ii; i++) {
|
|
||||||
|
const ii = followingBytes.length;
|
||||||
|
if (ii === 0) {
|
||||||
|
break; // The end of the stream was reached, nothing to check.
|
||||||
|
}
|
||||||
|
for (let i = 0; i < ii; i++) {
|
||||||
ch = followingBytes[i];
|
ch = followingBytes[i];
|
||||||
if (ch === NUL && followingBytes[i + 1] !== NUL) {
|
if (ch === NUL && followingBytes[i + 1] !== NUL) {
|
||||||
// NUL bytes are not supposed to occur *outside* of inline
|
// NUL bytes are not supposed to occur *outside* of inline
|
||||||
@ -235,19 +240,47 @@ class Parser {
|
|||||||
if (state !== 2) {
|
if (state !== 2) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Check that the "EI" sequence isn't part of the image data, since
|
if (!knownCommands) {
|
||||||
// that would cause the image to be truncated (fixes issue11124.pdf).
|
|
||||||
if (lexer.knownCommands) {
|
|
||||||
const nextObj = lexer.peekObj();
|
|
||||||
if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
|
|
||||||
// Not a valid command, i.e. the inline image data *itself*
|
|
||||||
// contains an "EI" sequence. Resetting the state.
|
|
||||||
state = 0;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
warn(
|
warn(
|
||||||
"findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
|
"findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
|
||||||
);
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Check that the "EI" sequence isn't part of the image data, since
|
||||||
|
// that would cause the image to be truncated (fixes issue11124.pdf).
|
||||||
|
const tmpLexer = new Lexer(
|
||||||
|
new Stream(followingBytes.slice()),
|
||||||
|
knownCommands
|
||||||
|
);
|
||||||
|
// Reduce the number of (potential) warning messages.
|
||||||
|
tmpLexer._hexStringWarn = () => {};
|
||||||
|
let numArgs = 0;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const nextObj = tmpLexer.getObj();
|
||||||
|
|
||||||
|
if (nextObj === EOF) {
|
||||||
|
state = 0; // No valid command found, resetting the state.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (nextObj instanceof Cmd) {
|
||||||
|
const knownCommand = knownCommands[nextObj.cmd];
|
||||||
|
if (!knownCommand) {
|
||||||
|
// Not a valid command, i.e. the inline image data *itself*
|
||||||
|
// contains an "EI" sequence. Resetting the state.
|
||||||
|
state = 0;
|
||||||
|
break;
|
||||||
|
} else if (
|
||||||
|
knownCommand.variableArgs
|
||||||
|
? numArgs <= knownCommand.numArgs
|
||||||
|
: numArgs === knownCommand.numArgs
|
||||||
|
) {
|
||||||
|
break; // Valid command found.
|
||||||
|
}
|
||||||
|
numArgs = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
numArgs++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state === 2) {
|
if (state === 2) {
|
||||||
@ -1284,28 +1317,6 @@ class Lexer {
|
|||||||
return Cmd.get(str);
|
return Cmd.get(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
peekObj() {
|
|
||||||
const streamPos = this.stream.pos,
|
|
||||||
currentChar = this.currentChar,
|
|
||||||
beginInlineImagePos = this.beginInlineImagePos;
|
|
||||||
|
|
||||||
let nextObj;
|
|
||||||
try {
|
|
||||||
nextObj = this.getObj();
|
|
||||||
} catch (ex) {
|
|
||||||
if (ex instanceof MissingDataException) {
|
|
||||||
throw ex;
|
|
||||||
}
|
|
||||||
warn(`peekObj: ${ex}`);
|
|
||||||
}
|
|
||||||
// Ensure that we reset *all* relevant `Lexer`-instance state.
|
|
||||||
this.stream.pos = streamPos;
|
|
||||||
this.currentChar = currentChar;
|
|
||||||
this.beginInlineImagePos = beginInlineImagePos;
|
|
||||||
|
|
||||||
return nextObj;
|
|
||||||
}
|
|
||||||
|
|
||||||
skipToNextLine() {
|
skipToNextLine() {
|
||||||
let ch = this.currentChar;
|
let ch = this.currentChar;
|
||||||
while (ch >= 0) {
|
while (ch >= 0) {
|
||||||
|
1
test/pdfs/issue16454.pdf.link
Normal file
1
test/pdfs/issue16454.pdf.link
Normal file
@ -0,0 +1 @@
|
|||||||
|
https://github.com/mozilla/pdf.js/files/11537582/Pages.62.73.from.0560-22_WSP.Plan_July.2022_Version.1.pdf
|
@ -5972,6 +5972,14 @@
|
|||||||
"link": true,
|
"link": true,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue16454",
|
||||||
|
"file": "pdfs/issue16454.pdf",
|
||||||
|
"md5": "82fe0c54a96667472ce999be7a789199",
|
||||||
|
"rounds": 1,
|
||||||
|
"lastPage": 1,
|
||||||
|
"link": true,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
{ "id": "decodeACSuccessive",
|
{ "id": "decodeACSuccessive",
|
||||||
"file": "pdfs/decodeACSuccessive.pdf",
|
"file": "pdfs/decodeACSuccessive.pdf",
|
||||||
"md5": "7749c032624fe27ab8e8d7d5e9a4a93f",
|
"md5": "7749c032624fe27ab8e8d7d5e9a4a93f",
|
||||||
|
Loading…
Reference in New Issue
Block a user