Attempt to detect inline images which contain an "EI" sequence in the actual image data (issue 11124)
This should reduce the possibility of accidentally truncating some inline images, while *not* causing the "EI" detection to become significantly slower.[1] There's obviously a possibility that these added checks are not sufficient to catch *every* single case of "EI" sequences within the actual inline image data, but without specific test-cases I decided against over-engineering the solution here. *Please note:* The interpolation issues are somewhat orthogonal to the main issue here, which is the truncated image, and they're already tracked elsewhere. --- [1] I've looked at the issue a few times, and this is the first approach that I was able to come up with that didn't cause *unacceptable* performance regressions in e.g. issue 2618.
This commit is contained in:
parent
276d917b7c
commit
28d2ada59c
@ -203,10 +203,11 @@ class Parser {
|
||||
I = 0x49,
|
||||
SPACE = 0x20,
|
||||
LF = 0xa,
|
||||
CR = 0xd;
|
||||
const n = 10,
|
||||
CR = 0xd,
|
||||
NUL = 0x0;
|
||||
const startPos = stream.pos;
|
||||
const lexer = this.lexer,
|
||||
startPos = stream.pos,
|
||||
n = 10;
|
||||
let state = 0,
|
||||
ch,
|
||||
maybeEIPos;
|
||||
@ -243,6 +244,25 @@ class Parser {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (state !== 2) {
|
||||
continue;
|
||||
}
|
||||
// Check that the "EI" sequence isn't part of the image data, since
|
||||
// that would cause the image to be truncated (fixes issue11124.pdf).
|
||||
if (lexer.knownCommands) {
|
||||
const nextObj = lexer.peekObj();
|
||||
if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
|
||||
// Not a valid command, i.e. the inline image data *itself*
|
||||
// contains an "EI" sequence. Resetting the state.
|
||||
state = 0;
|
||||
}
|
||||
} else {
|
||||
warn(
|
||||
"findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
|
||||
);
|
||||
}
|
||||
|
||||
if (state === 2) {
|
||||
break; // Finished!
|
||||
}
|
||||
@ -1276,6 +1296,28 @@ class Lexer {
|
||||
return Cmd.get(str);
|
||||
}
|
||||
|
||||
peekObj() {
|
||||
const streamPos = this.stream.pos,
|
||||
currentChar = this.currentChar,
|
||||
beginInlineImagePos = this.beginInlineImagePos;
|
||||
|
||||
let nextObj;
|
||||
try {
|
||||
nextObj = this.getObj();
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
warn(`peekObj: ${ex}`);
|
||||
}
|
||||
// Ensure that we reset *all* relevant `Lexer`-instance state.
|
||||
this.stream.pos = streamPos;
|
||||
this.currentChar = currentChar;
|
||||
this.beginInlineImagePos = beginInlineImagePos;
|
||||
|
||||
return nextObj;
|
||||
}
|
||||
|
||||
skipToNextLine() {
|
||||
let ch = this.currentChar;
|
||||
while (ch >= 0) {
|
||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -254,6 +254,7 @@
|
||||
!issue6336.pdf
|
||||
!issue6387.pdf
|
||||
!issue6410.pdf
|
||||
!issue11124.pdf
|
||||
!issue8586.pdf
|
||||
!jbig2_symbol_offset.pdf
|
||||
!gradientfill.pdf
|
||||
|
33
test/pdfs/issue11124.pdf
Normal file
33
test/pdfs/issue11124.pdf
Normal file
@ -0,0 +1,33 @@
|
||||
%PDF-1.3
|
||||
%âãÏÓ
|
||||
1 0 obj<</Type/Catalog/Pages 3 0 R>>
|
||||
endobj
|
||||
2 0 obj<</CreationDate(D:20190906183146+02'00')/Producer(PoDoFo - http://podofo.sf.net)>>
|
||||
endobj
|
||||
3 0 obj<</Type/Pages/Count 1/Kids[ 4 0 R]>>
|
||||
endobj
|
||||
4 0 obj<</Type/Page/Contents 5 0 R/MediaBox[ 0 0 100 100]/Parent 3 0 R/Resources<</ProcSet[/PDF/Text/ImageB/ImageC/ImageI]>>>>
|
||||
endobj
|
||||
5 0 obj<</Length 103>>
|
||||
stream
|
||||
100 0 0 100 0 0 cm
|
||||
BI /W 4 /H 4 /CS /RGB /BPC 8
|
||||
ID
|
||||
00000z0z00zzz00z0zzz0zzzEI aazazaazzzaazazzzazzz
|
||||
EI
|
||||
|
||||
endstream
|
||||
endobj
|
||||
xref
|
||||
0 6
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000059 00000 n
|
||||
0000000156 00000 n
|
||||
0000000207 00000 n
|
||||
0000000341 00000 n
|
||||
trailer
|
||||
<</ID[<D047079C2B662F2617BF6BC31251DAB1><D047079C2B662F2617BF6BC31251DAB1>]/Info 2 0 R/Root 1 0 R/Size 6>>
|
||||
startxref
|
||||
492
|
||||
%%EOF
|
@ -3147,6 +3147,12 @@
|
||||
"type": "text",
|
||||
"about": "Invisible (and broken) TrueType font used for text-selection."
|
||||
},
|
||||
{ "id": "issue11124",
|
||||
"file": "pdfs/issue11124.pdf",
|
||||
"md5": "9bde831515dc6b8bb2c7c00c8189aca9",
|
||||
"rounds": 1,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue11768",
|
||||
"file": "pdfs/issue11768_reduced.pdf",
|
||||
"md5": "0cafde97d78bb6883531a325a996a5ef",
|
||||
|
Loading…
Reference in New Issue
Block a user