Attempt to detect inline images which contain an "EI" sequence in the actual image data (issue 11124)

This should reduce the possibility of accidentally truncating some inline images, while *not* causing the "EI" detection to become significantly slower.[1]
There's obviously a possibility that these added checks are not sufficient to catch *every* single case of "EI" sequences within the actual inline image data, but without specific test-cases I decided against over-engineering the solution here.

*Please note:* The interpolation issues are somewhat orthogonal to the main issue here, which is the truncated image, and they're already tracked elsewhere.

---
[1] I've looked at the issue a few times, and this is the first approach that I was able to come up with that didn't cause *unacceptable* performance regressions in e.g. issue 2618.
This commit is contained in:
Jonas Jenwald 2020-06-26 12:36:28 +02:00
parent 276d917b7c
commit 28d2ada59c
4 changed files with 85 additions and 3 deletions

View File

@ -203,10 +203,11 @@ class Parser {
I = 0x49,
SPACE = 0x20,
LF = 0xa,
CR = 0xd;
const n = 10,
CR = 0xd,
NUL = 0x0;
const startPos = stream.pos;
const lexer = this.lexer,
startPos = stream.pos,
n = 10;
let state = 0,
ch,
maybeEIPos;
@ -243,6 +244,25 @@ class Parser {
break;
}
}
if (state !== 2) {
continue;
}
// Check that the "EI" sequence isn't part of the image data, since
// that would cause the image to be truncated (fixes issue11124.pdf).
if (lexer.knownCommands) {
const nextObj = lexer.peekObj();
if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
// Not a valid command, i.e. the inline image data *itself*
// contains an "EI" sequence. Resetting the state.
state = 0;
}
} else {
warn(
"findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
);
}
if (state === 2) {
break; // Finished!
}
@ -1276,6 +1296,28 @@ class Lexer {
return Cmd.get(str);
}
peekObj() {
const streamPos = this.stream.pos,
currentChar = this.currentChar,
beginInlineImagePos = this.beginInlineImagePos;
let nextObj;
try {
nextObj = this.getObj();
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`peekObj: ${ex}`);
}
// Ensure that we reset *all* relevant `Lexer`-instance state.
this.stream.pos = streamPos;
this.currentChar = currentChar;
this.beginInlineImagePos = beginInlineImagePos;
return nextObj;
}
skipToNextLine() {
let ch = this.currentChar;
while (ch >= 0) {

View File

@ -254,6 +254,7 @@
!issue6336.pdf
!issue6387.pdf
!issue6410.pdf
!issue11124.pdf
!issue8586.pdf
!jbig2_symbol_offset.pdf
!gradientfill.pdf

33
test/pdfs/issue11124.pdf Normal file
View File

@ -0,0 +1,33 @@
%PDF-1.3
%âãÏÓ
1 0 obj<</Type/Catalog/Pages 3 0 R>>
endobj
2 0 obj<</CreationDate(D:20190906183146+02'00')/Producer(PoDoFo - http://podofo.sf.net)>>
endobj
3 0 obj<</Type/Pages/Count 1/Kids[ 4 0 R]>>
endobj
4 0 obj<</Type/Page/Contents 5 0 R/MediaBox[ 0 0 100 100]/Parent 3 0 R/Resources<</ProcSet[/PDF/Text/ImageB/ImageC/ImageI]>>>>
endobj
5 0 obj<</Length 103>>
stream
100 0 0 100 0 0 cm
BI /W 4 /H 4 /CS /RGB /BPC 8
ID
00000z0z00zzz00z0zzz0zzzEI aazazaazzzaazazzzazzz
EI
endstream
endobj
xref
0 6
0000000000 65535 f
0000000015 00000 n
0000000059 00000 n
0000000156 00000 n
0000000207 00000 n
0000000341 00000 n
trailer
<</ID[<D047079C2B662F2617BF6BC31251DAB1><D047079C2B662F2617BF6BC31251DAB1>]/Info 2 0 R/Root 1 0 R/Size 6>>
startxref
492
%%EOF

View File

@ -3147,6 +3147,12 @@
"type": "text",
"about": "Invisible (and broken) TrueType font used for text-selection."
},
{ "id": "issue11124",
"file": "pdfs/issue11124.pdf",
"md5": "9bde831515dc6b8bb2c7c00c8189aca9",
"rounds": 1,
"type": "eq"
},
{ "id": "issue11768",
"file": "pdfs/issue11768_reduced.pdf",
"md5": "0cafde97d78bb6883531a325a996a5ef",