Attempt to improve the EI detection heuristics, for inline images, in streams containing NUL bytes (issue 8823)
Since this patch will now treat (some) `NUL` bytes as "ASCII", the number of `followingBytes` checked is thus increased to (hopefully) reduce the risk of introducing new false positives. Fixes 8823.
This commit is contained in:
parent
7cc7260634
commit
49b8cd5a6a
@ -148,7 +148,8 @@ var Parser = (function ParserClosure() {
|
|||||||
* @returns {number} The inline stream length.
|
* @returns {number} The inline stream length.
|
||||||
*/
|
*/
|
||||||
findDefaultInlineStreamEnd(stream) {
|
findDefaultInlineStreamEnd(stream) {
|
||||||
const E = 0x45, I = 0x49, SPACE = 0x20, LF = 0xA, CR = 0xD, n = 5;
|
const E = 0x45, I = 0x49, SPACE = 0x20, LF = 0xA, CR = 0xD;
|
||||||
|
const n = 10, NUL = 0x0;
|
||||||
let startPos = stream.pos, state = 0, ch, maybeEIPos;
|
let startPos = stream.pos, state = 0, ch, maybeEIPos;
|
||||||
while ((ch = stream.getByte()) !== -1) {
|
while ((ch = stream.getByte()) !== -1) {
|
||||||
if (state === 0) {
|
if (state === 0) {
|
||||||
@ -159,10 +160,23 @@ var Parser = (function ParserClosure() {
|
|||||||
assert(state === 2);
|
assert(state === 2);
|
||||||
if (ch === SPACE || ch === LF || ch === CR) {
|
if (ch === SPACE || ch === LF || ch === CR) {
|
||||||
maybeEIPos = stream.pos;
|
maybeEIPos = stream.pos;
|
||||||
// Let's check the next `n` bytes are ASCII... just be sure.
|
// Let's check that the next `n` bytes are ASCII... just to be sure.
|
||||||
let followingBytes = stream.peekBytes(n);
|
let followingBytes = stream.peekBytes(n);
|
||||||
for (let i = 0; i < n; i++) {
|
for (let i = 0, ii = followingBytes.length; i < ii; i++) {
|
||||||
ch = followingBytes[i];
|
ch = followingBytes[i];
|
||||||
|
if (ch === NUL && followingBytes[i + 1] !== NUL) {
|
||||||
|
// NUL bytes are not supposed to occur *outside* of inline
|
||||||
|
// images, but some PDF generators violate that assumption,
|
||||||
|
// thus breaking the EI detection heuristics used below.
|
||||||
|
//
|
||||||
|
// However, we can't unconditionally treat NUL bytes as "ASCII",
|
||||||
|
// since that *could* result in inline images being truncated.
|
||||||
|
//
|
||||||
|
// To attempt to address this, we'll still treat any *sequence*
|
||||||
|
// of NUL bytes as non-ASCII, but for a *single* NUL byte we'll
|
||||||
|
// continue checking the `followingBytes` (fixes issue8823.pdf).
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7F)) {
|
if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7F)) {
|
||||||
// Not a LF, CR, SPACE or any visible ASCII character, i.e.
|
// Not a LF, CR, SPACE or any visible ASCII character, i.e.
|
||||||
// it's binary stuff. Resetting the state.
|
// it's binary stuff. Resetting the state.
|
||||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -58,6 +58,7 @@
|
|||||||
!issue8697.pdf
|
!issue8697.pdf
|
||||||
!issue8707.pdf
|
!issue8707.pdf
|
||||||
!issue8798r.pdf
|
!issue8798r.pdf
|
||||||
|
!issue8823.pdf
|
||||||
!bad-PageLabels.pdf
|
!bad-PageLabels.pdf
|
||||||
!filled-background.pdf
|
!filled-background.pdf
|
||||||
!ArabicCIDTrueType.pdf
|
!ArabicCIDTrueType.pdf
|
||||||
|
BIN
test/pdfs/issue8823.pdf
Normal file
BIN
test/pdfs/issue8823.pdf
Normal file
Binary file not shown.
@ -2976,9 +2976,15 @@
|
|||||||
{ "id": "issue8798",
|
{ "id": "issue8798",
|
||||||
"file": "pdfs/issue8798r.pdf",
|
"file": "pdfs/issue8798r.pdf",
|
||||||
"md5": "3a0e29f013d9edcceb5d852e37738a77",
|
"md5": "3a0e29f013d9edcceb5d852e37738a77",
|
||||||
|
"link": false,
|
||||||
|
"rounds": 1,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
|
{ "id": "issue8823",
|
||||||
|
"file": "pdfs/issue8823.pdf",
|
||||||
|
"md5": "ad02d4aa374b315bf1766038d002d57a",
|
||||||
|
"link": false,
|
||||||
"rounds": 1,
|
"rounds": 1,
|
||||||
"lastPage": 1,
|
|
||||||
"link": true,
|
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
{ "id": "issue8613",
|
{ "id": "issue8613",
|
||||||
|
Loading…
Reference in New Issue
Block a user