Merge pull request #11523 from Snuffleupagus/issue-10880
Add a heuristic, in `src/core/jpg.js`, to handle JPEG images with a wildly incorrect SOF (Start of Frame) `scanLines` parameter (issue 10880)
This commit is contained in:
commit
1a97c142b3
@ -512,7 +512,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
this.xref,
|
this.xref,
|
||||||
resources,
|
resources,
|
||||||
this.pdfFunctionFactory
|
this.pdfFunctionFactory
|
||||||
)
|
) &&
|
||||||
|
image.maybeValidDimensions
|
||||||
) {
|
) {
|
||||||
// These JPEGs don't need any more processing so we can just send it.
|
// These JPEGs don't need any more processing so we can just send it.
|
||||||
return this.handler
|
return this.handler
|
||||||
|
@ -41,7 +41,8 @@ class NativeImageDecoder {
|
|||||||
this.xref,
|
this.xref,
|
||||||
this.resources,
|
this.resources,
|
||||||
this.pdfFunctionFactory
|
this.pdfFunctionFactory
|
||||||
)
|
) &&
|
||||||
|
image.maybeValidDimensions
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,6 +109,135 @@ const JpegStream = (function JpegStreamClosure() {
|
|||||||
this.eof = true;
|
this.eof = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Object.defineProperty(JpegStream.prototype, "maybeValidDimensions", {
|
||||||
|
get: function JpegStream_maybeValidDimensions() {
|
||||||
|
const { dict, stream } = this;
|
||||||
|
const dictHeight = dict.get("Height", "H");
|
||||||
|
const startPos = stream.pos;
|
||||||
|
|
||||||
|
let validDimensions = true,
|
||||||
|
foundSOF = false,
|
||||||
|
b;
|
||||||
|
while ((b = stream.getByte()) !== -1) {
|
||||||
|
if (b !== 0xff) {
|
||||||
|
// Not a valid marker.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
switch (stream.getByte()) {
|
||||||
|
case 0xc0: // SOF0
|
||||||
|
case 0xc1: // SOF1
|
||||||
|
case 0xc2: // SOF2
|
||||||
|
// These three SOF{n} markers are the only ones that the built-in
|
||||||
|
// PDF.js JPEG decoder currently supports.
|
||||||
|
foundSOF = true;
|
||||||
|
|
||||||
|
stream.pos += 2; // Skip marker length.
|
||||||
|
stream.pos += 1; // Skip precision.
|
||||||
|
const scanLines = stream.getUint16();
|
||||||
|
|
||||||
|
// The "normal" case, where the image data and dictionary agree.
|
||||||
|
if (scanLines === dictHeight) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// A DNL (Define Number of Lines) marker is expected,
|
||||||
|
// which browsers (usually) cannot decode natively.
|
||||||
|
if (scanLines === 0) {
|
||||||
|
validDimensions = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// The dimensions of the image, among other properties, should
|
||||||
|
// always be taken from the image data *itself* rather than the
|
||||||
|
// XObject dictionary. However, there are cases of corrupt images that
|
||||||
|
// browsers cannot decode natively, for example:
|
||||||
|
// - JPEG images with DNL markers, where the SOF `scanLines`
|
||||||
|
// parameter has an unexpected value (see issue 8614).
|
||||||
|
// - JPEG images with a too large SOF `scanLines` parameter, where
|
||||||
|
// the EOI marker is encountered prematurely (see issue 10880).
|
||||||
|
// In an attempt to handle these kinds of corrupt images, compare
|
||||||
|
// the dimensions in the image data with the dictionary and *always*
|
||||||
|
// let the PDF.js JPEG decoder (rather than the browser) handle the
|
||||||
|
// image if the difference is larger than one order of magnitude
|
||||||
|
// (since that would generally suggest that something is off).
|
||||||
|
if (scanLines > dictHeight * 10) {
|
||||||
|
validDimensions = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0xc3: // SOF3
|
||||||
|
/* falls through */
|
||||||
|
case 0xc5: // SOF5
|
||||||
|
case 0xc6: // SOF6
|
||||||
|
case 0xc7: // SOF7
|
||||||
|
/* falls through */
|
||||||
|
case 0xc9: // SOF9
|
||||||
|
case 0xca: // SOF10
|
||||||
|
case 0xcb: // SOF11
|
||||||
|
/* falls through */
|
||||||
|
case 0xcd: // SOF13
|
||||||
|
case 0xce: // SOF14
|
||||||
|
case 0xcf: // SOF15
|
||||||
|
foundSOF = true;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0xc4: // DHT
|
||||||
|
case 0xcc: // DAC
|
||||||
|
/* falls through */
|
||||||
|
case 0xda: // SOS
|
||||||
|
case 0xdb: // DQT
|
||||||
|
case 0xdc: // DNL
|
||||||
|
case 0xdd: // DRI
|
||||||
|
case 0xde: // DHP
|
||||||
|
case 0xdf: // EXP
|
||||||
|
/* falls through */
|
||||||
|
case 0xe0: // APP0
|
||||||
|
case 0xe1: // APP1
|
||||||
|
case 0xe2: // APP2
|
||||||
|
case 0xe3: // APP3
|
||||||
|
case 0xe4: // APP4
|
||||||
|
case 0xe5: // APP5
|
||||||
|
case 0xe6: // APP6
|
||||||
|
case 0xe7: // APP7
|
||||||
|
case 0xe8: // APP8
|
||||||
|
case 0xe9: // APP9
|
||||||
|
case 0xea: // APP10
|
||||||
|
case 0xeb: // APP11
|
||||||
|
case 0xec: // APP12
|
||||||
|
case 0xed: // APP13
|
||||||
|
case 0xee: // APP14
|
||||||
|
case 0xef: // APP15
|
||||||
|
/* falls through */
|
||||||
|
case 0xfe: // COM
|
||||||
|
const markerLength = stream.getUint16();
|
||||||
|
if (markerLength > 2) {
|
||||||
|
stream.skip(markerLength - 2); // Jump to the next marker.
|
||||||
|
} else {
|
||||||
|
// The marker length is invalid, resetting the stream position.
|
||||||
|
stream.skip(-2);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0xff: // Fill byte.
|
||||||
|
// Avoid skipping a valid marker, resetting the stream position.
|
||||||
|
stream.skip(-1);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0xd9: // EOI
|
||||||
|
foundSOF = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (foundSOF) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Finally, don't forget to reset the stream position.
|
||||||
|
stream.pos = startPos;
|
||||||
|
|
||||||
|
return shadow(this, "maybeValidDimensions", validDimensions);
|
||||||
|
},
|
||||||
|
configurable: true,
|
||||||
|
});
|
||||||
|
|
||||||
JpegStream.prototype.getIR = function(forceDataSchema = false) {
|
JpegStream.prototype.getIR = function(forceDataSchema = false) {
|
||||||
return createObjectURL(this.bytes, "image/jpeg", forceDataSchema);
|
return createObjectURL(this.bytes, "image/jpeg", forceDataSchema);
|
||||||
};
|
};
|
||||||
|
@ -148,7 +148,7 @@ var JpegImage = (function JpegImageClosure() {
|
|||||||
if (bitsData === 0xff) {
|
if (bitsData === 0xff) {
|
||||||
var nextByte = data[offset++];
|
var nextByte = data[offset++];
|
||||||
if (nextByte) {
|
if (nextByte) {
|
||||||
if (nextByte === 0xdc && parseDNLMarker) {
|
if (nextByte === /* DNL = */ 0xdc && parseDNLMarker) {
|
||||||
offset += 2; // Skip marker length.
|
offset += 2; // Skip marker length.
|
||||||
|
|
||||||
const scanLines = readUint16(data, offset);
|
const scanLines = readUint16(data, offset);
|
||||||
@ -159,7 +159,22 @@ var JpegImage = (function JpegImageClosure() {
|
|||||||
scanLines
|
scanLines
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else if (nextByte === 0xd9) {
|
} else if (nextByte === /* EOI = */ 0xd9) {
|
||||||
|
if (parseDNLMarker) {
|
||||||
|
// NOTE: only 8-bit JPEG images are supported in this decoder.
|
||||||
|
const maybeScanLines = blockRow * 8;
|
||||||
|
// Heuristic to attempt to handle corrupt JPEG images with too
|
||||||
|
// large `scanLines` parameter, by falling back to the currently
|
||||||
|
// parsed number of scanLines when it's at least one order of
|
||||||
|
// magnitude smaller than expected (fixes issue10880.pdf).
|
||||||
|
if (maybeScanLines > 0 && maybeScanLines < frame.scanLines / 10) {
|
||||||
|
throw new DNLMarkerError(
|
||||||
|
"Found EOI marker (0xFFD9) while parsing scan data, " +
|
||||||
|
"possibly caused by incorrect `scanLines` parameter",
|
||||||
|
maybeScanLines
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
throw new EOIMarkerError(
|
throw new EOIMarkerError(
|
||||||
"Found EOI marker (0xFFD9) while parsing scan data"
|
"Found EOI marker (0xFFD9) while parsing scan data"
|
||||||
);
|
);
|
||||||
@ -337,17 +352,18 @@ var JpegImage = (function JpegImageClosure() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let blockRow = 0;
|
||||||
function decodeMcu(component, decode, mcu, row, col) {
|
function decodeMcu(component, decode, mcu, row, col) {
|
||||||
var mcuRow = (mcu / mcusPerLine) | 0;
|
var mcuRow = (mcu / mcusPerLine) | 0;
|
||||||
var mcuCol = mcu % mcusPerLine;
|
var mcuCol = mcu % mcusPerLine;
|
||||||
var blockRow = mcuRow * component.v + row;
|
blockRow = mcuRow * component.v + row;
|
||||||
var blockCol = mcuCol * component.h + col;
|
var blockCol = mcuCol * component.h + col;
|
||||||
var offset = getBlockBufferOffset(component, blockRow, blockCol);
|
var offset = getBlockBufferOffset(component, blockRow, blockCol);
|
||||||
decode(component, offset);
|
decode(component, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
function decodeBlock(component, decode, mcu) {
|
function decodeBlock(component, decode, mcu) {
|
||||||
var blockRow = (mcu / component.blocksPerLine) | 0;
|
blockRow = (mcu / component.blocksPerLine) | 0;
|
||||||
var blockCol = mcu % component.blocksPerLine;
|
var blockCol = mcu % component.blocksPerLine;
|
||||||
var offset = getBlockBufferOffset(component, blockRow, blockCol);
|
var offset = getBlockBufferOffset(component, blockRow, blockCol);
|
||||||
decode(component, offset);
|
decode(component, offset);
|
||||||
|
1
test/pdfs/issue10880.pdf.link
Normal file
1
test/pdfs/issue10880.pdf.link
Normal file
@ -0,0 +1 @@
|
|||||||
|
https://github.com/mozilla/pdf.js/files/3247065/B3-T-G5-50.pdf
|
@ -3634,6 +3634,15 @@
|
|||||||
"lastPage": 1,
|
"lastPage": 1,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue10880",
|
||||||
|
"file": "pdfs/issue10880.pdf",
|
||||||
|
"md5": "244ee5ee3ab88db8d8eb51d4416e2c97",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": true,
|
||||||
|
"firstPage": 7,
|
||||||
|
"lastPage": 7,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
{ "id": "issue9650",
|
{ "id": "issue9650",
|
||||||
"file": "pdfs/issue9650.pdf",
|
"file": "pdfs/issue9650.pdf",
|
||||||
"md5": "20d50bda6b1080b6d9088811299c791e",
|
"md5": "20d50bda6b1080b6d9088811299c791e",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user