Add a heuristic, in src/core/jpg.js, to handle JPEG images with a wildly incorrect SOF (Start of Frame) scanLines parameter (issue 10880)

*This whole patch feels somewhat arbitrary, and I'd be slightly worried about possibly breaking something else.*

To limit the impact of these changes, we re-parse JPEG images using a reduced `scanLines` value if and only if an unexpected EOI (End of Image) marker was encountered during decoding of Scan data *and* the "actual" `scanLines` value is at least one order of magnitude smaller than expected.
This commit is contained in:
Jonas Jenwald 2020-01-18 12:53:40 +01:00
parent 5494f7d5bc
commit c3c3b8cd81
3 changed files with 30 additions and 4 deletions

View File

@ -148,7 +148,7 @@ var JpegImage = (function JpegImageClosure() {
if (bitsData === 0xff) {
var nextByte = data[offset++];
if (nextByte) {
if (nextByte === 0xdc && parseDNLMarker) {
if (nextByte === /* DNL = */ 0xdc && parseDNLMarker) {
offset += 2; // Skip marker length.
const scanLines = readUint16(data, offset);
@ -159,7 +159,22 @@ var JpegImage = (function JpegImageClosure() {
scanLines
);
}
} else if (nextByte === 0xd9) {
} else if (nextByte === /* EOI = */ 0xd9) {
if (parseDNLMarker) {
// NOTE: only 8-bit JPEG images are supported in this decoder.
const maybeScanLines = blockRow * 8;
// Heuristic to attempt to handle corrupt JPEG images with too
// large `scanLines` parameter, by falling back to the currently
// parsed number of scanLines when it's at least one order of
// magnitude smaller than expected (fixes issue10880.pdf).
if (maybeScanLines > 0 && maybeScanLines < frame.scanLines / 10) {
throw new DNLMarkerError(
"Found EOI marker (0xFFD9) while parsing scan data, " +
"possibly caused by incorrect `scanLines` parameter",
maybeScanLines
);
}
}
throw new EOIMarkerError(
"Found EOI marker (0xFFD9) while parsing scan data"
);
@ -337,17 +352,18 @@ var JpegImage = (function JpegImageClosure() {
}
}
let blockRow = 0;
function decodeMcu(component, decode, mcu, row, col) {
var mcuRow = (mcu / mcusPerLine) | 0;
var mcuCol = mcu % mcusPerLine;
var blockRow = mcuRow * component.v + row;
blockRow = mcuRow * component.v + row;
var blockCol = mcuCol * component.h + col;
var offset = getBlockBufferOffset(component, blockRow, blockCol);
decode(component, offset);
}
function decodeBlock(component, decode, mcu) {
var blockRow = (mcu / component.blocksPerLine) | 0;
blockRow = (mcu / component.blocksPerLine) | 0;
var blockCol = mcu % component.blocksPerLine;
var offset = getBlockBufferOffset(component, blockRow, blockCol);
decode(component, offset);

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/3247065/B3-T-G5-50.pdf

View File

@ -3614,6 +3614,15 @@
"lastPage": 1,
"type": "eq"
},
{ "id": "issue10880",
"file": "pdfs/issue10880.pdf",
"md5": "244ee5ee3ab88db8d8eb51d4416e2c97",
"rounds": 1,
"link": true,
"firstPage": 7,
"lastPage": 7,
"type": "eq"
},
{ "id": "issue9650",
"file": "pdfs/issue9650.pdf",
"md5": "20d50bda6b1080b6d9088811299c791e",