From c3c3b8cd815edbb85524436c730ef339762209b1 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 18 Jan 2020 12:53:40 +0100 Subject: [PATCH] Add a heuristic, in `src/core/jpg.js`, to handle JPEG images with a wildly incorrect SOF (Start of Frame) `scanLines` parameter (issue 10880) *This whole patch feels somewhat arbitrary, and I'd be slightly worried about possibly breaking something else.* To limit the impact of these changes, we re-parse JPEG images using a reduced `scanLines` value if and only if both of the following hold: an unexpected EOI (End of Image) marker was encountered during decoding of Scan data, *and* the "actual" `scanLines` value is at least one order of magnitude smaller than expected. --- src/core/jpg.js | 24 ++++++++++++++++++++---- test/pdfs/issue10880.pdf.link | 1 + test/test_manifest.json | 9 +++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 test/pdfs/issue10880.pdf.link diff --git a/src/core/jpg.js b/src/core/jpg.js index c11321e78..b80739a7d 100644 --- a/src/core/jpg.js +++ b/src/core/jpg.js @@ -148,7 +148,7 @@ var JpegImage = (function JpegImageClosure() { if (bitsData === 0xff) { var nextByte = data[offset++]; if (nextByte) { - if (nextByte === 0xdc && parseDNLMarker) { + if (nextByte === /* DNL = */ 0xdc && parseDNLMarker) { offset += 2; // Skip marker length. const scanLines = readUint16(data, offset); @@ -159,7 +159,22 @@ var JpegImage = (function JpegImageClosure() { scanLines ); } - } else if (nextByte === 0xd9) { + } else if (nextByte === /* EOI = */ 0xd9) { + if (parseDNLMarker) { + // NOTE: only 8-bit JPEG images are supported in this decoder. + const maybeScanLines = blockRow * 8; + // Heuristic to attempt to handle corrupt JPEG images with too + // large `scanLines` parameter, by falling back to the currently + // parsed number of scanLines when it's at least one order of + // magnitude smaller than expected (fixes issue10880.pdf). 
+ if (maybeScanLines > 0 && maybeScanLines < frame.scanLines / 10) { + throw new DNLMarkerError( + "Found EOI marker (0xFFD9) while parsing scan data, " + + "possibly caused by incorrect `scanLines` parameter", + maybeScanLines + ); + } + } throw new EOIMarkerError( "Found EOI marker (0xFFD9) while parsing scan data" ); @@ -337,17 +352,18 @@ var JpegImage = (function JpegImageClosure() { } } + let blockRow = 0; function decodeMcu(component, decode, mcu, row, col) { var mcuRow = (mcu / mcusPerLine) | 0; var mcuCol = mcu % mcusPerLine; - var blockRow = mcuRow * component.v + row; + blockRow = mcuRow * component.v + row; var blockCol = mcuCol * component.h + col; var offset = getBlockBufferOffset(component, blockRow, blockCol); decode(component, offset); } function decodeBlock(component, decode, mcu) { - var blockRow = (mcu / component.blocksPerLine) | 0; + blockRow = (mcu / component.blocksPerLine) | 0; var blockCol = mcu % component.blocksPerLine; var offset = getBlockBufferOffset(component, blockRow, blockCol); decode(component, offset); diff --git a/test/pdfs/issue10880.pdf.link b/test/pdfs/issue10880.pdf.link new file mode 100644 index 000000000..10f4e7b79 --- /dev/null +++ b/test/pdfs/issue10880.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/3247065/B3-T-G5-50.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index d80a1dc25..baa8bece8 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -3614,6 +3614,15 @@ "lastPage": 1, "type": "eq" }, + { "id": "issue10880", + "file": "pdfs/issue10880.pdf", + "md5": "244ee5ee3ab88db8d8eb51d4416e2c97", + "rounds": 1, + "link": true, + "firstPage": 7, + "lastPage": 7, + "type": "eq" + }, { "id": "issue9650", "file": "pdfs/issue9650.pdf", "md5": "20d50bda6b1080b6d9088811299c791e",