From ec85d5c6252f3eb9064db450a48b05a05853f9c4 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 1 Feb 2018 15:44:49 +0100 Subject: [PATCH 1/6] Change the signature of `PartialEvaluator.buildPaintImageXObject` to take a parameter object This method currently requires a fair number of parameters, which creates quite unwieldy call-sites. When invoking `buildPaintImageXObject`, you have to remember not only which arguments to supply, but also the correct order, to prevent run-time errors. --- src/core/evaluator.js | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index acb7aa0df..397d0d048 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -349,10 +349,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }); }, - buildPaintImageXObject: - function PartialEvaluator_buildPaintImageXObject(resources, image, - inline, operatorList, - cacheKey, imageCache) { + buildPaintImageXObject({ resources, image, isInline = false, operatorList, + cacheKey, imageCache, }) { var dict = image.dict; var w = dict.get('Width', 'W'); var h = dict.get('Height', 'H'); @@ -406,13 +404,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var SMALL_IMAGE_DIMENSIONS = 200; // Inlining small images into the queue as RGB data - if (inline && !softMask && !mask && !(image instanceof JpegStream) && + if (isInline && !softMask && !mask && !(image instanceof JpegStream) && (w + h) < SMALL_IMAGE_DIMENSIONS) { let imageObj = new PDFImage({ xref: this.xref, res: resources, image, - isInline: inline, + isInline, pdfFunctionFactory: this.pdfFunctionFactory, }); // We force the use of RGBA_32BPP images here, because we can't handle @@ -465,7 +463,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { xref: this.xref, res: resources, image, - isInline: inline, + isInline, nativeDecoder: nativeImageDecoder, pdfFunctionFactory: this.pdfFunctionFactory, }).then((imageObj) => { @@ -989,8 +987,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }, rejectXObject); return; } else if (type.name === 'Image') { - self.buildPaintImageXObject(resources, xobj, false, - operatorList, name, imageCache); + self.buildPaintImageXObject({ + resources, + image: xobj, + operatorList, + cacheKey: name, + imageCache, + }); } else if (type.name === 'PS') { // PostScript XObjects are unused when viewing documents. // See section 4.7.1 of Adobe's PDF reference. @@ -1032,8 +1035,14 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { continue; } } - self.buildPaintImageXObject(resources, args[0], true, - operatorList, cacheKey, imageCache); + self.buildPaintImageXObject({ + resources, + image: args[0], + isInline: true, + operatorList, + cacheKey, + imageCache, + }); args = null; continue; case OPS.showText: From 7f73fc9ace05ca1c64dda71b23e9a00465c7535e Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 1 Feb 2018 15:53:37 +0100 Subject: [PATCH 2/6] Re-factor `PartialEvaluator.buildPaintImageXObject` to make it asynchronous This is necessary for upcoming changes, which will add fallback code-paths to allow graceful handling of native image decoding failures. --- src/core/evaluator.js | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 397d0d048..207cdc101 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -357,12 +357,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if (!(w && isNum(w)) || !(h && isNum(h))) { warn('Image dimensions are missing, or not numbers.'); - return; + return Promise.resolve(); } var maxImageSize = this.options.maxImageSize; if (maxImageSize !== -1 && w * h > maxImageSize) { warn('Image exceeded maximum allowed size and was removed.'); - return; + return Promise.resolve(); } var imageMask = (dict.get('ImageMask', 'IM') || false); @@ -396,7 +396,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { args, }; } - return; + return Promise.resolve(); } var softMask = (dict.get('SMask', 'SM') || false); @@ -417,7 +417,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // any other kind. imgData = imageObj.createImageData(/* forceRGBA = */ true); operatorList.addOp(OPS.paintInlineImageXObject, [imgData]); - return; + return Promise.resolve(); } var nativeImageDecoderSupport = this.options.nativeImageDecoderSupport; @@ -441,7 +441,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { args, }; } - return; + return Promise.resolve(); } // Creates native image decoder only if a JPEG image or mask is present. @@ -482,6 +482,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { args, }; } + return Promise.resolve(); }, handleSMask: function PartialEvaluator_handleSmask(smask, resources, @@ -993,7 +994,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { operatorList, cacheKey: name, imageCache, - }); + }).then(resolveXObject, rejectXObject); + return; } else if (type.name === 'PS') { // PostScript XObjects are unused when viewing documents. // See section 4.7.1 of Adobe's PDF reference. @@ -1035,16 +1037,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { continue; } } - self.buildPaintImageXObject({ + next(self.buildPaintImageXObject({ resources, image: args[0], isInline: true, operatorList, cacheKey, imageCache, - }); - args = null; - continue; + })); + return; case OPS.showText: args[0] = self.handleText(args[0], stateManager.state); break; From 2570717e77845fb04bd0ad4da459fc7de6f39ca4 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 1 Feb 2018 15:38:17 +0100 Subject: [PATCH 3/6] Inline the code in `loadJpegStream` at the only call-site in `src/display/api.js`.js` Since `loadJpegStream` is only used at a *single* spot in the code-base, and given that it's very heavily tailored to the calling code (since it relies on the data structure of `PDFObjects`), this patch simply inlines the code in `src/display/api.js` instead. --- src/display/api.js | 23 ++++++++++++++++++----- src/shared/util.js | 13 ------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/display/api.js b/src/display/api.js index b4603ca95..d50651c42 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -16,10 +16,9 @@ import { assert, createPromiseCapability, getVerbosityLevel, info, InvalidPDFException, - isArrayBuffer, isSameOrigin, loadJpegStream, MessageHandler, - MissingPDFException, NativeImageDecoding, PageViewport, PasswordException, - stringToBytes, UnexpectedResponseException, UnknownErrorException, - unreachable, Util, warn + isArrayBuffer, isSameOrigin, MessageHandler, MissingPDFException, + NativeImageDecoding, PageViewport, PasswordException, stringToBytes, + UnexpectedResponseException, UnknownErrorException, unreachable, Util, warn } from '../shared/util'; import { DOMCanvasFactory, DOMCMapReaderFactory, DummyStatTimer, getDefaultSetting, @@ -1818,7 +1817,21 @@ var WorkerTransport = (function WorkerTransportClosure() { switch (type) { case 'JpegStream': imageData = data[3]; - loadJpegStream(id, imageData, pageProxy.objs); + new Promise((resolve, reject) => { + const img = new Image(); + img.onload = function() { + resolve(img); + }; + img.onerror = function() { + reject(new Error('Error during JPEG image loading')); + }; + img.src = imageData; + }).then((img) => { + pageProxy.objs.resolve(id, img); + }, (reason) => { + warn(reason); + pageProxy.objs.resolve(id, null); + }); break; case 'Image': imageData = data[3]; diff --git a/src/shared/util.js b/src/shared/util.js index 593b0135f..daac9f7bf 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -1569,18 +1569,6 @@ MessageHandler.prototype = { }, }; -function loadJpegStream(id, imageUrl, objs) { - var img = new Image(); - img.onload = (function loadJpegStream_onloadClosure() { - objs.resolve(id, img); - }); - img.onerror = (function loadJpegStream_onerrorClosure() { - objs.resolve(id, null); - warn('Error during JPEG image loading'); - }); - img.src = imageUrl; -} - export { FONT_IDENTITY_MATRIX, IDENTITY_MATRIX, @@ -1632,7 +1620,6 @@ export { createValidAbsoluteUrl, isLittleEndian, isEvalSupported, - loadJpegStream, log2, readInt8, readUint16, From 76afe1018bb4fc7e08fc9bf16ad1084756bfc1c4 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 1 Feb 2018 15:32:09 +0100 Subject: [PATCH 4/6] Fallback to built-in image decoding if the `NativeImageDecoder` fails In particular this means that if 'JpegDecode', in `src/display/api.js`, fails we'll fallback to the built-in JPEG decoder. --- src/core/evaluator.js | 9 ++++----- src/core/image.js | 6 +++++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 207cdc101..2ad04dbca 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -80,11 +80,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var colorSpace = dict.get('ColorSpace', 'CS'); colorSpace = ColorSpace.parse(colorSpace, this.xref, this.resources, this.pdfFunctionFactory); - var numComps = colorSpace.numComps; - var decodePromise = this.handler.sendWithPromise('JpegDecode', - [image.getIR(this.forceDataSchema), numComps]); - return decodePromise.then(function (message) { - var data = message.data; + + return this.handler.sendWithPromise('JpegDecode', [ + image.getIR(this.forceDataSchema), colorSpace.numComps + ]).then(function({ data, width, height, }) { return new Stream(data, 0, data.length, image.dict); }); }, diff --git a/src/core/image.js b/src/core/image.js index 9fa090365..bd1896802 100644 --- a/src/core/image.js +++ b/src/core/image.js @@ -27,7 +27,11 @@ var PDFImage = (function PDFImageClosure() { */ function handleImageData(image, nativeDecoder) { if (nativeDecoder && nativeDecoder.canDecode(image)) { - return nativeDecoder.decode(image); + return nativeDecoder.decode(image).catch((reason) => { + warn('Native image decoding failed -- trying to recover: ' + + (reason && reason.message)); + return image; + }); } return Promise.resolve(image); } From 80441346a3fe91b52a08331ff278da6a43cf0971 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 1 Feb 2018 16:43:10 +0100 Subject: [PATCH 5/6] Fallback to the built-in JPEG decoder if 'JpegStream', in `src/display/api.js`, fails to load the image This works by making `PartialEvaluator.buildPaintImageXObject` wait for the success/failure of `loadJpegStream` on the API side *before* parsing continues. Please note that in practice, it should be quite rare for the browser to fail loading/decoding of a JPEG image. In the general case, it should thus not be completely surprising if even `src/core/jpg.js` will fail to decode the image. --- src/core/evaluator.js | 52 +++++++++++++++++++++++++++++++------------ src/display/api.js | 10 ++++----- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 2ad04dbca..302ed26a5 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -349,7 +349,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }, buildPaintImageXObject({ resources, image, isInline = false, operatorList, - cacheKey, imageCache, }) { + cacheKey, imageCache, + forceDisableNativeImageDecoder = false, }) { var dict = image.dict; var w = dict.get('Width', 'W'); var h = dict.get('Height', 'H'); @@ -419,28 +420,47 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return Promise.resolve(); } - var nativeImageDecoderSupport = this.options.nativeImageDecoderSupport; + const nativeImageDecoderSupport = forceDisableNativeImageDecoder ? + NativeImageDecoding.NONE : this.options.nativeImageDecoderSupport; // If there is no imageMask, create the PDFImage and a lot // of image processing can be done here. var objId = 'img_' + this.idFactory.createObjId(); - operatorList.addDependency(objId); - args = [objId, w, h]; if (nativeImageDecoderSupport !== NativeImageDecoding.NONE && !softMask && !mask && image instanceof JpegStream && NativeImageDecoder.isSupported(image, this.xref, resources, this.pdfFunctionFactory)) { // These JPEGs don't need any more processing so we can just send it. - operatorList.addOp(OPS.paintJpegXObject, args); - this.handler.send('obj', [objId, this.pageIndex, 'JpegStream', - image.getIR(this.options.forceDataSchema)]); - if (cacheKey) { - imageCache[cacheKey] = { - fn: OPS.paintJpegXObject, - args, - }; - } - return Promise.resolve(); + return this.handler.sendWithPromise('obj', [ + objId, this.pageIndex, 'JpegStream', + image.getIR(this.options.forceDataSchema) + ]).then(function() { + // Only add the dependency once we know that the native JPEG decoding + // succeeded, to ensure that rendering will always complete. + operatorList.addDependency(objId); + args = [objId, w, h]; + + operatorList.addOp(OPS.paintJpegXObject, args); + if (cacheKey) { + imageCache[cacheKey] = { + fn: OPS.paintJpegXObject, + args, + }; + } + }, (reason) => { + warn('Native JPEG decoding failed -- trying to recover: ' + + (reason && reason.message)); + // Try to decode the JPEG image with the built-in decoder instead. + return this.buildPaintImageXObject({ + resources, + image, + isInline, + operatorList, + cacheKey, + imageCache, + forceDisableNativeImageDecoder: true, + }); + }); } // Creates native image decoder only if a JPEG image or mask is present. @@ -457,6 +477,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }); } + // Ensure that the dependency is added before the image is decoded. + operatorList.addDependency(objId); + args = [objId, w, h]; + PDFImage.buildImage({ handler: this.handler, xref: this.xref, diff --git a/src/display/api.js b/src/display/api.js index d50651c42..0819b9736 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -1817,22 +1817,22 @@ var WorkerTransport = (function WorkerTransportClosure() { switch (type) { case 'JpegStream': imageData = data[3]; - new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { const img = new Image(); img.onload = function() { resolve(img); }; img.onerror = function() { reject(new Error('Error during JPEG image loading')); + // Note that when the browser image loading/decoding fails, + // we'll fallback to the built-in PDF.js JPEG decoder; see + // `PartialEvaluator.buildPaintImageXObject` in the + // `src/core/evaluator.js` file. }; img.src = imageData; }).then((img) => { pageProxy.objs.resolve(id, img); - }, (reason) => { - warn(reason); - pageProxy.objs.resolve(id, null); }); - break; case 'Image': imageData = data[3]; pageProxy.objs.resolve(id, imageData); From bf4166e6c99f73b940337940ab20d0735bed6664 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 1 Feb 2018 20:43:01 +0100 Subject: [PATCH 6/6] Attempt to handle DNL (Define Number of Lines) markers when parsing JPEG images (issue 8614) Please refer to the specification, found at https://www.w3.org/Graphics/JPEG/itu-t81.pdf#page=49 Given how the JPEG decoder is currently implemented, we need to know the value of the scanLines parameter (among others) *before* parsing of the SOS (Start of Scan) data begins. Hence the best solution I could come up with here, is to re-parse the image in the *hopefully* rare case of JPEG images that include a DNL (Define Number of Lines) marker. Fixes 8614. --- src/core/jpg.js | 61 +++++++++++++++++++++++++++++++----- test/pdfs/issue8614.pdf.link | 1 + test/test_manifest.json | 8 +++++ 3 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 test/pdfs/issue8614.pdf.link diff --git a/src/core/jpg.js b/src/core/jpg.js index 9f25683a8..0842dd0ae 100644 --- a/src/core/jpg.js +++ b/src/core/jpg.js @@ -28,6 +28,19 @@ let JpegError = (function JpegErrorClosure() { return JpegError; })(); +let DNLMarkerError = (function DNLMarkerErrorClosure() { + function DNLMarkerError(message, scanLines) { + this.message = message; + this.scanLines = scanLines; + } + + DNLMarkerError.prototype = new Error(); + DNLMarkerError.prototype.name = 'DNLMarkerError'; + DNLMarkerError.constructor = DNLMarkerError; + + return DNLMarkerError; +})(); + /** * This code was forked from https://github.com/notmasteryet/jpgjs. * The original version was created by GitHub user notmasteryet. @@ -112,7 +125,8 @@ var JpegImage = (function JpegImageClosure() { } function decodeScan(data, offset, frame, components, resetInterval, - spectralStart, spectralEnd, successivePrev, successive) { + spectralStart, spectralEnd, successivePrev, successive, + parseDNLMarker = false) { var mcusPerLine = frame.mcusPerLine; var progressive = frame.progressive; @@ -127,6 +141,14 @@ var JpegImage = (function JpegImageClosure() { if (bitsData === 0xFF) { var nextByte = data[offset++]; if (nextByte) { + if (nextByte === 0xDC && parseDNLMarker) { // DNL == 0xFFDC + offset += 2; // Skip data length. + const scanLines = (data[offset++] << 8) | data[offset++]; + if (scanLines > 0 && scanLines !== frame.scanLines) { + throw new DNLMarkerError( + 'Found DNL marker (0xFFDC) while parsing scan data', scanLines); + } + } throw new JpegError( `unexpected marker ${((bitsData << 8) | nextByte).toString(16)}`); } @@ -635,7 +657,7 @@ var JpegImage = (function JpegImageClosure() { } JpegImage.prototype = { - parse: function parse(data) { + parse(data, { dnlScanLines = null, } = {}) { function readUint16() { var value = (data[offset] << 8) | data[offset + 1]; @@ -685,6 +707,7 @@ var JpegImage = (function JpegImageClosure() { var jfif = null; var adobe = null; var frame, resetInterval; + let numSOSMarkers = 0; var quantizationTables = []; var huffmanTablesAC = [], huffmanTablesDC = []; var fileMarker = readUint16(); @@ -781,7 +804,8 @@ var JpegImage = (function JpegImageClosure() { frame.extended = (fileMarker === 0xFFC1); frame.progressive = (fileMarker === 0xFFC2); frame.precision = data[offset++]; - frame.scanLines = readUint16(); + const sofScanLines = readUint16(); + frame.scanLines = dnlScanLines || sofScanLines; frame.samplesPerLine = readUint16(); frame.components = []; frame.componentIds = {}; @@ -839,6 +863,12 @@ var JpegImage = (function JpegImageClosure() { break; case 0xFFDA: // SOS (Start of Scan) + // A DNL marker (0xFFDC), if it exists, is only allowed at the end + // of the first scan segment and may only occur once in an image. + // Furthermore, to prevent an infinite loop, do *not* attempt to + // parse DNL markers during re-parsing of the JPEG scan data. + const parseDNLMarker = (++numSOSMarkers) === 1 && !dnlScanLines; + readUint16(); // scanLength var selectorsCount = data[offset++]; var components = [], component; @@ -853,11 +883,26 @@ var JpegImage = (function JpegImageClosure() { var spectralStart = data[offset++]; var spectralEnd = data[offset++]; var successiveApproximation = data[offset++]; - var processed = decodeScan(data, offset, - frame, components, resetInterval, - spectralStart, spectralEnd, - successiveApproximation >> 4, successiveApproximation & 15); - offset += processed; + try { + var processed = decodeScan(data, offset, + frame, components, resetInterval, + spectralStart, spectralEnd, + successiveApproximation >> 4, successiveApproximation & 15, + parseDNLMarker); + offset += processed; + } catch (ex) { + if (ex instanceof DNLMarkerError) { + warn('Attempting to re-parse JPEG image using "scanLines" ' + + 'parameter found in DNL marker (0xFFDC) segment.'); + return this.parse(data, { dnlScanLines: ex.scanLines, }); + } + throw ex; + } + break; + + case 0xFFDC: // DNL (Define Number of Lines) + // Ignore the marker, since it's being handled in `decodeScan`. + offset += 4; break; case 0xFFFF: // Fill bytes diff --git a/test/pdfs/issue8614.pdf.link b/test/pdfs/issue8614.pdf.link new file mode 100644 index 000000000..04cccb01c --- /dev/null +++ b/test/pdfs/issue8614.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/1125123/OBW-OVK.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 5d56a8a00..7a68a2963 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -3194,6 +3194,14 @@ "link": true, "type": "eq" }, + { "id": "issue8614", + "file": "pdfs/issue8614.pdf", + "md5": "7e8b66cf674ac2b79d6b267d0c6f2fa2", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "eq" + }, { "id": "bug1108753", "file": "pdfs/bug1108753.pdf", "md5": "a7aaf92d55b4602afb0ca3d75198b56b",