Merge pull request #9427 from Snuffleupagus/native-JPEG-decoding-fallback

Fall back to the built-in JPEG decoder when browser decoding fails, and attempt to handle JPEG images with DNL (Define Number of Lines) markers (issue 8614)
This commit is contained in:
Tim van der Meij 2018-02-09 21:36:08 +01:00 committed by GitHub
commit 7bb066494f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 154 additions and 63 deletions

View File

@ -80,11 +80,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var colorSpace = dict.get('ColorSpace', 'CS'); var colorSpace = dict.get('ColorSpace', 'CS');
colorSpace = ColorSpace.parse(colorSpace, this.xref, this.resources, colorSpace = ColorSpace.parse(colorSpace, this.xref, this.resources,
this.pdfFunctionFactory); this.pdfFunctionFactory);
var numComps = colorSpace.numComps;
var decodePromise = this.handler.sendWithPromise('JpegDecode', return this.handler.sendWithPromise('JpegDecode', [
[image.getIR(this.forceDataSchema), numComps]); image.getIR(this.forceDataSchema), colorSpace.numComps
return decodePromise.then(function (message) { ]).then(function({ data, width, height, }) {
var data = message.data;
return new Stream(data, 0, data.length, image.dict); return new Stream(data, 0, data.length, image.dict);
}); });
}, },
@ -349,22 +348,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}); });
}, },
buildPaintImageXObject: buildPaintImageXObject({ resources, image, isInline = false, operatorList,
function PartialEvaluator_buildPaintImageXObject(resources, image, cacheKey, imageCache,
inline, operatorList, forceDisableNativeImageDecoder = false, }) {
cacheKey, imageCache) {
var dict = image.dict; var dict = image.dict;
var w = dict.get('Width', 'W'); var w = dict.get('Width', 'W');
var h = dict.get('Height', 'H'); var h = dict.get('Height', 'H');
if (!(w && isNum(w)) || !(h && isNum(h))) { if (!(w && isNum(w)) || !(h && isNum(h))) {
warn('Image dimensions are missing, or not numbers.'); warn('Image dimensions are missing, or not numbers.');
return; return Promise.resolve();
} }
var maxImageSize = this.options.maxImageSize; var maxImageSize = this.options.maxImageSize;
if (maxImageSize !== -1 && w * h > maxImageSize) { if (maxImageSize !== -1 && w * h > maxImageSize) {
warn('Image exceeded maximum allowed size and was removed.'); warn('Image exceeded maximum allowed size and was removed.');
return; return Promise.resolve();
} }
var imageMask = (dict.get('ImageMask', 'IM') || false); var imageMask = (dict.get('ImageMask', 'IM') || false);
@ -398,7 +396,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
args, args,
}; };
} }
return; return Promise.resolve();
} }
var softMask = (dict.get('SMask', 'SM') || false); var softMask = (dict.get('SMask', 'SM') || false);
@ -406,44 +404,63 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var SMALL_IMAGE_DIMENSIONS = 200; var SMALL_IMAGE_DIMENSIONS = 200;
// Inlining small images into the queue as RGB data // Inlining small images into the queue as RGB data
if (inline && !softMask && !mask && !(image instanceof JpegStream) && if (isInline && !softMask && !mask && !(image instanceof JpegStream) &&
(w + h) < SMALL_IMAGE_DIMENSIONS) { (w + h) < SMALL_IMAGE_DIMENSIONS) {
let imageObj = new PDFImage({ let imageObj = new PDFImage({
xref: this.xref, xref: this.xref,
res: resources, res: resources,
image, image,
isInline: inline, isInline,
pdfFunctionFactory: this.pdfFunctionFactory, pdfFunctionFactory: this.pdfFunctionFactory,
}); });
// We force the use of RGBA_32BPP images here, because we can't handle // We force the use of RGBA_32BPP images here, because we can't handle
// any other kind. // any other kind.
imgData = imageObj.createImageData(/* forceRGBA = */ true); imgData = imageObj.createImageData(/* forceRGBA = */ true);
operatorList.addOp(OPS.paintInlineImageXObject, [imgData]); operatorList.addOp(OPS.paintInlineImageXObject, [imgData]);
return; return Promise.resolve();
} }
var nativeImageDecoderSupport = this.options.nativeImageDecoderSupport; const nativeImageDecoderSupport = forceDisableNativeImageDecoder ?
NativeImageDecoding.NONE : this.options.nativeImageDecoderSupport;
// If there is no imageMask, create the PDFImage and a lot // If there is no imageMask, create the PDFImage and a lot
// of image processing can be done here. // of image processing can be done here.
var objId = 'img_' + this.idFactory.createObjId(); var objId = 'img_' + this.idFactory.createObjId();
operatorList.addDependency(objId);
args = [objId, w, h];
if (nativeImageDecoderSupport !== NativeImageDecoding.NONE && if (nativeImageDecoderSupport !== NativeImageDecoding.NONE &&
!softMask && !mask && image instanceof JpegStream && !softMask && !mask && image instanceof JpegStream &&
NativeImageDecoder.isSupported(image, this.xref, resources, NativeImageDecoder.isSupported(image, this.xref, resources,
this.pdfFunctionFactory)) { this.pdfFunctionFactory)) {
// These JPEGs don't need any more processing so we can just send it. // These JPEGs don't need any more processing so we can just send it.
operatorList.addOp(OPS.paintJpegXObject, args); return this.handler.sendWithPromise('obj', [
this.handler.send('obj', [objId, this.pageIndex, 'JpegStream', objId, this.pageIndex, 'JpegStream',
image.getIR(this.options.forceDataSchema)]); image.getIR(this.options.forceDataSchema)
if (cacheKey) { ]).then(function() {
imageCache[cacheKey] = { // Only add the dependency once we know that the native JPEG decoding
fn: OPS.paintJpegXObject, // succeeded, to ensure that rendering will always complete.
args, operatorList.addDependency(objId);
}; args = [objId, w, h];
}
return; operatorList.addOp(OPS.paintJpegXObject, args);
if (cacheKey) {
imageCache[cacheKey] = {
fn: OPS.paintJpegXObject,
args,
};
}
}, (reason) => {
warn('Native JPEG decoding failed -- trying to recover: ' +
(reason && reason.message));
// Try to decode the JPEG image with the built-in decoder instead.
return this.buildPaintImageXObject({
resources,
image,
isInline,
operatorList,
cacheKey,
imageCache,
forceDisableNativeImageDecoder: true,
});
});
} }
// Creates native image decoder only if a JPEG image or mask is present. // Creates native image decoder only if a JPEG image or mask is present.
@ -460,12 +477,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}); });
} }
// Ensure that the dependency is added before the image is decoded.
operatorList.addDependency(objId);
args = [objId, w, h];
PDFImage.buildImage({ PDFImage.buildImage({
handler: this.handler, handler: this.handler,
xref: this.xref, xref: this.xref,
res: resources, res: resources,
image, image,
isInline: inline, isInline,
nativeDecoder: nativeImageDecoder, nativeDecoder: nativeImageDecoder,
pdfFunctionFactory: this.pdfFunctionFactory, pdfFunctionFactory: this.pdfFunctionFactory,
}).then((imageObj) => { }).then((imageObj) => {
@ -484,6 +505,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
args, args,
}; };
} }
return Promise.resolve();
}, },
handleSMask: function PartialEvaluator_handleSmask(smask, resources, handleSMask: function PartialEvaluator_handleSmask(smask, resources,
@ -989,8 +1011,14 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}, rejectXObject); }, rejectXObject);
return; return;
} else if (type.name === 'Image') { } else if (type.name === 'Image') {
self.buildPaintImageXObject(resources, xobj, false, self.buildPaintImageXObject({
operatorList, name, imageCache); resources,
image: xobj,
operatorList,
cacheKey: name,
imageCache,
}).then(resolveXObject, rejectXObject);
return;
} else if (type.name === 'PS') { } else if (type.name === 'PS') {
// PostScript XObjects are unused when viewing documents. // PostScript XObjects are unused when viewing documents.
// See section 4.7.1 of Adobe's PDF reference. // See section 4.7.1 of Adobe's PDF reference.
@ -1032,10 +1060,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
continue; continue;
} }
} }
self.buildPaintImageXObject(resources, args[0], true, next(self.buildPaintImageXObject({
operatorList, cacheKey, imageCache); resources,
args = null; image: args[0],
continue; isInline: true,
operatorList,
cacheKey,
imageCache,
}));
return;
case OPS.showText: case OPS.showText:
args[0] = self.handleText(args[0], stateManager.state); args[0] = self.handleText(args[0], stateManager.state);
break; break;

View File

@ -27,7 +27,11 @@ var PDFImage = (function PDFImageClosure() {
*/ */
function handleImageData(image, nativeDecoder) { function handleImageData(image, nativeDecoder) {
if (nativeDecoder && nativeDecoder.canDecode(image)) { if (nativeDecoder && nativeDecoder.canDecode(image)) {
return nativeDecoder.decode(image); return nativeDecoder.decode(image).catch((reason) => {
warn('Native image decoding failed -- trying to recover: ' +
(reason && reason.message));
return image;
});
} }
return Promise.resolve(image); return Promise.resolve(image);
} }

View File

@ -28,6 +28,19 @@ let JpegError = (function JpegErrorClosure() {
return JpegError; return JpegError;
})(); })();
/**
 * Error type carrying the number of scan lines read from a DNL (Define
 * Number of Lines, 0xFFDC) marker segment.
 *
 * Instances expose:
 *  - `message`:   the human-readable description passed to the constructor.
 *  - `scanLines`: the scan line count passed to the constructor.
 *  - `name`:      always 'DNLMarkerError' (inherited from the prototype).
 */
let DNLMarkerError = (function DNLMarkerErrorClosure() {
  function DNLMarkerError(message, scanLines) {
    this.message = message;
    this.scanLines = scanLines;
  }

  DNLMarkerError.prototype = new Error();
  DNLMarkerError.prototype.name = 'DNLMarkerError';
  // Replacing the prototype above discards the default `constructor` link, so
  // it has to be restored on the *prototype*. Assigning it on the function
  // itself would leave `instance.constructor` pointing at `Error`.
  DNLMarkerError.prototype.constructor = DNLMarkerError;

  return DNLMarkerError;
})();
/** /**
* This code was forked from https://github.com/notmasteryet/jpgjs. * This code was forked from https://github.com/notmasteryet/jpgjs.
* The original version was created by GitHub user notmasteryet. * The original version was created by GitHub user notmasteryet.
@ -112,7 +125,8 @@ var JpegImage = (function JpegImageClosure() {
} }
function decodeScan(data, offset, frame, components, resetInterval, function decodeScan(data, offset, frame, components, resetInterval,
spectralStart, spectralEnd, successivePrev, successive) { spectralStart, spectralEnd, successivePrev, successive,
parseDNLMarker = false) {
var mcusPerLine = frame.mcusPerLine; var mcusPerLine = frame.mcusPerLine;
var progressive = frame.progressive; var progressive = frame.progressive;
@ -127,6 +141,14 @@ var JpegImage = (function JpegImageClosure() {
if (bitsData === 0xFF) { if (bitsData === 0xFF) {
var nextByte = data[offset++]; var nextByte = data[offset++];
if (nextByte) { if (nextByte) {
if (nextByte === 0xDC && parseDNLMarker) { // DNL == 0xFFDC
offset += 2; // Skip data length.
const scanLines = (data[offset++] << 8) | data[offset++];
if (scanLines > 0 && scanLines !== frame.scanLines) {
throw new DNLMarkerError(
'Found DNL marker (0xFFDC) while parsing scan data', scanLines);
}
}
throw new JpegError( throw new JpegError(
`unexpected marker ${((bitsData << 8) | nextByte).toString(16)}`); `unexpected marker ${((bitsData << 8) | nextByte).toString(16)}`);
} }
@ -635,7 +657,7 @@ var JpegImage = (function JpegImageClosure() {
} }
JpegImage.prototype = { JpegImage.prototype = {
parse: function parse(data) { parse(data, { dnlScanLines = null, } = {}) {
function readUint16() { function readUint16() {
var value = (data[offset] << 8) | data[offset + 1]; var value = (data[offset] << 8) | data[offset + 1];
@ -685,6 +707,7 @@ var JpegImage = (function JpegImageClosure() {
var jfif = null; var jfif = null;
var adobe = null; var adobe = null;
var frame, resetInterval; var frame, resetInterval;
let numSOSMarkers = 0;
var quantizationTables = []; var quantizationTables = [];
var huffmanTablesAC = [], huffmanTablesDC = []; var huffmanTablesAC = [], huffmanTablesDC = [];
var fileMarker = readUint16(); var fileMarker = readUint16();
@ -781,7 +804,8 @@ var JpegImage = (function JpegImageClosure() {
frame.extended = (fileMarker === 0xFFC1); frame.extended = (fileMarker === 0xFFC1);
frame.progressive = (fileMarker === 0xFFC2); frame.progressive = (fileMarker === 0xFFC2);
frame.precision = data[offset++]; frame.precision = data[offset++];
frame.scanLines = readUint16(); const sofScanLines = readUint16();
frame.scanLines = dnlScanLines || sofScanLines;
frame.samplesPerLine = readUint16(); frame.samplesPerLine = readUint16();
frame.components = []; frame.components = [];
frame.componentIds = {}; frame.componentIds = {};
@ -839,6 +863,12 @@ var JpegImage = (function JpegImageClosure() {
break; break;
case 0xFFDA: // SOS (Start of Scan) case 0xFFDA: // SOS (Start of Scan)
// A DNL marker (0xFFDC), if it exists, is only allowed at the end
// of the first scan segment and may only occur once in an image.
// Furthermore, to prevent an infinite loop, do *not* attempt to
// parse DNL markers during re-parsing of the JPEG scan data.
const parseDNLMarker = (++numSOSMarkers) === 1 && !dnlScanLines;
readUint16(); // scanLength readUint16(); // scanLength
var selectorsCount = data[offset++]; var selectorsCount = data[offset++];
var components = [], component; var components = [], component;
@ -853,11 +883,26 @@ var JpegImage = (function JpegImageClosure() {
var spectralStart = data[offset++]; var spectralStart = data[offset++];
var spectralEnd = data[offset++]; var spectralEnd = data[offset++];
var successiveApproximation = data[offset++]; var successiveApproximation = data[offset++];
var processed = decodeScan(data, offset, try {
frame, components, resetInterval, var processed = decodeScan(data, offset,
spectralStart, spectralEnd, frame, components, resetInterval,
successiveApproximation >> 4, successiveApproximation & 15); spectralStart, spectralEnd,
offset += processed; successiveApproximation >> 4, successiveApproximation & 15,
parseDNLMarker);
offset += processed;
} catch (ex) {
if (ex instanceof DNLMarkerError) {
warn('Attempting to re-parse JPEG image using "scanLines" ' +
'parameter found in DNL marker (0xFFDC) segment.');
return this.parse(data, { dnlScanLines: ex.scanLines, });
}
throw ex;
}
break;
case 0xFFDC: // DNL (Define Number of Lines)
// Ignore the marker, since it's being handled in `decodeScan`.
offset += 4;
break; break;
case 0xFFFF: // Fill bytes case 0xFFFF: // Fill bytes

View File

@ -16,10 +16,9 @@
import { import {
assert, createPromiseCapability, getVerbosityLevel, info, InvalidPDFException, assert, createPromiseCapability, getVerbosityLevel, info, InvalidPDFException,
isArrayBuffer, isSameOrigin, loadJpegStream, MessageHandler, isArrayBuffer, isSameOrigin, MessageHandler, MissingPDFException,
MissingPDFException, NativeImageDecoding, PageViewport, PasswordException, NativeImageDecoding, PageViewport, PasswordException, stringToBytes,
stringToBytes, UnexpectedResponseException, UnknownErrorException, UnexpectedResponseException, UnknownErrorException, unreachable, Util, warn
unreachable, Util, warn
} from '../shared/util'; } from '../shared/util';
import { import {
DOMCanvasFactory, DOMCMapReaderFactory, DummyStatTimer, getDefaultSetting, DOMCanvasFactory, DOMCMapReaderFactory, DummyStatTimer, getDefaultSetting,
@ -1818,8 +1817,22 @@ var WorkerTransport = (function WorkerTransportClosure() {
switch (type) { switch (type) {
case 'JpegStream': case 'JpegStream':
imageData = data[3]; imageData = data[3];
loadJpegStream(id, imageData, pageProxy.objs); return new Promise((resolve, reject) => {
break; const img = new Image();
img.onload = function() {
resolve(img);
};
img.onerror = function() {
reject(new Error('Error during JPEG image loading'));
// Note that when the browser image loading/decoding fails,
// we'll fallback to the built-in PDF.js JPEG decoder; see
// `PartialEvaluator.buildPaintImageXObject` in the
// `src/core/evaluator.js` file.
};
img.src = imageData;
}).then((img) => {
pageProxy.objs.resolve(id, img);
});
case 'Image': case 'Image':
imageData = data[3]; imageData = data[3];
pageProxy.objs.resolve(id, imageData); pageProxy.objs.resolve(id, imageData);

View File

@ -1569,18 +1569,6 @@ MessageHandler.prototype = {
}, },
}; };
/**
 * Loads `imageUrl` through an `Image` element and reports the outcome on
 * `objs` under the given `id`.
 *
 * On success `objs.resolve(id, image)` is called with the loaded element; on
 * failure `objs.resolve(id, null)` is called and a warning is emitted.
 *
 * @param id - Key passed through to `objs.resolve`.
 * @param imageUrl - Value assigned to the image element's `src`.
 * @param objs - Object providing a `resolve(id, value)` method.
 */
function loadJpegStream(id, imageUrl, objs) {
  const jpegImage = new Image();

  jpegImage.onload = function loadJpegStream_onloadClosure() {
    objs.resolve(id, jpegImage);
  };

  jpegImage.onerror = function loadJpegStream_onerrorClosure() {
    // Resolve with `null` first, then log the failure.
    objs.resolve(id, null);
    warn('Error during JPEG image loading');
  };

  // Handlers are attached before `src` is set, which starts the load.
  jpegImage.src = imageUrl;
}
export { export {
FONT_IDENTITY_MATRIX, FONT_IDENTITY_MATRIX,
IDENTITY_MATRIX, IDENTITY_MATRIX,
@ -1632,7 +1620,6 @@ export {
createValidAbsoluteUrl, createValidAbsoluteUrl,
isLittleEndian, isLittleEndian,
isEvalSupported, isEvalSupported,
loadJpegStream,
log2, log2,
readInt8, readInt8,
readUint16, readUint16,

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/1125123/OBW-OVK.pdf

View File

@ -3201,6 +3201,14 @@
"link": true, "link": true,
"type": "eq" "type": "eq"
}, },
{ "id": "issue8614",
"file": "pdfs/issue8614.pdf",
"md5": "7e8b66cf674ac2b79d6b267d0c6f2fa2",
"rounds": 1,
"link": true,
"lastPage": 1,
"type": "eq"
},
{ "id": "bug1108753", { "id": "bug1108753",
"file": "pdfs/bug1108753.pdf", "file": "pdfs/bug1108753.pdf",
"md5": "a7aaf92d55b4602afb0ca3d75198b56b", "md5": "a7aaf92d55b4602afb0ca3d75198b56b",