From be604bd195b18320bc8b73c0e3e2e47b75a444b5 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 11 Apr 2019 12:26:15 +0200 Subject: [PATCH] Support (rare) Type3 fonts which contain image resources (issue 10717) The Type3 font type is not commonly used in PDF documents, as can be seen from telemetry data such as: https://telemetry.mozilla.org/new-pipeline/dist.html#!cumulative=0&end_date=2019-04-09&include_spill=0&keys=__none__!__none__!__none__&max_channel_version=nightly%252F68&measure=PDF_VIEWER_FONT_TYPES&min_channel_version=nightly%252F57&processType=*&product=Firefox&sanitize=1&sort_by_value=0&sort_keys=submissions&start_date=2019-03-18&table=0&trim=1&use_submission_date=0 (see also https://github.com/mozilla/pdf.js/wiki/Enumeration-Assignments-for-the-Telemetry-Histograms#pdf_viewer_font_types). Type3 fonts containing image resources are *very* rare in practice (usually they only contain path rendering operators), but as the issue shows, they unfortunately do exist. Currently these Type3-related image resources are not handled in any special way, and given that fonts are document-specific rather than page-specific, rendering breaks since the image resources are thus not available to the *entire* document. Fortunately, fixing this isn't too difficult, but it does require adding a couple of Type3-specific code-paths to the `PartialEvaluator`. In order to keep the implementation simple, particularly on the main-thread, these Type3 image resources are completely decoded on the worker-thread to avoid adding too many special cases. This should not cause any issues, only marginally less efficient code, but given how rare this kind of Type3 font is, adding premature optimizations didn't seem at all warranted at this point. 
--- src/core/evaluator.js | 50 ++++++++++++++++++++++++++++------- src/display/api.js | 1 + src/display/canvas.js | 16 +++++------ test/pdfs/issue10717.pdf.link | 1 + test/test_manifest.json | 10 +++++++ 5 files changed, 60 insertions(+), 18 deletions(-) create mode 100644 test/pdfs/issue10717.pdf.link diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 2376799c3..d21e27c0b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -73,6 +73,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { this.builtInCMapCache = builtInCMapCache; this.options = options || DefaultPartialEvaluatorOptions; this.pdfFunctionFactory = pdfFunctionFactory; + this.parsingType3Font = false; this.fetchBuiltInCMap = async (name) => { if (this.builtInCMapCache.has(name)) { @@ -293,21 +294,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }); }, - buildPaintImageXObject({ resources, image, isInline = false, operatorList, - cacheKey, imageCache, - forceDisableNativeImageDecoder = false, }) { + async buildPaintImageXObject({ resources, image, isInline = false, + operatorList, cacheKey, imageCache, + forceDisableNativeImageDecoder = false, }) { var dict = image.dict; var w = dict.get('Width', 'W'); var h = dict.get('Height', 'H'); if (!(w && isNum(w)) || !(h && isNum(h))) { warn('Image dimensions are missing, or not numbers.'); - return Promise.resolve(); + return; } var maxImageSize = this.options.maxImageSize; if (maxImageSize !== -1 && w * h > maxImageSize) { warn('Image exceeded maximum allowed size and was removed.'); - return Promise.resolve(); + return; } var imageMask = (dict.get('ImageMask', 'IM') || false); @@ -343,7 +344,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { args, }; } - return Promise.resolve(); + return; } var softMask = (dict.get('SMask', 'SM') || false); @@ -364,14 +365,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // any other kind. 
imgData = imageObj.createImageData(/* forceRGBA = */ true); operatorList.addOp(OPS.paintInlineImageXObject, [imgData]); - return Promise.resolve(); + return; } const nativeImageDecoderSupport = forceDisableNativeImageDecoder ? NativeImageDecoding.NONE : this.options.nativeImageDecoderSupport; // If there is no imageMask, create the PDFImage and a lot // of image processing can be done here. - var objId = 'img_' + this.idFactory.createObjId(); + let objId = 'img_' + this.idFactory.createObjId(); + + if (this.parsingType3Font) { + assert(nativeImageDecoderSupport === NativeImageDecoding.NONE, + 'Type3 image resources should be completely decoded in the worker.'); + + objId = `g_${this.pdfManager.docId}_type3res_${objId}`; + } if (nativeImageDecoderSupport !== NativeImageDecoding.NONE && !softMask && !mask && image instanceof JpegStream && @@ -428,7 +436,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { operatorList.addDependency(objId); args = [objId, w, h]; - PDFImage.buildImage({ + const imgPromise = PDFImage.buildImage({ handler: this.handler, xref: this.xref, res: resources, @@ -438,13 +446,30 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { pdfFunctionFactory: this.pdfFunctionFactory, }).then((imageObj) => { var imgData = imageObj.createImageData(/* forceRGBA = */ false); + + if (this.parsingType3Font) { + return this.handler.sendWithPromise('commonobj', + [objId, 'FontType3Res', imgData], [imgData.data.buffer]); + } this.handler.send('obj', [objId, this.pageIndex, 'Image', imgData], [imgData.data.buffer]); }).catch((reason) => { warn('Unable to decode image: ' + reason); + + if (this.parsingType3Font) { + return this.handler.sendWithPromise('commonobj', + [objId, 'FontType3Res', null]); + } this.handler.send('obj', [objId, this.pageIndex, 'Image', null]); }); + if (this.parsingType3Font) { + // In the very rare case where a Type3 image resource is being parsed, + // wait for the image to be both decoded *and* sent to simplify the 
+ // rendering code on the main-thread (see issue10717.pdf). + await imgPromise; + } + operatorList.addOp(OPS.paintImageXObject, args); if (cacheKey) { imageCache[cacheKey] = { @@ -452,7 +477,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { args, }; } - return Promise.resolve(); }, handleSMask: function PartialEvaluator_handleSmask(smask, resources, @@ -2622,9 +2646,15 @@ var TranslatedFont = (function TranslatedFontClosure() { // When parsing Type3 glyphs, always ignore them if there are errors. // Compared to the parsing of e.g. an entire page, it doesn't really // make sense to only be able to render a Type3 glyph partially. + // + // Also, ensure that any Type3 image resources (which should be very rare + // in practice) are completely decoded on the worker-thread, to simplify + // the rendering code on the main-thread (see issue10717.pdf). var type3Options = Object.create(evaluator.options); type3Options.ignoreErrors = false; + type3Options.nativeImageDecoderSupport = NativeImageDecoding.NONE; var type3Evaluator = evaluator.clone(type3Options); + type3Evaluator.parsingType3Font = true; var translatedFont = this.font; var loadCharProcsPromise = Promise.resolve(); diff --git a/src/display/api.js b/src/display/api.js index e73046361..95fe99f42 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -2003,6 +2003,7 @@ class WorkerTransport { }); break; case 'FontPath': + case 'FontType3Res': this.commonObjs.resolve(id, exportedData); break; default: diff --git a/src/display/canvas.js b/src/display/canvas.js index 1531e0534..89aec6261 100644 --- a/src/display/canvas.js +++ b/src/display/canvas.js @@ -805,11 +805,8 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { if (fnId !== OPS.dependency) { this[fnId].apply(this, argsArray[i]); } else { - var deps = argsArray[i]; - for (var n = 0, nn = deps.length; n < nn; n++) { - var depObjId = deps[n]; - var common = depObjId[0] === 'g' && depObjId[1] === '_'; - var objsPool = common ? 
commonObjs : objs; + for (const depObjId of argsArray[i]) { + const objsPool = depObjId.startsWith('g_') ? commonObjs : objs; // If the promise isn't resolved yet, add the continueCallback // to the promise and bail out. @@ -1930,7 +1927,8 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { }, paintJpegXObject: function CanvasGraphics_paintJpegXObject(objId, w, h) { - var domImage = this.objs.get(objId); + const domImage = this.processingType3 ? this.commonObjs.get(objId) : + this.objs.get(objId); if (!domImage) { warn('Dependent image isn\'t ready yet'); return; @@ -2067,7 +2065,8 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { }, paintImageXObject: function CanvasGraphics_paintImageXObject(objId) { - var imgData = this.objs.get(objId); + const imgData = this.processingType3 ? this.commonObjs.get(objId) : + this.objs.get(objId); if (!imgData) { warn('Dependent image isn\'t ready yet'); return; @@ -2079,7 +2078,8 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { paintImageXObjectRepeat: function CanvasGraphics_paintImageXObjectRepeat(objId, scaleX, scaleY, positions) { - var imgData = this.objs.get(objId); + const imgData = this.processingType3 ? 
this.commonObjs.get(objId) : + this.objs.get(objId); if (!imgData) { warn('Dependent image isn\'t ready yet'); return; diff --git a/test/pdfs/issue10717.pdf.link b/test/pdfs/issue10717.pdf.link new file mode 100644 index 000000000..6ac762df4 --- /dev/null +++ b/test/pdfs/issue10717.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/3057353/test.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index c6051b8f4..3b8b7b6cc 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1252,6 +1252,16 @@ "rounds": 1, "type": "text" }, + { "id": "issue10717", + "file": "pdfs/issue10717.pdf", + "md5": "6d2ed03db798cc6beb3c7bdf103f5c1a", + "link": true, + "rounds": 1, + "firstPage": 1, + "lastPage": 2, + "type": "eq", + "about": "Type3 fonts with image resources; both pages need to be tested, otherwise the bug won't manifest." + }, { "id": "close-path-bug", "file": "pdfs/close-path-bug.pdf", "md5": "48dd17ef58393857d2d038d33699cac5",