From 9f02cc36d48d349011916ac1878dede898528934 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 14 Dec 2023 21:57:48 +0100 Subject: [PATCH] Attempt to further reduce re-parsing for globally cached images (PR 11912, 16108 follow-up) In PR 11912 we started caching images that occur on multiple pages globally, which improved performance a lot in many PDF documents. However, one slightly annoying limitation of the implementation is the need to re-parse the image once the global-caching threshold has been reached. Previously this was difficult to avoid, since large image-resources will cause cleanup to run on the main-thread after rendering has finished. In PR 16108 we started delaying this cleanup a little bit, to improve performance if a user e.g. zooms and/or rotates the document immediately after rendering completes. Taking those two PRs together, we now have a situation where it's much more likely that the main-thread has "globally used" images cached at the page-level. Hence we can instead attempt to *copy* a locally cached image into the global object-cache on the main-thread and thus reduce unnecessary re-parsing of large/complex global images, which significantly reduces the rendering time in many cases. For the PDF document in issue 11878, the rendering time of *the second page* changes as follows (on my computer): - With the `master`-branch it takes >600 ms to render. - With this patch that goes down to ~50 ms, which is one order of magnitude faster. (Note that all other pages are, as expected, completely unaffected by these changes.) This new main-thread copying is limited to "large" global images, since: - Re-parsing of small images, on the worker-thread, is usually fast enough to not be an issue. - With the delayed cleanup after rendering, it's still not guaranteed that an image is available in a page-level cache on the main-thread. - This forces the worker-thread to wait for the main-thread, which is a pattern that you always want to avoid unless absolutely necessary. --- src/core/evaluator.js | 36 +++++++++++++++++++++++++++++------- src/display/api.js | 32 ++++++++++++++++++++++++++++++-- test/unit/api_spec.js | 38 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 95 insertions(+), 11 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index ec363c73d..1a0ac5806 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -771,13 +771,15 @@ class PartialEvaluator { if (this.parsingType3Font) { objId = `${this.idFactory.getDocId()}_type3_${objId}`; - } else if (imageRef) { + } else if (cacheKey && imageRef) { cacheGlobally = this.globalImageCache.shouldCache( imageRef, this.pageIndex ); if (cacheGlobally) { + assert(!isInline, "Cannot cache an inline image globally."); + objId = `${this.idFactory.getDocId()}_${objId}`; } } @@ -785,6 +787,30 @@ class PartialEvaluator { // Ensure that the dependency is added before the image is decoded. operatorList.addDependency(objId); args = [objId, w, h]; + operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent); + + // For large images, at least 500x500 in size, that we'll cache globally + // check if the image is still cached locally on the main-thread to avoid + // having to re-parse the image (since that can be slow). + if (cacheGlobally && w * h > 250000) { + const localLength = await this.handler.sendWithPromise("commonobj", [ + objId, + "CopyLocalImage", + { imageRef }, + ]); + + if (localLength) { + this.globalImageCache.setData(imageRef, { + objId, + fn: OPS.paintImageXObject, + args, + optionalContent, + byteSize: 0, // Temporary entry, to avoid `setData` returning early. + }); + this.globalImageCache.addByteSize(imageRef, localLength); + return; + } + } PDFImage.buildImage({ xref: this.xref, @@ -803,11 +829,11 @@ class PartialEvaluator { imgData.dataLen = imgData.bitmap ? imgData.width * imgData.height * 4 : imgData.data.length; + imgData.ref = imageRef; - if (cacheKey && imageRef && cacheGlobally) { + if (cacheGlobally) { this.globalImageCache.addByteSize(imageRef, imgData.dataLen); } - return this._sendImgData(objId, imgData, cacheGlobally); }) .catch(reason => { @@ -816,8 +842,6 @@ class PartialEvaluator { return this._sendImgData(objId, /* imgData = */ null, cacheGlobally); }); - operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent); - if (cacheKey) { const cacheData = { fn: OPS.paintImageXObject, @@ -830,8 +854,6 @@ class PartialEvaluator { this._regionalImageCache.set(/* name = */ null, imageRef, cacheData); if (cacheGlobally) { - assert(!isInline, "Cannot cache an inline image globally."); - this.globalImageCache.setData(imageRef, { objId, fn: OPS.paintImageXObject, diff --git a/src/display/api.js b/src/display/api.js index d77830e58..037c7d0da 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -2704,11 +2704,11 @@ class WorkerTransport { messageHandler.on("commonobj", ([id, type, exportedData]) => { if (this.destroyed) { - return; // Ignore any pending requests if the worker was terminated. + return null; // Ignore any pending requests if the worker was terminated. } if (this.commonObjs.has(id)) { - return; + return null; } switch (type) { @@ -2750,6 +2750,23 @@ class WorkerTransport { this.commonObjs.resolve(id, font); }); break; + case "CopyLocalImage": + const { imageRef } = exportedData; + assert(imageRef, "The imageRef must be defined."); + + for (const pageProxy of this.#pageCache.values()) { + for (const [, data] of pageProxy.objs) { + if (data.ref !== imageRef) { + continue; + } + if (!data.dataLen) { + return null; + } + this.commonObjs.resolve(id, structuredClone(data)); + return data.dataLen; + } + } + break; case "FontPath": case "Image": case "Pattern": @@ -2758,6 +2775,8 @@ class WorkerTransport { default: throw new Error(`Got unknown common object type ${type}`); } + + return null; }); messageHandler.on("obj", ([id, pageIndex, type, imageData]) => { @@ -3166,6 +3185,15 @@ class RenderTask { * @type {function} */ this.onContinue = null; + + if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { + // For testing purposes. + Object.defineProperty(this, "getOperatorList", { + value: () => { + return this.#internalRenderTask.operatorList; + }, + }); + } } /** diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 2db8a25dd..e0a349506 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -3811,14 +3811,36 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) const loadingTask = getDocument( buildGetDocumentParams("issue11878.pdf", { isOffscreenCanvasSupported: false, + pdfBug: true, }) ); const pdfDoc = await loadingTask.promise; - let firstImgData = null; + let checkedCopyLocalImage = false, + firstImgData = null, + firstStatsOverall = null; for (let i = 1; i <= pdfDoc.numPages; i++) { const pdfPage = await pdfDoc.getPage(i); - const opList = await pdfPage.getOperatorList(); + const viewport = pdfPage.getViewport({ scale: 1 }); + + const canvasAndCtx = CanvasFactory.create( + viewport.width, + viewport.height + ); + const renderTask = pdfPage.render({ + canvasContext: canvasAndCtx.context, + viewport, + }); + + await renderTask.promise; + const opList = renderTask.getOperatorList(); + // The canvas is no longer necessary, since we only care about + // the image-data below. + CanvasFactory.destroy(canvasAndCtx); + + const [statsOverall] = pdfPage.stats.times + .filter(time => time.name === "Overall") + .map(time => time.end - time.start); const { commonObjs, objs } = pdfPage; const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject); @@ -3843,6 +3865,7 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) // Ensure that the actual image data is identical for all pages. if (i === 1) { firstImgData = objs.get(objId); + firstStatsOverall = statsOverall; expect(firstImgData.width).toEqual(EXPECTED_WIDTH); expect(firstImgData.height).toEqual(EXPECTED_HEIGHT); @@ -3854,6 +3877,8 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs; const currentImgData = objsPool.get(objId); + expect(currentImgData).not.toBe(firstImgData); + expect(currentImgData.width).toEqual(firstImgData.width); expect(currentImgData.height).toEqual(firstImgData.height); @@ -3866,11 +3891,20 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) return value === firstImgData.data[index]; }) ).toEqual(true); + + if (i === NUM_PAGES_THRESHOLD) { + checkedCopyLocalImage = true; + // Ensure that the image was copied in the main-thread, rather + // than being re-parsed in the worker-thread (which is slower). + expect(statsOverall).toBeLessThan(firstStatsOverall / 5); + } } } + expect(checkedCopyLocalImage).toBeTruthy(); await loadingTask.destroy(); firstImgData = null; + firstStatsOverall = null; }); it("render for printing, with `printAnnotationStorage` set", async function () {