diff --git a/src/core/document.js b/src/core/document.js index b3d3de419..f2d80c07b 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -74,6 +74,7 @@ class Page { ref, fontCache, builtInCMapCache, + globalImageCache, pdfFunctionFactory, }) { this.pdfManager = pdfManager; @@ -83,6 +84,7 @@ class Page { this.ref = ref; this.fontCache = fontCache; this.builtInCMapCache = builtInCMapCache; + this.globalImageCache = globalImageCache; this.pdfFunctionFactory = pdfFunctionFactory; this.evaluatorOptions = pdfManager.evaluatorOptions; this.resourcesPromise = null; @@ -261,6 +263,7 @@ class Page { idFactory: this.idFactory, fontCache: this.fontCache, builtInCMapCache: this.builtInCMapCache, + globalImageCache: this.globalImageCache, options: this.evaluatorOptions, pdfFunctionFactory: this.pdfFunctionFactory, }); @@ -354,6 +357,7 @@ class Page { idFactory: this.idFactory, fontCache: this.fontCache, builtInCMapCache: this.builtInCMapCache, + globalImageCache: this.globalImageCache, options: this.evaluatorOptions, pdfFunctionFactory: this.pdfFunctionFactory, }); @@ -816,6 +820,7 @@ class PDFDocument { ref, fontCache: catalog.fontCache, builtInCMapCache: catalog.builtInCMapCache, + globalImageCache: catalog.globalImageCache, pdfFunctionFactory: this.pdfFunctionFactory, }); })); diff --git a/src/core/evaluator.js b/src/core/evaluator.js index e02c1fa7b..7bac0edde 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -105,6 +105,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { idFactory, fontCache, builtInCMapCache, + globalImageCache, options = null, pdfFunctionFactory, }) { @@ -114,6 +115,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { this.idFactory = idFactory; this.fontCache = fontCache; this.builtInCMapCache = builtInCMapCache; + this.globalImageCache = globalImageCache; this.options = options || DefaultPartialEvaluatorOptions; this.pdfFunctionFactory = pdfFunctionFactory; this.parsingType3Font = false; @@ -451,6 +453,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { forceDisableNativeImageDecoder = false, }) { var dict = image.dict; + const imageRef = dict.objId; var w = dict.get("Width", "W"); var h = dict.get("Height", "H"); @@ -528,12 +531,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return undefined; } - const nativeImageDecoderSupport = forceDisableNativeImageDecoder + let nativeImageDecoderSupport = forceDisableNativeImageDecoder ? NativeImageDecoding.NONE : this.options.nativeImageDecoderSupport; // If there is no imageMask, create the PDFImage and a lot // of image processing can be done here. - let objId = `img_${this.idFactory.createObjId()}`; + let objId = `img_${this.idFactory.createObjId()}`, + cacheGlobally = false; if (this.parsingType3Font) { assert( @@ -542,6 +546,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { ); objId = `${this.idFactory.getDocId()}_type3res_${objId}`; + } else if (imageRef) { + cacheGlobally = this.globalImageCache.shouldCache( + imageRef, + this.pageIndex + ); + + if (cacheGlobally) { + // Ensure that the image is *completely* decoded on the worker-thread, + // in order to simplify the caching/rendering code on the main-thread. + nativeImageDecoderSupport = NativeImageDecoding.NONE; + + objId = `${this.idFactory.getDocId()}_${objId}`; + } } if ( @@ -566,7 +583,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { image.getIR(this.options.forceDataSchema), ]) .then( - function () { + () => { // Only add the dependency once we know that the native JPEG // decoding succeeded, to ensure that rendering will always // complete. @@ -579,6 +596,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { fn: OPS.paintJpegXObject, args, }; + + if (imageRef) { + this.globalImageCache.addPageIndex(imageRef, this.pageIndex); + } } }, reason => { @@ -639,6 +660,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { [objId, "FontType3Res", imgData], [imgData.data.buffer] ); + } else if (cacheGlobally) { + this.handler.send( + "commonobj", + [objId, "Image", imgData], + [imgData.data.buffer] + ); + return undefined; } this.handler.send( "obj", @@ -656,6 +684,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { "FontType3Res", null, ]); + } else if (cacheGlobally) { + this.handler.send("commonobj", [objId, "Image", null]); + return undefined; } this.handler.send("obj", [objId, this.pageIndex, "Image", null]); return undefined; @@ -674,6 +705,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { fn: OPS.paintImageXObject, args, }; + + if (imageRef) { + this.globalImageCache.addPageIndex(imageRef, this.pageIndex); + + if (cacheGlobally) { + this.globalImageCache.setData(imageRef, { + objId, + fn: OPS.paintImageXObject, + args, + }); + } + } } return undefined; }, @@ -1322,7 +1365,23 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { ); } - const xobj = xobjs.get(name); + let xobj = xobjs.getRaw(name); + if (xobj instanceof Ref) { + const globalImage = self.globalImageCache.getData( + xobj, + self.pageIndex + ); + + if (globalImage) { + operatorList.addDependency(globalImage.objId); + operatorList.addOp(globalImage.fn, globalImage.args); + + resolveXObject(); + return; + } + xobj = xref.fetch(xobj); + } + if (!xobj) { operatorList.addOp(fn, args); resolveXObject(); diff --git a/src/core/image_utils.js b/src/core/image_utils.js index cdd04fb8d..1a6a8bd87 100644 --- a/src/core/image_utils.js +++ b/src/core/image_utils.js @@ -14,8 +14,10 @@ */ /* eslint no-var: error */ +import { assert, info, shadow } from "../shared/util.js"; import { ColorSpace } from "./colorspace.js"; import { JpegStream } from "./jpeg_stream.js"; +import { RefSetCache } from "./primitives.js"; import { Stream } from "./stream.js"; class NativeImageDecoder { @@ -111,4 +113,96 @@ class NativeImageDecoder { } } -export { NativeImageDecoder }; +class GlobalImageCache { + static get NUM_PAGES_THRESHOLD() { + return shadow(this, "NUM_PAGES_THRESHOLD", 2); + } + + static get MAX_IMAGES_TO_CACHE() { + return shadow(this, "MAX_IMAGES_TO_CACHE", 10); + } + + constructor() { + if ( + typeof PDFJSDev === "undefined" || + PDFJSDev.test("!PRODUCTION || TESTING") + ) { + assert( + GlobalImageCache.NUM_PAGES_THRESHOLD > 1, + "GlobalImageCache - invalid NUM_PAGES_THRESHOLD constant." + ); + } + this._refCache = new RefSetCache(); + this._imageCache = new RefSetCache(); + } + + shouldCache(ref, pageIndex) { + const pageIndexSet = this._refCache.get(ref); + const numPages = pageIndexSet + ? pageIndexSet.size + (pageIndexSet.has(pageIndex) ? 0 : 1) + : 1; + + if (numPages < GlobalImageCache.NUM_PAGES_THRESHOLD) { + return false; + } + if ( + !this._imageCache.has(ref) && + this._imageCache.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE + ) { + return false; + } + return true; + } + + addPageIndex(ref, pageIndex) { + let pageIndexSet = this._refCache.get(ref); + if (!pageIndexSet) { + pageIndexSet = new Set(); + this._refCache.put(ref, pageIndexSet); + } + pageIndexSet.add(pageIndex); + } + + getData(ref, pageIndex) { + if (!this._refCache.has(ref)) { + return null; + } + const pageIndexSet = this._refCache.get(ref); + + if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) { + return null; + } + if (!this._imageCache.has(ref)) { + return null; + } + // Ensure that we keep track of all pages containing the image reference. + pageIndexSet.add(pageIndex); + + return this._imageCache.get(ref); + } + + setData(ref, data) { + if (!this._refCache.has(ref)) { + throw new Error( + 'GlobalImageCache.setData - expected "addPageIndex" to have been called.' + ); + } + if (this._imageCache.has(ref)) { + return; + } + if (this._imageCache.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE) { + info( + "GlobalImageCache.setData - ignoring image above MAX_IMAGES_TO_CACHE." + ); + return; + } + this._imageCache.put(ref, data); + } + + clear() { + this._refCache.clear(); + this._imageCache.clear(); + } +} + +export { NativeImageDecoder, GlobalImageCache }; diff --git a/src/core/obj.js b/src/core/obj.js index 8d100e4d5..939377633 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -54,6 +54,7 @@ import { } from "./core_utils.js"; import { CipherTransformFactory } from "./crypto.js"; import { ColorSpace } from "./colorspace.js"; +import { GlobalImageCache } from "./image_utils.js"; function fetchDestination(dest) { return isDict(dest) ? dest.get("D") : dest; @@ -71,6 +72,7 @@ class Catalog { this.fontCache = new RefSetCache(); this.builtInCMapCache = new Map(); + this.globalImageCache = new GlobalImageCache(); this.pageKidsCountCache = new RefSetCache(); } @@ -716,6 +718,7 @@ class Catalog { cleanup() { clearPrimitiveCaches(); + this.globalImageCache.clear(); this.pageKidsCountCache.clear(); const promises = []; diff --git a/src/core/primitives.js b/src/core/primitives.js index 8c8727260..f87c73a43 100644 --- a/src/core/primitives.js +++ b/src/core/primitives.js @@ -251,6 +251,10 @@ var RefSetCache = (function RefSetCacheClosure() { } RefSetCache.prototype = { + get size() { + return Object.keys(this.dict).length; + }, + get: function RefSetCache_get(ref) { return this.dict[ref.toString()]; }, diff --git a/src/display/api.js b/src/display/api.js index 5170c5209..a99cffeef 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -2288,6 +2288,7 @@ class WorkerTransport { break; case "FontPath": case "FontType3Res": + case "Image": this.commonObjs.resolve(id, exportedData); break; default: diff --git a/src/display/canvas.js b/src/display/canvas.js index cf1cfca42..0cd2108f3 100644 --- a/src/display/canvas.js +++ b/src/display/canvas.js @@ -2114,7 +2114,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { }, paintJpegXObject: function CanvasGraphics_paintJpegXObject(objId, w, h) { - const domImage = this.processingType3 + const domImage = objId.startsWith("g_") ? this.commonObjs.get(objId) : this.objs.get(objId); if (!domImage) { @@ -2277,7 +2277,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { }, paintImageXObject: function CanvasGraphics_paintImageXObject(objId) { - const imgData = this.processingType3 + const imgData = objId.startsWith("g_") ? this.commonObjs.get(objId) : this.objs.get(objId); if (!imgData) { @@ -2294,7 +2294,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { scaleY, positions ) { - const imgData = this.processingType3 + const imgData = objId.startsWith("g_") ? this.commonObjs.get(objId) : this.objs.get(objId); if (!imgData) { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 1543bf0c5..90a5139bf 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -90,6 +90,7 @@ !issue11362.pdf !issue11578_reduced.pdf !issue11651.pdf +!issue11878.pdf !bad-PageLabels.pdf !decodeACSuccessive.pdf !filled-background.pdf diff --git a/test/pdfs/issue11878.pdf b/test/pdfs/issue11878.pdf new file mode 100644 index 000000000..f75ba9dc5 Binary files /dev/null and b/test/pdfs/issue11878.pdf differ diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 27e9ad6cc..b06d8c625 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -23,6 +23,7 @@ import { import { createPromiseCapability, FontType, + ImageKind, InvalidPDFException, MissingPDFException, OPS, @@ -44,6 +45,7 @@ import { PDFWorker, } from "../../src/display/api.js"; import { AutoPrintRegExp } from "../../web/ui_utils.js"; +import { GlobalImageCache } from "../../src/core/image_utils.js"; import { GlobalWorkerOptions } from "../../src/display/worker_options.js"; import { isNodeJS } from "../../src/shared/is_node.js"; import { Metadata } from "../../src/display/metadata.js"; @@ -1928,6 +1930,80 @@ describe("api", function () { }) .catch(done.fail); }); + + it("caches image resources at the document/page level as expected (issue 11878)", async function (done) { + const { NUM_PAGES_THRESHOLD } = GlobalImageCache, + EXPECTED_WIDTH = 2550, + EXPECTED_HEIGHT = 3300; + + const loadingTask = getDocument(buildGetDocumentParams("issue11878.pdf")); + let firstImgData = null; + + try { + const pdfDoc = await loadingTask.promise; + + for (let i = 1; i <= pdfDoc.numPages; i++) { + const pdfPage = await pdfDoc.getPage(i); + const opList = await pdfPage.getOperatorList(); + + const { commonObjs, objs } = pdfPage; + const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject); + const [objId, width, height] = opList.argsArray[imgIndex]; + + if (i < NUM_PAGES_THRESHOLD) { + expect(objId).toEqual(`img_p${i - 1}_1`); + + expect(objs.has(objId)).toEqual(true); + expect(commonObjs.has(objId)).toEqual(false); + } else { + expect(objId).toEqual( + `g_${loadingTask.docId}_img_p${NUM_PAGES_THRESHOLD - 1}_1` + ); + + expect(objs.has(objId)).toEqual(false); + expect(commonObjs.has(objId)).toEqual(true); + } + expect(width).toEqual(EXPECTED_WIDTH); + expect(height).toEqual(EXPECTED_HEIGHT); + + // Ensure that the actual image data is identical for all pages. + if (i === 1) { + firstImgData = objs.get(objId); + + expect(firstImgData.width).toEqual(EXPECTED_WIDTH); + expect(firstImgData.height).toEqual(EXPECTED_HEIGHT); + + expect(firstImgData.kind).toEqual(ImageKind.RGB_24BPP); + expect(firstImgData.data instanceof Uint8ClampedArray).toEqual( + true + ); + expect(firstImgData.data.length).toEqual(25245000); + } else { + const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs; + const currentImgData = objsPool.get(objId); + + expect(currentImgData.width).toEqual(firstImgData.width); + expect(currentImgData.height).toEqual(firstImgData.height); + + expect(currentImgData.kind).toEqual(firstImgData.kind); + expect(currentImgData.data instanceof Uint8ClampedArray).toEqual( + true + ); + expect( + currentImgData.data.every((value, index) => { + return value === firstImgData.data[index]; + }) + ).toEqual(true); + } + } + + await loadingTask.destroy(); + firstImgData = null; + done(); + } catch (ex) { + done.fail(ex); + } + }); }); describe("Multiple `getDocument` instances", function () { // Regression test for https://github.com/mozilla/pdf.js/issues/6205