From 1ab6d2c604971f4d98ebd36d85d7bca3088ca4cc Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 25 Jan 2021 15:09:11 +0100 Subject: [PATCH] Improve global image caching for small images (PR 11912 follow-up, issue 12098) When implementing the `GlobalImageCache` functionality I was mostly worried about the effect of *very large* images, hence the maximum number of cached images were purposely kept quite low[1]. However, there's one fairly obvious problem with that approach: In documents with hundreds, or even thousands, of *small* images the `GlobalImageCache` as implemented becomes essentially pointless. Hence this patch, where the `GlobalImageCache`-implementation is changed in the following ways: - We're still guaranteed to be able to cache a *minimum* number of images, set to `10` (similar as before). - If the *total* size of all the cached image data is below a threshold[2], we're allowed to cache additional images. This patch thus *improve*, but doesn't completely fix, issue 12098. Note that that document is created by a *very poor* PDF generator, since every single page contains the *entire* document (with all of its /Resources) and to create the individual pages clipping is used.[3] --- [1] Currently set to `10` images; imagine what would happen to overall memory usage if we encountered e.g. 50 images each 10 MB in size. [2] This value was chosen, somewhat randomly, to be `40` megabytes; basically five times the [maximum individual image size per page](https://github.com/mozilla/pdf.js/blob/6249ef517d3aaacc9aa6c9e1f5377acfaa4bc2a7/src/display/api.js#L2483-L2484). [3] This surely has to be some kind of record w.r.t. how badly PDF generators can mess things up... --- src/core/evaluator.js | 4 ++++ src/core/image_utils.js | 53 ++++++++++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index df604292b..017e5ae8a 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -609,6 +609,9 @@ class PartialEvaluator { .then(imageObj => { imgData = imageObj.createImageData(/* forceRGBA = */ false); + if (cacheKey && imageRef && cacheGlobally) { + this.globalImageCache.addByteSize(imageRef, imgData.data.length); + } return this._sendImgData(objId, imgData, cacheGlobally); }) .catch(reason => { @@ -633,6 +636,7 @@ class PartialEvaluator { objId, fn: OPS.paintImageXObject, args, + byteSize: 0, // Temporary entry, note `addByteSize` above. }); } } diff --git a/src/core/image_utils.js b/src/core/image_utils.js index 20878e621..5625f5b08 100644 --- a/src/core/image_utils.js +++ b/src/core/image_utils.js @@ -13,7 +13,7 @@ * limitations under the License. */ -import { assert, info, shadow, unreachable } from "../shared/util.js"; +import { assert, shadow, unreachable, warn } from "../shared/util.js"; import { RefSetCache } from "./primitives.js"; class BaseLocalCache { @@ -161,8 +161,12 @@ class GlobalImageCache { return shadow(this, "NUM_PAGES_THRESHOLD", 2); } - static get MAX_IMAGES_TO_CACHE() { - return shadow(this, "MAX_IMAGES_TO_CACHE", 10); + static get MIN_IMAGES_TO_CACHE() { + return shadow(this, "MIN_IMAGES_TO_CACHE", 10); + } + + static get MAX_BYTE_SIZE() { + return shadow(this, "MAX_BYTE_SIZE", /* Forty megabytes = */ 40e6); } constructor() { @@ -179,6 +183,24 @@ class GlobalImageCache { this._imageCache = new RefSetCache(); } + get _byteSize() { + let byteSize = 0; + this._imageCache.forEach(imageData => { + byteSize += imageData.byteSize; + }); + return byteSize; + } + + get _cacheLimitReached() { + if (this._imageCache.size < GlobalImageCache.MIN_IMAGES_TO_CACHE) { + return false; + } + if (this._byteSize < GlobalImageCache.MAX_BYTE_SIZE) { + return false; + } + return true; + } + shouldCache(ref, pageIndex) { const pageIndexSet = this._refCache.get(ref); const numPages = pageIndexSet @@ -188,10 +210,7 @@ class GlobalImageCache { if (numPages < GlobalImageCache.NUM_PAGES_THRESHOLD) { return false; } - if ( - !this._imageCache.has(ref) && - this._imageCache.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE - ) { + if (!this._imageCache.has(ref) && this._cacheLimitReached) { return false; } return true; @@ -206,6 +225,20 @@ class GlobalImageCache { pageIndexSet.add(pageIndex); } + /** + * PLEASE NOTE: Must be called *after* the `setData` method. + */ + addByteSize(ref, byteSize) { + const imageData = this._imageCache.get(ref); + if (!imageData) { + return; // The image data isn't cached (the limit was reached). + } + if (imageData.byteSize) { + return; // The byte-size has already been set. + } + imageData.byteSize = byteSize; + } + getData(ref, pageIndex) { const pageIndexSet = this._refCache.get(ref); if (!pageIndexSet) { @@ -232,10 +265,8 @@ class GlobalImageCache { if (this._imageCache.has(ref)) { return; } - if (this._imageCache.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE) { - info( - "GlobalImageCache.setData - ignoring image above MAX_IMAGES_TO_CACHE." - ); + if (this._cacheLimitReached) { + warn("GlobalImageCache.setData - cache limit reached."); return; } this._imageCache.put(ref, data);