From 72da2aa1662d6bcefcc7f350b1914cf01983dfc5 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 27 Jan 2021 16:56:17 +0100 Subject: [PATCH] Ignore globally cached images in `PartialEvaluator.getTextContent` (PR 11930 follow-up) Given that we'll only cache `/XObject`s of the `Image`-type globally, we can utilize that in `PartialEvaluator.getTextContent` as well. This way, in cases such as e.g. issue 12098, we can avoid having to fetch/parse `/XObject`s that we already know to be `Image`s. This is helpful, since `Stream`s are not cached on the `XRef` instance (given their potential size) and the lookup can thus be somewhat expensive in general. Also, skip a redundant `RefSetCache.has` check in the `GlobalImageCache.getData` method. --- src/core/evaluator.js | 9 +++++++++ src/core/image_utils.js | 5 +++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 017e5ae8a..4c28cc6b6 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2515,6 +2515,15 @@ class PartialEvaluator { return; } + const globalImage = self.globalImageCache.getData( + xobj, + self.pageIndex + ); + if (globalImage) { + resolveXObject(); + return; + } + xobj = xref.fetch(xobj); } diff --git a/src/core/image_utils.js b/src/core/image_utils.js index 5625f5b08..17ae9bf7c 100644 --- a/src/core/image_utils.js +++ b/src/core/image_utils.js @@ -247,13 +247,14 @@ class GlobalImageCache { if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) { return null; } - if (!this._imageCache.has(ref)) { + const imageData = this._imageCache.get(ref); + if (!imageData) { return null; } // Ensure that we keep track of all pages containing the image reference. pageIndexSet.add(pageIndex); - return this._imageCache.get(ref); + return imageData; } setData(ref, data) {