Attempt to also cache images at the "page"-level (issue 16263)

Currently we have two separate image-caches on the worker-thread:
 - A local one, which is unique to each `PartialEvaluator.getOperatorList` invocation. This one caches both names *and* references, since image-resources may be accessed in either way.
 - A global one, which applies to the entire PDF document and all of its pages. This one only caches references, since nothing else would work.

This patch introduces a third image-cache, which essentially sits "between" the two existing ones. The new `RegionalImageCache`[1] will be usable throughout a `PartialEvaluator` instance, and consequently it *only* caches references, which thus allows us to keep track of repeated image-resources found in e.g. different /Form and /SMask objects.

---
[1] For lack of a better word, since naming things is hard...
This commit is contained in:
Jonas Jenwald 2023-04-10 11:00:35 +02:00
parent 195db2cff5
commit 9881dbf927
5 changed files with 63 additions and 9 deletions

View File

@ -59,6 +59,7 @@ import {
LocalGStateCache, LocalGStateCache,
LocalImageCache, LocalImageCache,
LocalTilingPatternCache, LocalTilingPatternCache,
RegionalImageCache,
} from "./image_utils.js"; } from "./image_utils.js";
import { NullStream, Stream } from "./stream.js"; import { NullStream, Stream } from "./stream.js";
import { BaseStream } from "./base_stream.js"; import { BaseStream } from "./base_stream.js";
@ -229,6 +230,7 @@ class PartialEvaluator {
this.options = options || DefaultPartialEvaluatorOptions; this.options = options || DefaultPartialEvaluatorOptions;
this.parsingType3Font = false; this.parsingType3Font = false;
this._regionalImageCache = new RegionalImageCache();
this._fetchBuiltInCMapBound = this.fetchBuiltInCMap.bind(this); this._fetchBuiltInCMapBound = this.fetchBuiltInCMap.bind(this);
ImageResizer.setMaxArea(this.options.canvasMaxAreaInBytes); ImageResizer.setMaxArea(this.options.canvasMaxAreaInBytes);
} }
@ -635,11 +637,20 @@ class PartialEvaluator {
); );
if (cacheKey) { if (cacheKey) {
localImageCache.set(cacheKey, imageRef, { const cacheData = {
fn: OPS.paintImageMaskXObject, fn: OPS.paintImageMaskXObject,
args, args,
optionalContent, optionalContent,
}); };
localImageCache.set(cacheKey, imageRef, cacheData);
if (imageRef) {
this._regionalImageCache.set(
/* name = */ null,
imageRef,
cacheData
);
}
} }
return; return;
} }
@ -664,11 +675,20 @@ class PartialEvaluator {
); );
if (cacheKey) { if (cacheKey) {
localImageCache.set(cacheKey, imageRef, { const cacheData = {
fn: OPS.paintSolidColorImageMask, fn: OPS.paintSolidColorImageMask,
args: [], args: [],
optionalContent, optionalContent,
}); };
localImageCache.set(cacheKey, imageRef, cacheData);
if (imageRef) {
this._regionalImageCache.set(
/* name = */ null,
imageRef,
cacheData
);
}
} }
return; return;
} }
@ -693,11 +713,16 @@ class PartialEvaluator {
); );
if (cacheKey) { if (cacheKey) {
localImageCache.set(cacheKey, imageRef, { const cacheData = {
fn: OPS.paintImageMaskXObject, fn: OPS.paintImageMaskXObject,
args, args,
optionalContent, optionalContent,
}); };
localImageCache.set(cacheKey, imageRef, cacheData);
if (imageRef) {
this._regionalImageCache.set(/* name = */ null, imageRef, cacheData);
}
} }
return; return;
} }
@ -790,13 +815,16 @@ class PartialEvaluator {
operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent); operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent);
if (cacheKey) { if (cacheKey) {
localImageCache.set(cacheKey, imageRef, { const cacheData = {
fn: OPS.paintImageXObject, fn: OPS.paintImageXObject,
args, args,
optionalContent, optionalContent,
}); };
localImageCache.set(cacheKey, imageRef, cacheData);
if (imageRef) { if (imageRef) {
this._regionalImageCache.set(/* name = */ null, imageRef, cacheData);
assert(!isInline, "Cannot cache an inline image globally."); assert(!isInline, "Cannot cache an inline image globally.");
this.globalImageCache.addPageIndex(imageRef, this.pageIndex); this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
@ -1723,7 +1751,9 @@ class PartialEvaluator {
let xobj = xobjs.getRaw(name); let xobj = xobjs.getRaw(name);
if (xobj instanceof Ref) { if (xobj instanceof Ref) {
const localImage = localImageCache.getByRef(xobj); const localImage =
localImageCache.getByRef(xobj) ||
self._regionalImageCache.getByRef(xobj);
if (localImage) { if (localImage) {
operatorList.addImageOps( operatorList.addImageOps(
localImage.fn, localImage.fn,

View File

@ -156,6 +156,22 @@ class LocalTilingPatternCache extends BaseLocalCache {
} }
} }
/**
 * Image cache that is keyed exclusively by reference.
 *
 * The base class is configured with `onlyRefs: true`, and `set` never looks
 * at the `name` argument, so entries can only be stored under a `ref`.
 */
class RegionalImageCache extends BaseLocalCache {
  /**
   * @param {Object} [options] - Currently ignored; the base class is always
   *   initialised with `{ onlyRefs: true }`.
   */
  constructor(options) {
    super({ onlyRefs: true });
  }

  /**
   * Store `data` under `ref`, unless an entry for `ref` already exists.
   *
   * The first entry stored for a given `ref` wins; later calls with the same
   * `ref` leave the cache untouched.
   *
   * @param {*} [name] - Unused; presumably kept so the signature lines up
   *   with the `set(name, ref, data)` shape of the other caches.
   * @param {*} ref - Key of the cached entry; must be truthy.
   * @param {*} data - Value to associate with `ref`.
   * @throws {Error} If `ref` is falsy.
   */
  set(name = null, ref, data) {
    if (!ref) {
      throw new Error('RegionalImageCache.set - expected "ref" argument.');
    }
    // Never overwrite: only insert when this reference has not been seen yet.
    if (!this._imageCache.has(ref)) {
      this._imageCache.put(ref, data);
    }
  }
}
class GlobalImageCache { class GlobalImageCache {
static get NUM_PAGES_THRESHOLD() { static get NUM_PAGES_THRESHOLD() {
return shadow(this, "NUM_PAGES_THRESHOLD", 2); return shadow(this, "NUM_PAGES_THRESHOLD", 2);
@ -288,4 +304,5 @@ export {
LocalGStateCache, LocalGStateCache,
LocalImageCache, LocalImageCache,
LocalTilingPatternCache, LocalTilingPatternCache,
RegionalImageCache,
}; };

View File

@ -225,6 +225,7 @@
!issue840.pdf !issue840.pdf
!160F-2019.pdf !160F-2019.pdf
!issue4402_reduced.pdf !issue4402_reduced.pdf
!issue16263.pdf
!issue845r.pdf !issue845r.pdf
!issue3405r.pdf !issue3405r.pdf
!issue14130.pdf !issue14130.pdf

BIN
test/pdfs/issue16263.pdf Normal file

Binary file not shown.

View File

@ -4721,6 +4721,12 @@
"link": false, "link": false,
"type": "eq" "type": "eq"
}, },
{ "id": "issue16263",
"file": "pdfs/issue16263.pdf",
"md5": "93c3e7884f1f2d072442898127e0a6fd",
"rounds": 1,
"type": "eq"
},
{ "id": "issue2006", { "id": "issue2006",
"file": "pdfs/issue2006.pdf", "file": "pdfs/issue2006.pdf",
"md5": "71ec73831ece9b508ad20efa6ff28642", "md5": "71ec73831ece9b508ad20efa6ff28642",