Improve the *local* image caching in PartialEvaluator.getOperatorList

Currently the local `imageCache`, as used in `PartialEvaluator.getOperatorList`, will miss certain cases of repeated images because the caching is *only* done by name (usually using a format such as e.g. "Im0", "Im1", ...).
However, in some PDF documents the `/XObject` dictionaries many contain hundreds (or even thousands) of distinctly named images, despite them referring to only a handful of actual image objects (via the XRef table).

With these changes we'll now cache *local* images using both name and (where applicable) reference, thus improving re-usage of images resources even further.

This patch was tested using the PDF file from [bug 857031](https://bugzilla.mozilla.org/show_bug.cgi?id=857031), i.e. https://bug857031.bmoattachments.org/attachment.cgi?id=732270, with the following manifest file:
```
[
    {  "id": "bug857031",
       "file": "../web/pdfs/bug857031.pdf",
       "md5": "",
       "rounds": 250,
       "lastPage": 1,
       "type": "eq"
    }
]
```

which gave the following results when comparing this patch against the `master` branch:
```
-- Grouped By browser, page, stat --
browser | page | stat         | Count | Baseline(ms) | Current(ms) | +/- |    %  | Result(P<.05)
------- | ---- | ------------ | ----- | ------------ | ----------- | --- | ----- | -------------
firefox | 0    | Overall      |   250 |         2749 |        2656 | -93 | -3.38 |        faster
firefox | 0    | Page Request |   250 |            3 |           4 |   1 | 50.14 |        slower
firefox | 0    | Rendering    |   250 |         2746 |        2652 | -94 | -3.44 |        faster
```

While this is certainly an improvement, since we now avoid re-parsing ~1000 images on the first page, all of the image resources are small enough that the total rendering time doesn't improve that much in this particular case.

In pathological cases, such as e.g. the PDF document in issue 4958, the improvements with this patch can be very significant. Looking for example at page 2, from issue 4958, the rendering time drops from ~60 seconds with `master` to ~30 seconds with this patch (obviously still slow, but it really showcases the potential of this patch nicely).

Finally, note that there's also potential for additional improvements by re-using `LocalImageCache` instances for e.g. /XObject data of the `Form`-type. However, given that recent changes in this area I purposely didn't want to complicate *this* patch more than necessary.
This commit is contained in:
Jonas Jenwald 2020-05-23 13:55:31 +02:00
parent 9d38dd4e8b
commit d62c9181bd
2 changed files with 68 additions and 17 deletions

View File

@ -79,6 +79,7 @@ import { DecodeStream } from "./stream.js";
import { getGlyphsUnicode } from "./glyphlist.js";
import { getMetrics } from "./metrics.js";
import { isPDFFunction } from "./function.js";
import { LocalImageCache } from "./image_utils.js";
import { MurmurHash3_64 } from "./murmurhash3.js";
import { OperatorList } from "./operator_list.js";
import { PDFImage } from "./image.js";
@ -444,7 +445,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
isInline = false,
operatorList,
cacheKey,
imageCache,
localImageCache,
}) {
var dict = image.dict;
const imageRef = dict.objId;
@ -491,10 +492,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList.addOp(OPS.paintImageMaskXObject, args);
if (cacheKey) {
imageCache[cacheKey] = {
localImageCache.set(cacheKey, imageRef, {
fn: OPS.paintImageMaskXObject,
args,
};
});
}
return undefined;
}
@ -598,10 +599,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList.addOp(OPS.paintImageXObject, args);
if (cacheKey) {
imageCache[cacheKey] = {
localImageCache.set(cacheKey, imageRef, {
fn: OPS.paintImageXObject,
args,
};
});
if (imageRef) {
this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
@ -1201,7 +1202,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var self = this;
var xref = this.xref;
let parsingText = false;
var imageCache = Object.create(null);
const localImageCache = new LocalImageCache();
var xobjs = resources.get("XObject") || Dict.empty;
var patterns = resources.get("Pattern") || Dict.empty;
@ -1248,10 +1249,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
case OPS.paintXObject:
// eagerly compile XForm objects
var name = args[0].name;
if (name && imageCache[name] !== undefined) {
operatorList.addOp(imageCache[name].fn, imageCache[name].args);
args = null;
continue;
if (name) {
const localImage = localImageCache.getByName(name);
if (localImage) {
operatorList.addOp(localImage.fn, localImage.args);
args = null;
continue;
}
}
next(
@ -1264,11 +1268,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
let xobj = xobjs.getRaw(name);
if (xobj instanceof Ref) {
const localImage = localImageCache.getByRef(xobj);
if (localImage) {
operatorList.addOp(localImage.fn, localImage.args);
resolveXObject();
return;
}
const globalImage = self.globalImageCache.getData(
xobj,
self.pageIndex
);
if (globalImage) {
operatorList.addDependency(globalImage.objId);
operatorList.addOp(globalImage.fn, globalImage.args);
@ -1276,6 +1287,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
resolveXObject();
return;
}
xobj = xref.fetch(xobj);
}
@ -1316,7 +1328,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
image: xobj,
operatorList,
cacheKey: name,
imageCache,
localImageCache,
})
.then(resolveXObject, rejectXObject);
return;
@ -1375,9 +1387,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
case OPS.endInlineImage:
var cacheKey = args[0].cacheKey;
if (cacheKey) {
var cacheEntry = imageCache[cacheKey];
if (cacheEntry !== undefined) {
operatorList.addOp(cacheEntry.fn, cacheEntry.args);
const localImage = localImageCache.getByName(cacheKey);
if (localImage) {
operatorList.addOp(localImage.fn, localImage.args);
args = null;
continue;
}
@ -1389,7 +1401,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
isInline: true,
operatorList,
cacheKey,
imageCache,
localImageCache,
})
);
return;

View File

@ -17,6 +17,45 @@
import { assert, info, shadow } from "../shared/util.js";
import { RefSetCache } from "./primitives.js";
class LocalImageCache {
constructor() {
this._nameRefMap = new Map();
this._imageMap = new Map();
this._imageCache = new RefSetCache();
}
getByName(name) {
const ref = this._nameRefMap.get(name);
if (ref) {
return this.getByRef(ref);
}
return this._imageMap.get(name) || null;
}
getByRef(ref) {
return this._imageCache.get(ref) || null;
}
set(name, ref = null, data) {
if (!name) {
throw new Error('LocalImageCache.set - expected "name" argument.');
}
if (ref) {
if (this._imageCache.has(ref)) {
return;
}
this._nameRefMap.set(name, ref);
this._imageCache.put(ref, data);
return;
}
// name
if (this._imageMap.has(name)) {
return;
}
this._imageMap.set(name, data);
}
}
class GlobalImageCache {
static get NUM_PAGES_THRESHOLD() {
return shadow(this, "NUM_PAGES_THRESHOLD", 2);
@ -111,4 +150,4 @@ class GlobalImageCache {
}
}
export { GlobalImageCache };
export { LocalImageCache, GlobalImageCache };