Fixes caching of inline images during parsing.

As described in #5444, the evaluator will perform identity checking of
paintImageMaskXObjects to decide if it can use
paintImageMaskXObjectRepeat instead of paintImageMaskXObjectGroup.

This can only ever work if the entry is a cache hit. However, the
previous caching implementation cached lazily: it would
only consider an image cache-worthy once it was repeated,
and only then would the repeated instance be cached.
As a result, the sequence of identical images A1 A2 A3 A4 would
be seen as A1 A2 A2 A2 by the evaluator, which prevents using the
"repeat" optimization. Also, only the last encountered image was cached,
so A1 B1 A2 B2 would stay A1 B1 A2 B2.

The new implementation drops the "lazy" init of the cache. The threshold
for enabling an image to be cached is rather small, so the potential waste
in storage and adler32 calculation is rather low. It also caches any
eligible image by its adler32.

The two examples from above would now be A1 A1 A1 A1 and A1 B1 A1 B1,
which not only saves temporary storage but also prevents computing
identical masks over and over again (which is the main performance impact
of #2618).
This commit is contained in:
Fabian Lange 2014-10-26 17:03:44 +01:00
parent d65db7c5ed
commit 970c048d50
2 changed files with 34 additions and 37 deletions

View File

@ -171,7 +171,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
buildPaintImageXObject:
function PartialEvaluator_buildPaintImageXObject(resources, image,
inline, operatorList,
cacheKey, cache) {
cacheKey, imageCache) {
var self = this;
var dict = image.dict;
var w = dict.get('Width', 'W');
@ -209,9 +209,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
args = [imgData];
operatorList.addOp(OPS.paintImageMaskXObject, args);
if (cacheKey) {
cache.key = cacheKey;
cache.fn = OPS.paintImageMaskXObject;
cache.args = args;
imageCache[cacheKey] = {
fn: OPS.paintImageMaskXObject,
args: args
};
}
return;
}
@ -260,9 +261,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList.addOp(OPS.paintImageXObject, args);
if (cacheKey) {
cache.key = cacheKey;
cache.fn = OPS.paintImageXObject;
cache.args = args;
imageCache[cacheKey] = {
fn: OPS.paintImageXObject,
args: args
};
}
},
@ -656,8 +658,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
// eagerly compile XForm objects
var name = args[0].name;
if (imageCache.key === name) {
operatorList.addOp(imageCache.fn, imageCache.args);
if (imageCache[name] !== undefined) {
operatorList.addOp(imageCache[name].fn, imageCache[name].args);
args = null;
continue;
}
@ -706,10 +708,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}, reject);
case OPS.endInlineImage:
var cacheKey = args[0].cacheKey;
if (cacheKey && imageCache.key === cacheKey) {
operatorList.addOp(imageCache.fn, imageCache.args);
args = null;
continue;
if (cacheKey) {
var cacheEntry = imageCache[cacheKey];
if (cacheEntry !== undefined) {
operatorList.addOp(cacheEntry.fn, cacheEntry.args);
args = null;
continue;
}
}
self.buildPaintImageXObject(resources, args[0], true,
operatorList, cacheKey, imageCache);

View File

@ -28,16 +28,14 @@ function isEOF(v) {
return (v === EOF);
}
var MAX_LENGTH_TO_CACHE = 1000;
var Parser = (function ParserClosure() {
function Parser(lexer, allowStreams, xref) {
this.lexer = lexer;
this.allowStreams = allowStreams;
this.xref = xref;
this.imageCache = {
length: 0,
adler32: 0,
stream: null
};
this.imageCache = {};
this.refill();
}
@ -185,34 +183,29 @@ var Parser = (function ParserClosure() {
var length = (stream.pos - 4) - startPos;
var imageStream = stream.makeSubStream(startPos, length, dict);
// trying to cache repeat images, first we are trying to "warm up" caching
// using length, then comparing adler32
var MAX_LENGTH_TO_CACHE = 1000;
var cacheImage = false, adler32;
if (length < MAX_LENGTH_TO_CACHE && this.imageCache.length === length) {
// cache all images below the MAX_LENGTH_TO_CACHE threshold by their
// adler32 checksum.
var adler32;
if (length < MAX_LENGTH_TO_CACHE) {
var imageBytes = imageStream.getBytes();
imageStream.reset();
var a = 1;
var b = 0;
for (i = 0, ii = imageBytes.length; i < ii; ++i) {
a = (a + (imageBytes[i] & 0xff)) % 65521;
b = (b + a) % 65521;
// no modulo required in the loop if imageBytes.length < 5552
a += imageBytes[i] & 0xff;
b += a;
}
adler32 = (b << 16) | a;
adler32 = ((b % 65521) << 16) | (a % 65521);
if (this.imageCache.stream && this.imageCache.adler32 === adler32) {
if (this.imageCache.adler32 === adler32) {
this.buf2 = Cmd.get('EI');
this.shift();
this.imageCache.stream.reset();
return this.imageCache.stream;
this.imageCache[adler32].reset();
return this.imageCache[adler32];
}
cacheImage = true;
}
if (!cacheImage && !this.imageCache.stream) {
this.imageCache.length = length;
this.imageCache.stream = null;
}
if (cipherTransform) {
@ -221,10 +214,9 @@ var Parser = (function ParserClosure() {
imageStream = this.filter(imageStream, dict, length);
imageStream.dict = dict;
if (cacheImage) {
if (adler32 !== undefined) {
imageStream.cacheKey = 'inline_' + length + '_' + adler32;
this.imageCache.adler32 = adler32;
this.imageCache.stream = imageStream;
this.imageCache[adler32] = imageStream;
}
this.buf2 = Cmd.get('EI');