fixes caching of inline images during parsing.

As described in #5444, the evaluator will perform identity checking of
paintImageMaskXObjects to decide if it can use
paintImageMaskXObjectRepeat instead of paintImageMaskXObjectGroup.

This can only ever work if the entry is a cache hit. However, the
previous caching implementation did lazy caching: it would only
consider an image cache-worthy if it was repeated, and only then
would the repeated instance be cached.
As a result, the sequence of identical images A1 A2 A3 A4 would
be seen as A1 A2 A2 A2 by the evaluator, which prevents using the
"repeat" optimization. Also, only the last encountered image was cached,
so A1 B1 A2 B2 would stay A1 B1 A2 B2.

The new implementation drops the "lazy" init of the cache. The threshold
for enabling an image to be cached is rather small, so the potential waste
in storage and adler32 calculation is rather low. It also caches any
eligible image by its adler32.

The two examples from above would now be A1 A1 A1 A1 and A1 B1 A1 B1,
which not only saves temporary storage, but also prevents computing
identical masks over and over again (which is the main performance impact
of #2618).
This commit is contained in:
Fabian Lange 2014-10-26 17:03:44 +01:00
parent d65db7c5ed
commit 970c048d50
2 changed files with 34 additions and 37 deletions

View File

@ -171,7 +171,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
buildPaintImageXObject: buildPaintImageXObject:
function PartialEvaluator_buildPaintImageXObject(resources, image, function PartialEvaluator_buildPaintImageXObject(resources, image,
inline, operatorList, inline, operatorList,
cacheKey, cache) { cacheKey, imageCache) {
var self = this; var self = this;
var dict = image.dict; var dict = image.dict;
var w = dict.get('Width', 'W'); var w = dict.get('Width', 'W');
@ -209,9 +209,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
args = [imgData]; args = [imgData];
operatorList.addOp(OPS.paintImageMaskXObject, args); operatorList.addOp(OPS.paintImageMaskXObject, args);
if (cacheKey) { if (cacheKey) {
cache.key = cacheKey; imageCache[cacheKey] = {
cache.fn = OPS.paintImageMaskXObject; fn: OPS.paintImageMaskXObject,
cache.args = args; args: args
};
} }
return; return;
} }
@ -260,9 +261,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList.addOp(OPS.paintImageXObject, args); operatorList.addOp(OPS.paintImageXObject, args);
if (cacheKey) { if (cacheKey) {
cache.key = cacheKey; imageCache[cacheKey] = {
cache.fn = OPS.paintImageXObject; fn: OPS.paintImageXObject,
cache.args = args; args: args
};
} }
}, },
@ -656,8 +658,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
} }
// eagerly compile XForm objects // eagerly compile XForm objects
var name = args[0].name; var name = args[0].name;
if (imageCache.key === name) { if (imageCache[name] !== undefined) {
operatorList.addOp(imageCache.fn, imageCache.args); operatorList.addOp(imageCache[name].fn, imageCache[name].args);
args = null; args = null;
continue; continue;
} }
@ -706,10 +708,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}, reject); }, reject);
case OPS.endInlineImage: case OPS.endInlineImage:
var cacheKey = args[0].cacheKey; var cacheKey = args[0].cacheKey;
if (cacheKey && imageCache.key === cacheKey) { if (cacheKey) {
operatorList.addOp(imageCache.fn, imageCache.args); var cacheEntry = imageCache[cacheKey];
args = null; if (cacheEntry !== undefined) {
continue; operatorList.addOp(cacheEntry.fn, cacheEntry.args);
args = null;
continue;
}
} }
self.buildPaintImageXObject(resources, args[0], true, self.buildPaintImageXObject(resources, args[0], true,
operatorList, cacheKey, imageCache); operatorList, cacheKey, imageCache);

View File

@ -28,16 +28,14 @@ function isEOF(v) {
return (v === EOF); return (v === EOF);
} }
var MAX_LENGTH_TO_CACHE = 1000;
var Parser = (function ParserClosure() { var Parser = (function ParserClosure() {
function Parser(lexer, allowStreams, xref) { function Parser(lexer, allowStreams, xref) {
this.lexer = lexer; this.lexer = lexer;
this.allowStreams = allowStreams; this.allowStreams = allowStreams;
this.xref = xref; this.xref = xref;
this.imageCache = { this.imageCache = {};
length: 0,
adler32: 0,
stream: null
};
this.refill(); this.refill();
} }
@ -185,34 +183,29 @@ var Parser = (function ParserClosure() {
var length = (stream.pos - 4) - startPos; var length = (stream.pos - 4) - startPos;
var imageStream = stream.makeSubStream(startPos, length, dict); var imageStream = stream.makeSubStream(startPos, length, dict);
// trying to cache repeat images, first we are trying to "warm up" caching // cache all images below the MAX_LENGTH_TO_CACHE threshold by their
// using length, then comparing adler32 // adler32 checksum.
var MAX_LENGTH_TO_CACHE = 1000; var adler32;
var cacheImage = false, adler32; if (length < MAX_LENGTH_TO_CACHE) {
if (length < MAX_LENGTH_TO_CACHE && this.imageCache.length === length) {
var imageBytes = imageStream.getBytes(); var imageBytes = imageStream.getBytes();
imageStream.reset(); imageStream.reset();
var a = 1; var a = 1;
var b = 0; var b = 0;
for (i = 0, ii = imageBytes.length; i < ii; ++i) { for (i = 0, ii = imageBytes.length; i < ii; ++i) {
a = (a + (imageBytes[i] & 0xff)) % 65521; // no modulo required in the loop if imageBytes.length < 5552
b = (b + a) % 65521; a += imageBytes[i] & 0xff;
b += a;
} }
adler32 = (b << 16) | a; adler32 = ((b % 65521) << 16) | (a % 65521);
if (this.imageCache.stream && this.imageCache.adler32 === adler32) { if (this.imageCache.adler32 === adler32) {
this.buf2 = Cmd.get('EI'); this.buf2 = Cmd.get('EI');
this.shift(); this.shift();
this.imageCache.stream.reset(); this.imageCache[adler32].reset();
return this.imageCache.stream; return this.imageCache[adler32];
} }
cacheImage = true;
}
if (!cacheImage && !this.imageCache.stream) {
this.imageCache.length = length;
this.imageCache.stream = null;
} }
if (cipherTransform) { if (cipherTransform) {
@ -221,10 +214,9 @@ var Parser = (function ParserClosure() {
imageStream = this.filter(imageStream, dict, length); imageStream = this.filter(imageStream, dict, length);
imageStream.dict = dict; imageStream.dict = dict;
if (cacheImage) { if (adler32 !== undefined) {
imageStream.cacheKey = 'inline_' + length + '_' + adler32; imageStream.cacheKey = 'inline_' + length + '_' + adler32;
this.imageCache.adler32 = adler32; this.imageCache[adler32] = imageStream;
this.imageCache.stream = imageStream;
} }
this.buf2 = Cmd.get('EI'); this.buf2 = Cmd.get('EI');