From 970c048d50335658f1646b88acd2bf17d9c88230 Mon Sep 17 00:00:00 2001 From: Fabian Lange Date: Sun, 26 Oct 2014 17:03:44 +0100 Subject: [PATCH] fixes caching of inline images during parsing. As described in #5444, the evaluator performs identity checking of paintImageMaskXObjects to decide if it can use paintImageMaskXObjectRepeat instead of paintImageMaskXObjectGroup. This can only ever work if the entry is a cache hit. However, the previous caching implementation was lazy: it would only consider an image cache-worthy once it was repeated, and only then would the repeated instance be cached. As a result, the sequence of identical images A1 A2 A3 A4 would be seen as A1 A2 A2 A2 by the evaluator, which prevents using the "repeat" optimization. Also, only the last encountered image was cached, so A1 B1 A2 B2 would stay A1 B1 A2 B2. The new implementation drops the "lazy" init of the cache. The threshold for enabling an image to be cached is rather small, so the potential waste in storage and adler32 calculation is rather low. It also caches every eligible image, keyed by its adler32 checksum. 
The two example from above would now be A1 A1 A1 A1 and A1 B1 A1 B1 which not only saves temporary storage, but also prevents computing identical masks over and over again (which is the main performance impact of #2618) --- src/core/evaluator.js | 31 ++++++++++++++++++------------- src/core/parser.js | 40 ++++++++++++++++------------------------ 2 files changed, 34 insertions(+), 37 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 753d248c8..fb0d746d1 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -171,7 +171,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { buildPaintImageXObject: function PartialEvaluator_buildPaintImageXObject(resources, image, inline, operatorList, - cacheKey, cache) { + cacheKey, imageCache) { var self = this; var dict = image.dict; var w = dict.get('Width', 'W'); @@ -209,9 +209,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { args = [imgData]; operatorList.addOp(OPS.paintImageMaskXObject, args); if (cacheKey) { - cache.key = cacheKey; - cache.fn = OPS.paintImageMaskXObject; - cache.args = args; + imageCache[cacheKey] = { + fn: OPS.paintImageMaskXObject, + args: args + }; } return; } @@ -260,9 +261,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { operatorList.addOp(OPS.paintImageXObject, args); if (cacheKey) { - cache.key = cacheKey; - cache.fn = OPS.paintImageXObject; - cache.args = args; + imageCache[cacheKey] = { + fn: OPS.paintImageXObject, + args: args + }; } }, @@ -656,8 +658,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } // eagerly compile XForm objects var name = args[0].name; - if (imageCache.key === name) { - operatorList.addOp(imageCache.fn, imageCache.args); + if (imageCache[name] !== undefined) { + operatorList.addOp(imageCache[name].fn, imageCache[name].args); args = null; continue; } @@ -706,10 +708,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }, reject); case OPS.endInlineImage: var 
cacheKey = args[0].cacheKey; - if (cacheKey && imageCache.key === cacheKey) { - operatorList.addOp(imageCache.fn, imageCache.args); - args = null; - continue; + if (cacheKey) { + var cacheEntry = imageCache[cacheKey]; + if (cacheEntry !== undefined) { + operatorList.addOp(cacheEntry.fn, cacheEntry.args); + args = null; + continue; + } } self.buildPaintImageXObject(resources, args[0], true, operatorList, cacheKey, imageCache); diff --git a/src/core/parser.js b/src/core/parser.js index 73eb75622..f039ac3bb 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -28,16 +28,14 @@ function isEOF(v) { return (v === EOF); } +var MAX_LENGTH_TO_CACHE = 1000; + var Parser = (function ParserClosure() { function Parser(lexer, allowStreams, xref) { this.lexer = lexer; this.allowStreams = allowStreams; this.xref = xref; - this.imageCache = { - length: 0, - adler32: 0, - stream: null - }; + this.imageCache = {}; this.refill(); } @@ -185,34 +183,29 @@ var Parser = (function ParserClosure() { var length = (stream.pos - 4) - startPos; var imageStream = stream.makeSubStream(startPos, length, dict); - // trying to cache repeat images, first we are trying to "warm up" caching - // using length, then comparing adler32 - var MAX_LENGTH_TO_CACHE = 1000; - var cacheImage = false, adler32; - if (length < MAX_LENGTH_TO_CACHE && this.imageCache.length === length) { + // cache all images below the MAX_LENGTH_TO_CACHE threshold by their + // adler32 checksum. 
+ var adler32; + if (length < MAX_LENGTH_TO_CACHE) { var imageBytes = imageStream.getBytes(); imageStream.reset(); var a = 1; var b = 0; for (i = 0, ii = imageBytes.length; i < ii; ++i) { - a = (a + (imageBytes[i] & 0xff)) % 65521; - b = (b + a) % 65521; + // no modulo required in the loop if imageBytes.length < 5552 + a += imageBytes[i] & 0xff; + b += a; } - adler32 = (b << 16) | a; + adler32 = ((b % 65521) << 16) | (a % 65521); - if (this.imageCache.stream && this.imageCache.adler32 === adler32) { + if (this.imageCache.adler32 === adler32) { this.buf2 = Cmd.get('EI'); this.shift(); - this.imageCache.stream.reset(); - return this.imageCache.stream; + this.imageCache[adler32].reset(); + return this.imageCache[adler32]; } - cacheImage = true; - } - if (!cacheImage && !this.imageCache.stream) { - this.imageCache.length = length; - this.imageCache.stream = null; } if (cipherTransform) { @@ -221,10 +214,9 @@ var Parser = (function ParserClosure() { imageStream = this.filter(imageStream, dict, length); imageStream.dict = dict; - if (cacheImage) { + if (adler32 !== undefined) { imageStream.cacheKey = 'inline_' + length + '_' + adler32; - this.imageCache.adler32 = adler32; - this.imageCache.stream = imageStream; + this.imageCache[adler32] = imageStream; } this.buf2 = Cmd.get('EI');