Merge pull request #12087 from Snuffleupagus/LocalGStateCache

Add local caching of "simple" Graphics State (ExtGState) data in `PartialEvaluator.{getOperatorList, getTextContent}` (issue 2813)
This commit is contained in:
Tim van der Meij 2020-07-17 16:02:45 +02:00 committed by GitHub
commit e63d1ebff5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 180 additions and 54 deletions

View File

@ -76,7 +76,11 @@ import {
import { getTilingPatternIR, Pattern } from "./pattern.js"; import { getTilingPatternIR, Pattern } from "./pattern.js";
import { isPDFFunction, PDFFunctionFactory } from "./function.js"; import { isPDFFunction, PDFFunctionFactory } from "./function.js";
import { Lexer, Parser } from "./parser.js"; import { Lexer, Parser } from "./parser.js";
import { LocalColorSpaceCache, LocalImageCache } from "./image_utils.js"; import {
LocalColorSpaceCache,
LocalGStateCache,
LocalImageCache,
} from "./image_utils.js";
import { bidi } from "./bidi.js"; import { bidi } from "./bidi.js";
import { ColorSpace } from "./colorspace.js"; import { ColorSpace } from "./colorspace.js";
import { DecodeStream } from "./stream.js"; import { DecodeStream } from "./stream.js";
@ -804,14 +808,18 @@ class PartialEvaluator {
throw reason; throw reason;
} }
setGState( async setGState({
resources, resources,
gState, gState,
operatorList, operatorList,
cacheKey,
task, task,
stateManager, stateManager,
localColorSpaceCache localGStateCache,
) { localColorSpaceCache,
}) {
const gStateRef = gState.objId;
let isSimpleGState = true;
// This array holds the converted/processed state data. // This array holds the converted/processed state data.
var gStateObj = []; var gStateObj = [];
var gStateKeys = gState.getKeys(); var gStateKeys = gState.getKeys();
@ -857,6 +865,8 @@ class PartialEvaluator {
break; break;
} }
if (isDict(value)) { if (isDict(value)) {
isSimpleGState = false;
promise = promise.then(() => { promise = promise.then(() => {
return this.handleSMask( return this.handleSMask(
value, value,
@ -901,6 +911,10 @@ class PartialEvaluator {
if (gStateObj.length > 0) { if (gStateObj.length > 0) {
operatorList.addOp(OPS.setGState, [gStateObj]); operatorList.addOp(OPS.setGState, [gStateObj]);
} }
if (isSimpleGState) {
localGStateCache.set(cacheKey, gStateRef, gStateObj);
}
}); });
} }
@ -1221,6 +1235,7 @@ class PartialEvaluator {
let parsingText = false; let parsingText = false;
const localImageCache = new LocalImageCache(); const localImageCache = new LocalImageCache();
const localColorSpaceCache = new LocalColorSpaceCache(); const localColorSpaceCache = new LocalColorSpaceCache();
const localGStateCache = new LocalGStateCache();
var xobjs = resources.get("XObject") || Dict.empty; var xobjs = resources.get("XObject") || Dict.empty;
var patterns = resources.get("Pattern") || Dict.empty; var patterns = resources.get("Pattern") || Dict.empty;
@ -1250,7 +1265,8 @@ class PartialEvaluator {
operation = {}, operation = {},
i, i,
ii, ii,
cs; cs,
name;
while (!(stop = timeSlotManager.check())) { while (!(stop = timeSlotManager.check())) {
// The arguments parsed by read() are used beyond this loop, so we // The arguments parsed by read() are used beyond this loop, so we
// cannot reuse the same array on each iteration. Therefore we pass // cannot reuse the same array on each iteration. Therefore we pass
@ -1266,7 +1282,7 @@ class PartialEvaluator {
switch (fn | 0) { switch (fn | 0) {
case OPS.paintXObject: case OPS.paintXObject:
// eagerly compile XForm objects // eagerly compile XForm objects
var name = args[0].name; name = args[0].name;
if (name) { if (name) {
const localImage = localImageCache.getByName(name); const localImage = localImageCache.getByName(name);
if (localImage) { if (localImage) {
@ -1629,23 +1645,64 @@ class PartialEvaluator {
fn = OPS.shadingFill; fn = OPS.shadingFill;
break; break;
case OPS.setGState: case OPS.setGState:
var dictName = args[0]; name = args[0].name;
var extGState = resources.get("ExtGState"); if (name) {
const localGStateObj = localGStateCache.getByName(name);
if (!isDict(extGState) || !extGState.has(dictName.name)) { if (localGStateObj) {
break; if (localGStateObj.length > 0) {
operatorList.addOp(OPS.setGState, [localGStateObj]);
}
args = null;
continue;
}
} }
var gState = extGState.get(dictName.name);
next( next(
self.setGState( new Promise(function (resolveGState, rejectGState) {
resources, if (!name) {
gState, throw new FormatError("GState must be referred to by name.");
operatorList, }
task,
stateManager, const extGState = resources.get("ExtGState");
localColorSpaceCache if (!(extGState instanceof Dict)) {
) throw new FormatError("ExtGState should be a dictionary.");
}
const gState = extGState.get(name);
// TODO: Attempt to look up cached GStates by reference as well,
// if and only if there are PDF documents where doing so
// would significantly improve performance.
if (!(gState instanceof Dict)) {
throw new FormatError("GState should be a dictionary.");
}
self
.setGState({
resources,
gState,
operatorList,
cacheKey: name,
task,
stateManager,
localGStateCache,
localColorSpaceCache,
})
.then(resolveGState, rejectGState);
}).catch(function (reason) {
if (reason instanceof AbortException) {
return;
}
if (self.options.ignoreErrors) {
// Error(s) in the ExtGState -- sending unsupported feature
// notification and allow parsing/rendering to continue.
self.handler.send("UnsupportedFeature", {
featureId: UNSUPPORTED_FEATURES.errorExtGState,
});
warn(`getOperatorList - ignoring ExtGState: "${reason}".`);
return;
}
throw reason;
})
); );
return; return;
case OPS.moveTo: case OPS.moveTo:
@ -1767,6 +1824,7 @@ class PartialEvaluator {
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
var xobjs = null; var xobjs = null;
const emptyXObjectCache = new LocalImageCache(); const emptyXObjectCache = new LocalImageCache();
const emptyGStateCache = new LocalGStateCache();
var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
@ -2339,25 +2397,59 @@ class PartialEvaluator {
); );
return; return;
case OPS.setGState: case OPS.setGState:
flushTextContentItem(); name = args[0].name;
var dictName = args[0]; if (name && emptyGStateCache.getByName(name)) {
var extGState = resources.get("ExtGState"); break;
}
if (!isDict(extGState) || !isName(dictName)) { next(
break; new Promise(function (resolveGState, rejectGState) {
} if (!name) {
var gState = extGState.get(dictName.name); throw new FormatError("GState must be referred to by name.");
if (!isDict(gState)) { }
break;
} const extGState = resources.get("ExtGState");
var gStateFont = gState.get("Font"); if (!(extGState instanceof Dict)) {
if (gStateFont) { throw new FormatError("ExtGState should be a dictionary.");
textState.fontName = null; }
textState.fontSize = gStateFont[1];
next(handleSetFont(null, gStateFont[0])); const gState = extGState.get(name);
return; // TODO: Attempt to look up cached GStates by reference as well,
} // if and only if there are PDF documents where doing so
break; // would significantly improve performance.
if (!(gState instanceof Dict)) {
throw new FormatError("GState should be a dictionary.");
}
const gStateFont = gState.get("Font");
if (!gStateFont) {
emptyGStateCache.set(name, gState.objId, true);
resolveGState();
return;
}
flushTextContentItem();
textState.fontName = null;
textState.fontSize = gStateFont[1];
handleSetFont(null, gStateFont[0]).then(
resolveGState,
rejectGState
);
}).catch(function (reason) {
if (reason instanceof AbortException) {
return;
}
if (self.options.ignoreErrors) {
// Error(s) in the ExtGState -- allow text-extraction to
// continue.
warn(`getTextContent - ignoring ExtGState: "${reason}".`);
return;
}
throw reason;
})
);
return;
} // switch } // switch
if (textContent.items.length >= sink.desiredSize) { if (textContent.items.length >= sink.desiredSize) {
// Wait for ready, if we reach highWaterMark. // Wait for ready, if we reach highWaterMark.

View File

@ -113,6 +113,27 @@ class LocalFunctionCache extends BaseLocalCache {
} }
} }
/**
 * Local cache for processed Graphics State (ExtGState) data.
 *
 * Entries are stored either by reference (when a `ref` is supplied) or
 * directly by name. In both cases an already cached entry is never
 * overwritten; the first value stored wins.
 */
class LocalGStateCache extends BaseLocalCache {
  /**
   * Store `data` in the cache.
   *
   * @param {string} name - The resource name; must be truthy.
   * @param {Object|null} [ref] - Optional reference used as the cache key.
   * @param {*} data - The value to cache.
   * @throws {Error} When `name` is missing.
   */
  set(name, ref = null, data) {
    if (!name) {
      throw new Error('LocalGStateCache.set - expected "name" argument.');
    }

    if (!ref) {
      // No reference available: key the entry on the name itself.
      if (!this._imageMap.has(name)) {
        this._imageMap.set(name, data);
      }
      return;
    }

    // Reference available: record the name -> ref mapping together with the
    // data, unless this reference has already been cached.
    if (!this._imageCache.has(ref)) {
      this._nameRefMap.set(name, ref);
      this._imageCache.put(ref, data);
    }
  }
}
class GlobalImageCache { class GlobalImageCache {
static get NUM_PAGES_THRESHOLD() { static get NUM_PAGES_THRESHOLD() {
return shadow(this, "NUM_PAGES_THRESHOLD", 2); return shadow(this, "NUM_PAGES_THRESHOLD", 2);
@ -210,5 +231,6 @@ export {
LocalImageCache, LocalImageCache,
LocalColorSpaceCache, LocalColorSpaceCache,
LocalFunctionCache, LocalFunctionCache,
LocalGStateCache,
GlobalImageCache, GlobalImageCache,
}; };

View File

@ -242,23 +242,35 @@ describe("evaluator", function () {
); );
}); });
it("should execute if nested commands", function (done) { it("should execute if nested commands", function (done) {
const gState = new Dict();
gState.set("LW", 2);
gState.set("CA", 0.5);
const extGState = new Dict();
extGState.set("GS2", gState);
const resources = new ResourcesMock();
resources.ExtGState = extGState;
var stream = new StringStream("/F2 /GS2 gs 5.711 Tf"); var stream = new StringStream("/F2 /GS2 gs 5.711 Tf");
runOperatorListCheck( runOperatorListCheck(partialEvaluator, stream, resources, function (
partialEvaluator, result
stream, ) {
new ResourcesMock(), expect(result.fnArray.length).toEqual(3);
function (result) { expect(result.fnArray[0]).toEqual(OPS.setGState);
expect(result.fnArray.length).toEqual(3); expect(result.fnArray[1]).toEqual(OPS.dependency);
expect(result.fnArray[0]).toEqual(OPS.setGState); expect(result.fnArray[2]).toEqual(OPS.setFont);
expect(result.fnArray[1]).toEqual(OPS.dependency); expect(result.argsArray.length).toEqual(3);
expect(result.fnArray[2]).toEqual(OPS.setFont); expect(result.argsArray[0]).toEqual([
expect(result.argsArray.length).toEqual(3); [
expect(result.argsArray[0].length).toEqual(1); ["LW", 2],
expect(result.argsArray[1].length).toEqual(1); ["CA", 0.5],
expect(result.argsArray[2].length).toEqual(2); ],
done(); ]);
} expect(result.argsArray[1]).toEqual(["g_font_error"]);
); expect(result.argsArray[2]).toEqual(["g_font_error", 5.711]);
done();
});
}); });
it("should skip if too few arguments", function (done) { it("should skip if too few arguments", function (done) {
var stream = new StringStream("5 d0"); var stream = new StringStream("5 d0");