[Regression] Re-factor the *internal* includeAnnotationStorage handling, since it's currently subtly wrong

*This patch is very similar to the recently fixed `renderInteractiveForms`-options, see PR 13867.*
As far as I can tell, this *subtle* bug has existed ever since `AnnotationStorage`-support was first added in PR 12106 (a little over a year ago).

The value of the `includeAnnotationStorage`-option, as passed to the `PDFPageProxy.render` method, will (potentially) affect the size/content of the operatorList that's returned from the worker (for documents with forms).
Given that operatorLists will generally, unless they contain huge images, be cached in the API, repeated `PDFPageProxy.render` calls where the form-data has been changed by the user in between, can thus *wrongly* return a cached operatorList.

In the viewer we're only using the `includeAnnotationStorage`-option when printing, which is probably why this has gone unnoticed for so long. Note that we, for performance reasons, don't cache printing-operatorLists in the API.
However, there's nothing stopping an API-user from using the `includeAnnotationStorage`-option during "normal" rendering, which could thus result in *subtle* (and difficult to understand) rendering bugs.

In order to handle this, we need to know if the `AnnotationStorage`-instance has been updated since the last `PDFPageProxy.render` call. The most "correct" solution would obviously be to create a hash of the `AnnotationStorage` contents, however that would require adding a bunch of code, complexity, and runtime overhead.
Given that operatorList caching in the API doesn't have to be perfect[1], but only have to avoid *false* cache-hits, we can simplify things significantly be only keeping track of the last time that the `AnnotationStorage`-data was modified.

*Please note:* While working on this patch, I also noticed that the `renderInteractiveForms`- and `includeAnnotationStorage`-options in the `PDFPageProxy.render` method are mutually exclusive.[2]
Given that the various Annotation-related options in `PDFPageProxy.render` have been added at different times, this has unfortunately led to the current "messy" situation.[3]

---
[1] Note how we're already not caching operatorLists for pages with *huge* images, in order to save memory, hence there's no guarantee that operatorLists will always be cached.

[2] Setting both to `true` will result in undefined behaviour, since trying to insert `AnnotationStorage`-values into fields that are being excluded from the operatorList-building will obviously not work, which isn't at all clear from the documentation.

[3] My intention is to try and fix this in a follow-up PR, and I've got a WIP patch locally, however it will result in a number of API-observable changes.
This commit is contained in:
Jonas Jenwald 2021-08-15 19:57:42 +02:00
parent 1465b1670f
commit a7f0301f21
6 changed files with 107 additions and 57 deletions

View File

@ -49,6 +49,7 @@
"unicorn/no-new-buffer": "error",
"unicorn/no-instanceof-array": "error",
"unicorn/no-useless-spread": "error",
"unicorn/prefer-date-now": "error",
"unicorn/prefer-string-starts-ends-with": "error",
// Possible errors

View File

@ -320,7 +320,14 @@ class Page {
});
}
getOperatorList({ handler, sink, task, intent, annotationStorage }) {
getOperatorList({
handler,
sink,
task,
intent,
cacheKey,
annotationStorage = null,
}) {
const contentStreamPromise = this.getContentStream(handler);
const resourcesPromise = this.loadResources([
"ColorSpace",
@ -354,7 +361,7 @@ class Page {
this.nonBlendModesSet
),
pageIndex: this.pageIndex,
intent,
cacheKey,
});
return partialEvaluator
@ -377,7 +384,7 @@ class Page {
pageOpList.flush(true);
return { length: pageOpList.totalLength };
}
const renderForms = !!(intent & RenderingIntentFlag.ANNOTATION_FORMS),
const renderForms = !!(intent & RenderingIntentFlag.ANNOTATIONS_FORMS),
intentAny = !!(intent & RenderingIntentFlag.ANY),
intentDisplay = !!(intent & RenderingIntentFlag.DISPLAY),
intentPrint = !!(intent & RenderingIntentFlag.PRINT);

View File

@ -688,6 +688,7 @@ class WorkerMessageHandler {
sink,
task,
intent: data.intent,
cacheKey: data.cacheKey,
annotationStorage: data.annotationStorage,
})
.then(

View File

@ -21,6 +21,7 @@ import { objectFromMap } from "../shared/util.js";
class AnnotationStorage {
constructor() {
this._storage = new Map();
this._timeStamp = Date.now();
this._modified = false;
// Callbacks to signal when the modification state is set or reset.
@ -41,8 +42,7 @@ class AnnotationStorage {
* @returns {Object}
*/
getValue(key, defaultValue) {
const obj = this._storage.get(key);
return obj !== undefined ? obj : defaultValue;
return this._storage.get(key) ?? defaultValue;
}
/**
@ -64,10 +64,11 @@ class AnnotationStorage {
}
}
} else {
this._storage.set(key, value);
modified = true;
this._storage.set(key, value);
}
if (modified) {
this._timeStamp = Date.now();
this._setModified();
}
}
@ -108,6 +109,14 @@ class AnnotationStorage {
get serializable() {
return this._storage.size > 0 ? this._storage : null;
}
/**
* PLEASE NOTE: Only intended for usage within the API itself.
* @ignore
*/
get lastModified() {
return this._timeStamp.toString();
}
}
export { AnnotationStorage };

View File

@ -1279,15 +1279,15 @@ class PDFPageProxy {
* {Array} of the annotation objects.
*/
getAnnotations({ intent = "display" } = {}) {
const renderingIntent = this._transport.getRenderingIntent(intent, {});
const intentArgs = this._transport.getRenderingIntent(intent, {});
let promise = this._annotationPromises.get(renderingIntent);
let promise = this._annotationPromises.get(intentArgs.cacheKey);
if (!promise) {
promise = this._transport.getAnnotations(
this._pageIndex,
renderingIntent
intentArgs.renderingIntent
);
this._annotationPromises.set(renderingIntent, promise);
this._annotationPromises.set(intentArgs.cacheKey, promise);
}
return promise;
}
@ -1335,8 +1335,9 @@ class PDFPageProxy {
this._stats.time("Overall");
}
const renderingIntent = this._transport.getRenderingIntent(intent, {
const intentArgs = this._transport.getRenderingIntent(intent, {
renderForms: renderInteractiveForms === true,
includeAnnotationStorage: includeAnnotationStorage === true,
});
// If there was a pending destroy, cancel it so no cleanup happens during
// this call to render.
@ -1346,10 +1347,10 @@ class PDFPageProxy {
optionalContentConfigPromise = this._transport.getOptionalContentConfig();
}
let intentState = this._intentStates.get(renderingIntent);
let intentState = this._intentStates.get(intentArgs.cacheKey);
if (!intentState) {
intentState = Object.create(null);
this._intentStates.set(renderingIntent, intentState);
this._intentStates.set(intentArgs.cacheKey, intentState);
}
// Ensure that a pending `streamReader` cancel timeout is always aborted.
@ -1361,9 +1362,9 @@ class PDFPageProxy {
const canvasFactoryInstance =
canvasFactory ||
new DefaultCanvasFactory({ ownerDocument: this._ownerDocument });
const annotationStorage = includeAnnotationStorage
? this._transport.annotationStorage.serializable
: null;
const intentPrint = !!(
intentArgs.renderingIntent & RenderingIntentFlag.PRINT
);
// If there's no displayReadyCapability yet, then the operatorList
// was never requested before. Make the request and create the promise.
@ -1378,11 +1379,7 @@ class PDFPageProxy {
if (this._stats) {
this._stats.time("Page Request");
}
this._pumpOperatorList({
pageIndex: this._pageIndex,
intent: renderingIntent,
annotationStorage,
});
this._pumpOperatorList(intentArgs);
}
const complete = error => {
@ -1390,10 +1387,7 @@ class PDFPageProxy {
// Attempt to reduce memory usage during *printing*, by always running
// cleanup once rendering has finished (regardless of cleanupAfterRender).
if (
this.cleanupAfterRender ||
renderingIntent & RenderingIntentFlag.PRINT
) {
if (this.cleanupAfterRender || intentPrint) {
this.pendingCleanup = true;
}
this._tryCleanup();
@ -1403,7 +1397,7 @@ class PDFPageProxy {
this._abortOperatorList({
intentState,
reason: error,
reason: error instanceof Error ? error : new Error(error),
});
} else {
internalRenderTask.capability.resolve();
@ -1429,7 +1423,7 @@ class PDFPageProxy {
operatorList: intentState.operatorList,
pageIndex: this._pageIndex,
canvasFactory: canvasFactoryInstance,
useRequestAnimationFrame: !(renderingIntent & RenderingIntentFlag.PRINT),
useRequestAnimationFrame: !intentPrint,
pdfBug: this._pdfBug,
});
@ -1474,13 +1468,13 @@ class PDFPageProxy {
}
}
const renderingIntent = this._transport.getRenderingIntent(intent, {
const intentArgs = this._transport.getRenderingIntent(intent, {
isOpList: true,
});
let intentState = this._intentStates.get(renderingIntent);
let intentState = this._intentStates.get(intentArgs.cacheKey);
if (!intentState) {
intentState = Object.create(null);
this._intentStates.set(renderingIntent, intentState);
this._intentStates.set(intentArgs.cacheKey, intentState);
}
let opListTask;
@ -1498,10 +1492,7 @@ class PDFPageProxy {
if (this._stats) {
this._stats.time("Page Request");
}
this._pumpOperatorList({
pageIndex: this._pageIndex,
intent: renderingIntent,
});
this._pumpOperatorList(intentArgs);
}
return intentState.opListReadCapability.promise;
}
@ -1584,14 +1575,14 @@ class PDFPageProxy {
this._transport.pageCache[this._pageIndex] = null;
const waitOn = [];
for (const [intent, intentState] of this._intentStates) {
for (const intentState of this._intentStates.values()) {
this._abortOperatorList({
intentState,
reason: new Error("Page was destroyed."),
force: true,
});
if (intent & RenderingIntentFlag.OPLIST) {
if (intentState.opListReadCapability) {
// Avoid errors below, since the renderTasks are just stubs.
continue;
}
@ -1649,8 +1640,8 @@ class PDFPageProxy {
/**
* @private
*/
_startRenderPage(transparency, intent) {
const intentState = this._intentStates.get(intent);
_startRenderPage(transparency, cacheKey) {
const intentState = this._intentStates.get(cacheKey);
if (!intentState) {
return; // Rendering was cancelled.
}
@ -1688,19 +1679,32 @@ class PDFPageProxy {
/**
* @private
*/
_pumpOperatorList(args) {
assert(
args.intent,
'PDFPageProxy._pumpOperatorList: Expected "intent" argument.'
);
_pumpOperatorList({ renderingIntent, cacheKey }) {
if (
typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING")
) {
assert(
Number.isInteger(renderingIntent) && renderingIntent > 0,
'_pumpOperatorList: Expected valid "renderingIntent" argument.'
);
}
const readableStream = this._transport.messageHandler.sendWithStream(
"GetOperatorList",
args
{
pageIndex: this._pageIndex,
intent: renderingIntent,
cacheKey,
annotationStorage:
renderingIntent & RenderingIntentFlag.ANNOTATIONS_STORAGE
? this._transport.annotationStorage.serializable
: null,
}
);
const reader = readableStream.getReader();
const intentState = this._intentStates.get(args.intent);
const intentState = this._intentStates.get(cacheKey);
intentState.streamReader = reader;
const pump = () => {
@ -1749,11 +1753,15 @@ class PDFPageProxy {
* @private
*/
_abortOperatorList({ intentState, reason, force = false }) {
assert(
reason instanceof Error ||
(typeof reason === "object" && reason !== null),
'PDFPageProxy._abortOperatorList: Expected "reason" argument.'
);
if (
typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING")
) {
assert(
reason instanceof Error,
'_abortOperatorList: Expected valid "reason" argument.'
);
}
if (!intentState.streamReader) {
return;
@ -1787,9 +1795,9 @@ class PDFPageProxy {
}
// Remove the current `intentState`, since a cancelled `getOperatorList`
// call on the worker-thread cannot be re-started...
for (const [intent, curIntentState] of this._intentStates) {
for (const [curCacheKey, curIntentState] of this._intentStates) {
if (curIntentState === intentState) {
this._intentStates.delete(intent);
this._intentStates.delete(curCacheKey);
break;
}
}
@ -2333,8 +2341,12 @@ class WorkerTransport {
return shadow(this, "annotationStorage", new AnnotationStorage());
}
getRenderingIntent(intent, { renderForms = false, isOpList = false }) {
getRenderingIntent(
intent,
{ renderForms = false, includeAnnotationStorage = false, isOpList = false }
) {
let renderingIntent = RenderingIntentFlag.DISPLAY; // Default value.
let lastModified = "";
switch (intent) {
case "any":
@ -2350,14 +2362,22 @@ class WorkerTransport {
}
if (renderForms) {
renderingIntent += RenderingIntentFlag.ANNOTATION_FORMS;
renderingIntent += RenderingIntentFlag.ANNOTATIONS_FORMS;
}
if (includeAnnotationStorage) {
renderingIntent += RenderingIntentFlag.ANNOTATIONS_STORAGE;
lastModified = this.annotationStorage.lastModified;
}
if (isOpList) {
renderingIntent += RenderingIntentFlag.OPLIST;
}
return renderingIntent;
return {
renderingIntent,
cacheKey: `${renderingIntent}_${lastModified}`,
};
}
destroy() {
@ -2617,7 +2637,7 @@ class WorkerTransport {
}
const page = this.pageCache[data.pageIndex];
page._startRenderPage(data.transparency, data.intent);
page._startRenderPage(data.transparency, data.cacheKey);
});
messageHandler.on("commonobj", data => {

View File

@ -18,11 +18,23 @@ import "./compatibility.js";
const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0];
const FONT_IDENTITY_MATRIX = [0.001, 0, 0, 0.001, 0, 0];
/**
* Refer to the `WorkerTransport.getRenderingIntent`-method in the API, to see
* how these flags are being used:
* - ANY, DISPLAY, and PRINT are the normal rendering intents, note the
* `PDFPageProxy.{render, getOperatorList, getAnnotations}`-methods.
* - ANNOTATIONS_FORMS, and ANNOTATIONS_STORAGE controls which annotations are
* rendered onto the canvas, note the `renderInteractiveForms`- respectively
* `includeAnnotationStorage`-options in the `PDFPageProxy.render`-method.
* - OPLIST is used with the `PDFPageProxy.getOperatorList`-method, note the
* `OperatorList`-constructor (on the worker-thread).
*/
const RenderingIntentFlag = {
ANY: 0x01,
DISPLAY: 0x02,
PRINT: 0x04,
ANNOTATION_FORMS: 0x20,
ANNOTATIONS_FORMS: 0x10,
ANNOTATIONS_STORAGE: 0x20,
OPLIST: 0x100,
};