pdf.js/src/display/annotation_storage.js

207 lines
4.8 KiB
JavaScript
Raw Normal View History

/* Copyright 2020 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { objectFromMap, unreachable } from "../shared/util.js";
import { AnnotationEditor } from "./editor/editor.js";
import { MurmurHash3_64 } from "../shared/murmurhash3.js";
/**
* Key/value storage for annotation data in forms.
*/
class AnnotationStorage {
constructor() {
this._storage = new Map();
this._modified = false;
// Callbacks to signal when the modification state is set or reset.
// This is used by the viewer to only bind on `beforeunload` if forms
// are actually edited to prevent doing so unconditionally since that
// can have undesirable effects.
this.onSetModified = null;
this.onResetModified = null;
}
/**
* Get the value for a given key if it exists, or return the default value.
*
* @public
* @memberof AnnotationStorage
* @param {string} key
* @param {Object} defaultValue
* @returns {Object}
*/
getValue(key, defaultValue) {
const value = this._storage.get(key);
if (value === undefined) {
return defaultValue;
}
return Object.assign(defaultValue, value);
}
/**
* Get the value for a given key.
*
* @public
* @memberof AnnotationStorage
* @param {string} key
* @returns {Object}
*/
getRawValue(key) {
return this._storage.get(key);
}
/**
* Remove a value from the storage.
* @param {string} key
*/
removeKey(key) {
this._storage.delete(key);
if (this._storage.size === 0) {
this.resetModified();
}
}
/**
* Set the value for a given key
*
* @public
* @memberof AnnotationStorage
* @param {string} key
* @param {Object} value
*/
setValue(key, value) {
const obj = this._storage.get(key);
let modified = false;
if (obj !== undefined) {
for (const [entry, val] of Object.entries(value)) {
if (obj[entry] !== val) {
modified = true;
obj[entry] = val;
}
}
} else {
modified = true;
[Regression] Re-factor the *internal* `includeAnnotationStorage` handling, since it's currently subtly wrong *This patch is very similar to the recently fixed `renderInteractiveForms`-options, see PR 13867.* As far as I can tell, this *subtle* bug has existed ever since `AnnotationStorage`-support was first added in PR 12106 (a little over a year ago). The value of the `includeAnnotationStorage`-option, as passed to the `PDFPageProxy.render` method, will (potentially) affect the size/content of the operatorList that's returned from the worker (for documents with forms). Given that operatorLists will generally, unless they contain huge images, be cached in the API, repeated `PDFPageProxy.render` calls where the form-data has been changed by the user in between, can thus *wrongly* return a cached operatorList. In the viewer we're only using the `includeAnnotationStorage`-option when printing, which is probably why this has gone unnoticed for so long. Note that we, for performance reasons, don't cache printing-operatorLists in the API. However, there's nothing stopping an API-user from using the `includeAnnotationStorage`-option during "normal" rendering, which could thus result in *subtle* (and difficult to understand) rendering bugs. In order to handle this, we need to know if the `AnnotationStorage`-instance has been updated since the last `PDFPageProxy.render` call. The most "correct" solution would obviously be to create a hash of the `AnnotationStorage` contents, however that would require adding a bunch of code, complexity, and runtime overhead. Given that operatorList caching in the API doesn't have to be perfect[1], but only have to avoid *false* cache-hits, we can simplify things significantly be only keeping track of the last time that the `AnnotationStorage`-data was modified. *Please note:* While working on this patch, I also noticed that the `renderInteractiveForms`- and `includeAnnotationStorage`-options in the `PDFPageProxy.render` method are mutually exclusive.[2] Given that the various Annotation-related options in `PDFPageProxy.render` have been added at different times, this has unfortunately led to the current "messy" situation.[3] --- [1] Note how we're already not caching operatorLists for pages with *huge* images, in order to save memory, hence there's no guarantee that operatorLists will always be cached. [2] Setting both to `true` will result in undefined behaviour, since trying to insert `AnnotationStorage`-values into fields that are being excluded from the operatorList-building will obviously not work, which isn't at all clear from the documentation. [3] My intention is to try and fix this in a follow-up PR, and I've got a WIP patch locally, however it will result in a number of API-observable changes.
2021-08-16 02:57:42 +09:00
this._storage.set(key, value);
}
if (modified) {
this.#setModified();
}
}
getAll() {
return this._storage.size > 0 ? objectFromMap(this._storage) : null;
}
get size() {
return this._storage.size;
}
#setModified() {
if (!this._modified) {
this._modified = true;
if (typeof this.onSetModified === "function") {
this.onSetModified();
}
}
}
resetModified() {
if (this._modified) {
this._modified = false;
if (typeof this.onResetModified === "function") {
this.onResetModified();
}
}
}
/**
* @returns {PrintAnnotationStorage}
*/
get print() {
return new PrintAnnotationStorage(this);
}
/**
* PLEASE NOTE: Only intended for usage within the API itself.
* @ignore
*/
get serializable() {
if (this._storage.size === 0) {
return null;
}
const clone = new Map();
for (const [key, val] of this._storage) {
const serialized =
val instanceof AnnotationEditor ? val.serialize() : val;
if (serialized) {
clone.set(key, serialized);
}
}
return clone;
}
[Regression] Re-factor the *internal* `includeAnnotationStorage` handling, since it's currently subtly wrong *This patch is very similar to the recently fixed `renderInteractiveForms`-options, see PR 13867.* As far as I can tell, this *subtle* bug has existed ever since `AnnotationStorage`-support was first added in PR 12106 (a little over a year ago). The value of the `includeAnnotationStorage`-option, as passed to the `PDFPageProxy.render` method, will (potentially) affect the size/content of the operatorList that's returned from the worker (for documents with forms). Given that operatorLists will generally, unless they contain huge images, be cached in the API, repeated `PDFPageProxy.render` calls where the form-data has been changed by the user in between, can thus *wrongly* return a cached operatorList. In the viewer we're only using the `includeAnnotationStorage`-option when printing, which is probably why this has gone unnoticed for so long. Note that we, for performance reasons, don't cache printing-operatorLists in the API. However, there's nothing stopping an API-user from using the `includeAnnotationStorage`-option during "normal" rendering, which could thus result in *subtle* (and difficult to understand) rendering bugs. In order to handle this, we need to know if the `AnnotationStorage`-instance has been updated since the last `PDFPageProxy.render` call. The most "correct" solution would obviously be to create a hash of the `AnnotationStorage` contents, however that would require adding a bunch of code, complexity, and runtime overhead. Given that operatorList caching in the API doesn't have to be perfect[1], but only have to avoid *false* cache-hits, we can simplify things significantly be only keeping track of the last time that the `AnnotationStorage`-data was modified. *Please note:* While working on this patch, I also noticed that the `renderInteractiveForms`- and `includeAnnotationStorage`-options in the `PDFPageProxy.render` method are mutually exclusive.[2] Given that the various Annotation-related options in `PDFPageProxy.render` have been added at different times, this has unfortunately led to the current "messy" situation.[3] --- [1] Note how we're already not caching operatorLists for pages with *huge* images, in order to save memory, hence there's no guarantee that operatorLists will always be cached. [2] Setting both to `true` will result in undefined behaviour, since trying to insert `AnnotationStorage`-values into fields that are being excluded from the operatorList-building will obviously not work, which isn't at all clear from the documentation. [3] My intention is to try and fix this in a follow-up PR, and I've got a WIP patch locally, however it will result in a number of API-observable changes.
2021-08-16 02:57:42 +09:00
/**
* PLEASE NOTE: Only intended for usage within the API itself.
* @ignore
*/
static getHash(map) {
if (!map) {
return "";
}
const hash = new MurmurHash3_64();
for (const [key, val] of map) {
hash.update(`${key}:${JSON.stringify(val)}`);
}
return hash.hexdigest();
[Regression] Re-factor the *internal* `includeAnnotationStorage` handling, since it's currently subtly wrong *This patch is very similar to the recently fixed `renderInteractiveForms`-options, see PR 13867.* As far as I can tell, this *subtle* bug has existed ever since `AnnotationStorage`-support was first added in PR 12106 (a little over a year ago). The value of the `includeAnnotationStorage`-option, as passed to the `PDFPageProxy.render` method, will (potentially) affect the size/content of the operatorList that's returned from the worker (for documents with forms). Given that operatorLists will generally, unless they contain huge images, be cached in the API, repeated `PDFPageProxy.render` calls where the form-data has been changed by the user in between, can thus *wrongly* return a cached operatorList. In the viewer we're only using the `includeAnnotationStorage`-option when printing, which is probably why this has gone unnoticed for so long. Note that we, for performance reasons, don't cache printing-operatorLists in the API. However, there's nothing stopping an API-user from using the `includeAnnotationStorage`-option during "normal" rendering, which could thus result in *subtle* (and difficult to understand) rendering bugs. In order to handle this, we need to know if the `AnnotationStorage`-instance has been updated since the last `PDFPageProxy.render` call. The most "correct" solution would obviously be to create a hash of the `AnnotationStorage` contents, however that would require adding a bunch of code, complexity, and runtime overhead. Given that operatorList caching in the API doesn't have to be perfect[1], but only have to avoid *false* cache-hits, we can simplify things significantly be only keeping track of the last time that the `AnnotationStorage`-data was modified. *Please note:* While working on this patch, I also noticed that the `renderInteractiveForms`- and `includeAnnotationStorage`-options in the `PDFPageProxy.render` method are mutually exclusive.[2] Given that the various Annotation-related options in `PDFPageProxy.render` have been added at different times, this has unfortunately led to the current "messy" situation.[3] --- [1] Note how we're already not caching operatorLists for pages with *huge* images, in order to save memory, hence there's no guarantee that operatorLists will always be cached. [2] Setting both to `true` will result in undefined behaviour, since trying to insert `AnnotationStorage`-values into fields that are being excluded from the operatorList-building will obviously not work, which isn't at all clear from the documentation. [3] My intention is to try and fix this in a follow-up PR, and I've got a WIP patch locally, however it will result in a number of API-observable changes.
2021-08-16 02:57:42 +09:00
}
}
/**
* A special `AnnotationStorage` for use during printing, where the serializable
* data is *frozen* upon initialization, to prevent scripting from modifying its
* contents. (Necessary since printing is triggered synchronously in browsers.)
*/
class PrintAnnotationStorage extends AnnotationStorage {
#serializable = null;
constructor(parent) {
super();
// Create a *copy* of the data, since Objects are passed by reference in JS.
this.#serializable = structuredClone(parent.serializable);
}
/**
* @returns {PrintAnnotationStorage}
*/
// eslint-disable-next-line getter-return
get print() {
unreachable("Should not call PrintAnnotationStorage.print");
}
/**
* PLEASE NOTE: Only intended for usage within the API itself.
* @ignore
*/
get serializable() {
return this.#serializable;
}
}
export { AnnotationStorage, PrintAnnotationStorage };