Slightly reduce asynchronicity when parsing Annotations

Over time the amount of "document level" data potentially needed during parsing of Annotations has increased a fair bit, which means that we currently need to ensure that a bunch of data is available for each individual Annotation.
Given that this data is "constant" for a PDF document we can instead create (and cache) it lazily, only when needed, *before* starting to parse the Annotations on a page. This way the parsing of individual Annotations should become slightly less asynchronous, which really cannot hurt.

An additional benefit of these changes is that we can reduce the number of parameters that need to be explicitly passed around in the annotation-code, which helps overall readability in my opinion.

One potential drawback of these changes is that the `AnnotationFactory.create` method no longer handles "everything" on its own; however, given how few call-sites there are, I don't think that's too much of a problem.
This commit is contained in:
Jonas Jenwald 2023-09-07 14:14:35 +02:00
parent 3e32d87be7
commit df9cce39c0
5 changed files with 414 additions and 313 deletions

View File

@ -67,6 +67,35 @@ import { OperatorList } from "./operator_list.js";
import { XFAFactory } from "./xfa/factory.js";
class AnnotationFactory {
static createGlobals(pdfManager) {
return Promise.all([
pdfManager.ensureCatalog("acroForm"),
pdfManager.ensureDoc("xfaDatasets"),
pdfManager.ensureCatalog("structTreeRoot"),
// Only necessary to prevent the `Catalog.baseUrl`-getter, used
// with some Annotations, from throwing and thus breaking parsing:
pdfManager.ensureCatalog("baseUrl"),
// Only necessary to prevent the `Catalog.attachments`-getter, used
// with "GoToE" actions, from throwing and thus breaking parsing:
pdfManager.ensureCatalog("attachments"),
]).then(
([acroForm, xfaDatasets, structTreeRoot, baseUrl, attachments]) => {
return {
pdfManager,
acroForm: acroForm instanceof Dict ? acroForm : Dict.empty,
xfaDatasets,
structTreeRoot,
baseUrl,
attachments,
};
},
reason => {
warn(`createGlobals: "${reason}".`);
return null;
}
);
}
/**
* Create an `Annotation` object of the correct type for the given reference
* to an annotation dictionary. This yields a promise that is resolved when
@ -74,48 +103,33 @@ class AnnotationFactory {
*
* @param {XRef} xref
* @param {Object} ref
* @param {PDFManager} pdfManager
* @param {Object} annotationGlobals
* @param {Object} idFactory
* @param {boolean} collectFields
* @param {boolean} [collectFields]
* @param {Object} [pageRef]
* @returns {Promise} A promise that is resolved with an {Annotation}
* instance.
*/
static create(xref, ref, pdfManager, idFactory, collectFields, pageRef) {
return Promise.all([
pdfManager.ensureCatalog("acroForm"),
// Only necessary to prevent the `pdfManager.docBaseUrl`-getter, used
// with certain Annotations, from throwing and thus breaking parsing:
pdfManager.ensureCatalog("baseUrl"),
// Only necessary in the `Catalog.parseDestDictionary`-method,
// when parsing "GoToE" actions:
pdfManager.ensureCatalog("attachments"),
pdfManager.ensureDoc("xfaDatasets"),
collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1,
pageRef ? pdfManager.ensureCatalog("structTreeRoot") : null,
]).then(
([
acroForm,
baseUrl,
attachments,
xfaDatasets,
pageIndex,
structTreeRoot,
]) =>
pdfManager.ensure(this, "_create", [
xref,
ref,
pdfManager,
idFactory,
acroForm,
attachments,
xfaDatasets,
collectFields,
pageIndex,
structTreeRoot,
pageRef,
])
);
static async create(
xref,
ref,
annotationGlobals,
idFactory,
collectFields,
pageRef
) {
const pageIndex = collectFields
? await this._getPageIndex(xref, ref, annotationGlobals.pdfManager)
: null;
return annotationGlobals.pdfManager.ensure(this, "_create", [
xref,
ref,
annotationGlobals,
idFactory,
pageIndex,
pageRef,
]);
}
/**
@ -124,14 +138,9 @@ class AnnotationFactory {
static _create(
xref,
ref,
pdfManager,
annotationGlobals,
idFactory,
acroForm,
attachments = null,
xfaDatasets,
collectFields,
pageIndex = -1,
structTreeRoot = null,
pageIndex = null,
pageRef = null
) {
const dict = xref.fetchIfRef(ref);
@ -139,6 +148,7 @@ class AnnotationFactory {
return undefined;
}
const { acroForm, pdfManager } = annotationGlobals;
const id =
ref instanceof Ref ? ref.toString() : `annot_${idFactory.createObjId()}`;
@ -146,8 +156,6 @@ class AnnotationFactory {
let subtype = dict.get("Subtype");
subtype = subtype instanceof Name ? subtype.name : null;
const acroFormDict = acroForm instanceof Dict ? acroForm : Dict.empty;
// Return the right annotation object based on the subtype and field type.
const parameters = {
xref,
@ -155,16 +163,11 @@ class AnnotationFactory {
dict,
subtype,
id,
pdfManager,
acroForm: acroFormDict,
attachments,
xfaDatasets,
collectFields,
annotationGlobals,
needAppearances:
!collectFields && acroFormDict.get("NeedAppearances") === true,
pageIndex === null && acroForm.get("NeedAppearances") === true,
pageIndex,
evaluatorOptions: pdfManager.evaluatorOptions,
structTreeRoot,
pageRef,
};
@ -241,7 +244,7 @@ class AnnotationFactory {
return new FileAttachmentAnnotation(parameters);
default:
if (!collectFields) {
if (pageIndex === null) {
if (!subtype) {
warn("Annotation is missing the required /Subtype.");
} else {
@ -404,6 +407,7 @@ class AnnotationFactory {
}
static async printNewAnnotations(
annotationGlobals,
evaluator,
task,
annotations,
@ -422,18 +426,28 @@ class AnnotationFactory {
switch (annotation.annotationType) {
case AnnotationEditorType.FREETEXT:
promises.push(
FreeTextAnnotation.createNewPrintAnnotation(xref, annotation, {
evaluator,
task,
evaluatorOptions: options,
})
FreeTextAnnotation.createNewPrintAnnotation(
annotationGlobals,
xref,
annotation,
{
evaluator,
task,
evaluatorOptions: options,
}
)
);
break;
case AnnotationEditorType.INK:
promises.push(
InkAnnotation.createNewPrintAnnotation(xref, annotation, {
evaluatorOptions: options,
})
InkAnnotation.createNewPrintAnnotation(
annotationGlobals,
xref,
annotation,
{
evaluatorOptions: options,
}
)
);
break;
case AnnotationEditorType.STAMP:
@ -450,10 +464,15 @@ class AnnotationFactory {
image.imageStream = image.smaskStream = null;
}
promises.push(
StampAnnotation.createNewPrintAnnotation(xref, annotation, {
image,
evaluatorOptions: options,
})
StampAnnotation.createNewPrintAnnotation(
annotationGlobals,
xref,
annotation,
{
image,
evaluatorOptions: options,
}
)
);
break;
}
@ -582,7 +601,7 @@ function getTransformMatrix(rect, bbox, matrix) {
class Annotation {
constructor(params) {
const { dict, xref } = params;
const { dict, xref, annotationGlobals } = params;
this.setTitle(dict.get("T"));
this.setContents(dict.get("Contents"));
@ -610,11 +629,15 @@ class Annotation {
const isLocked = !!(this.flags & AnnotationFlag.LOCKED);
const isContentLocked = !!(this.flags & AnnotationFlag.LOCKEDCONTENTS);
if (params.structTreeRoot) {
if (annotationGlobals.structTreeRoot) {
let structParent = dict.get("StructParent");
structParent =
Number.isInteger(structParent) && structParent >= 0 ? structParent : -1;
params.structTreeRoot.addAnnotationIdToPage(params.pageRef, structParent);
annotationGlobals.structTreeRoot.addAnnotationIdToPage(
params.pageRef,
structParent
);
}
// Expose public properties using a data object.
@ -636,7 +659,7 @@ class Annotation {
noHTML: isLocked && isContentLocked,
};
if (params.collectFields) {
if (params.pageIndex !== null) {
// Fields can act as container for other fields and have
// some actions even if no Annotation inherit from them.
// Those fields can be referenced by CO (calculation order).
@ -767,9 +790,11 @@ class Annotation {
}
setDefaultAppearance(params) {
const { dict, annotationGlobals } = params;
const defaultAppearance =
getInheritableProperty({ dict: params.dict, key: "DA" }) ||
params.acroForm.get("DA");
getInheritableProperty({ dict, key: "DA" }) ||
annotationGlobals.acroForm.get("DA");
this._defaultAppearance =
typeof defaultAppearance === "string" ? defaultAppearance : "";
this.data.defaultAppearanceData = parseDefaultAppearance(
@ -1652,13 +1677,19 @@ class MarkupAnnotation extends Annotation {
return { ref: annotationRef, data: buffer.join("") };
}
static async createNewPrintAnnotation(xref, annotation, params) {
static async createNewPrintAnnotation(
annotationGlobals,
xref,
annotation,
params
) {
const ap = await this.createNewAppearanceStream(annotation, xref, params);
const annotationDict = this.createNewDict(annotation, xref, { ap });
const newAnnotation = new this.prototype.constructor({
dict: annotationDict,
xref,
annotationGlobals,
evaluatorOptions: params.evaluatorOptions,
});
@ -1674,7 +1705,7 @@ class WidgetAnnotation extends Annotation {
constructor(params) {
super(params);
const { dict, xref } = params;
const { dict, xref, annotationGlobals } = params;
const data = this.data;
this._needAppearances = params.needAppearances;
@ -1701,12 +1732,13 @@ class WidgetAnnotation extends Annotation {
});
data.defaultFieldValue = this._decodeFormValue(defaultFieldValue);
if (fieldValue === undefined && params.xfaDatasets) {
if (fieldValue === undefined && annotationGlobals.xfaDatasets) {
// Try to figure out if we have something in the xfa dataset.
const path = this._title.str;
if (path) {
this._hasValueFromXFA = true;
data.fieldValue = fieldValue = params.xfaDatasets.getValue(path);
data.fieldValue = fieldValue =
annotationGlobals.xfaDatasets.getValue(path);
}
}
@ -1729,7 +1761,7 @@ class WidgetAnnotation extends Annotation {
data.fieldType = fieldType instanceof Name ? fieldType.name : null;
const localResources = getInheritableProperty({ dict, key: "DR" });
const acroFormResources = params.acroForm.get("DR");
const acroFormResources = annotationGlobals.acroForm.get("DR");
const appearanceResources = this.appearance?.dict.get("Resources");
this._fieldResources = {
@ -3268,22 +3300,20 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
}
_processPushButton(params) {
if (
!params.dict.has("A") &&
!params.dict.has("AA") &&
!this.data.alternativeText
) {
const { dict, annotationGlobals } = params;
if (!dict.has("A") && !dict.has("AA") && !this.data.alternativeText) {
warn("Push buttons without action dictionaries are not supported");
return;
}
this.data.isTooltipOnly = !params.dict.has("A") && !params.dict.has("AA");
this.data.isTooltipOnly = !dict.has("A") && !dict.has("AA");
Catalog.parseDestDictionary({
destDict: params.dict,
destDict: dict,
resultObj: this.data,
docBaseUrl: params.pdfManager.docBaseUrl,
docAttachments: params.attachments,
docBaseUrl: annotationGlobals.baseUrl,
docAttachments: annotationGlobals.attachments,
});
}
@ -3641,9 +3671,10 @@ class LinkAnnotation extends Annotation {
constructor(params) {
super(params);
const { dict, annotationGlobals } = params;
this.data.annotationType = AnnotationType.LINK;
const quadPoints = getQuadPoints(params.dict, this.rectangle);
const quadPoints = getQuadPoints(dict, this.rectangle);
if (quadPoints) {
this.data.quadPoints = quadPoints;
}
@ -3652,10 +3683,10 @@ class LinkAnnotation extends Annotation {
this.data.borderColor ||= this.data.color;
Catalog.parseDestDictionary({
destDict: params.dict,
destDict: dict,
resultObj: this.data,
docBaseUrl: params.pdfManager.docBaseUrl,
docAttachments: params.attachments,
docBaseUrl: annotationGlobals.baseUrl,
docAttachments: annotationGlobals.attachments,
});
}
}

View File

@ -310,7 +310,7 @@ class Catalog {
Catalog.parseDestDictionary({
destDict: outlineDict,
resultObj: data,
docBaseUrl: this.pdfManager.docBaseUrl,
docBaseUrl: this.baseUrl,
docAttachments: this.attachments,
});
const title = outlineDict.get("Title");
@ -1405,7 +1405,7 @@ class Catalog {
}
}
}
return shadow(this, "baseUrl", null);
return shadow(this, "baseUrl", this.pdfManager.docBaseUrl);
}
/**

View File

@ -435,9 +435,12 @@ class Page {
let newAnnotationsPromise = Promise.resolve(null);
if (newAnnotationsByPage) {
let imagePromises;
const newAnnotations = newAnnotationsByPage.get(this.pageIndex);
if (newAnnotations) {
const annotationGlobalsPromise =
this.pdfManager.ensureDoc("annotationGlobals");
let imagePromises;
// An annotation can contain a reference to a bitmap, but this bitmap
// is defined in another annotation. So we need to find this annotation
// and generate the bitmap.
@ -476,11 +479,21 @@ class Page {
deletedAnnotations = new RefSet();
this.#replaceIdByRef(newAnnotations, deletedAnnotations, null);
newAnnotationsPromise = AnnotationFactory.printNewAnnotations(
partialEvaluator,
task,
newAnnotations,
imagePromises
newAnnotationsPromise = annotationGlobalsPromise.then(
annotationGlobals => {
if (!annotationGlobals) {
return null;
}
return AnnotationFactory.printNewAnnotations(
annotationGlobals,
partialEvaluator,
task,
newAnnotations,
imagePromises
);
}
);
}
}
@ -672,7 +685,7 @@ class Page {
async getAnnotationsData(handler, task, intent) {
const annotations = await this._parsedAnnotations;
if (annotations.length === 0) {
return [];
return annotations;
}
const annotationsData = [],
@ -732,16 +745,25 @@ class Page {
}
get _parsedAnnotations() {
const parsedAnnotations = this.pdfManager
const promise = this.pdfManager
.ensure(this, "annotations")
.then(() => {
.then(async annots => {
if (annots.length === 0) {
return annots;
}
const annotationGlobals =
await this.pdfManager.ensureDoc("annotationGlobals");
if (!annotationGlobals) {
return [];
}
const annotationPromises = [];
for (const annotationRef of this.annotations) {
for (const annotationRef of annots) {
annotationPromises.push(
AnnotationFactory.create(
this.xref,
annotationRef,
this.pdfManager,
annotationGlobals,
this._localIdFactory,
/* collectFields */ false,
this.ref
@ -752,34 +774,28 @@ class Page {
);
}
return Promise.all(annotationPromises).then(function (annotations) {
if (annotations.length === 0) {
return annotations;
const sortedAnnotations = [];
let popupAnnotations;
// Ensure that PopupAnnotations are handled last, since they depend on
// their parent Annotation in the display layer; fixes issue 11362.
for (const annotation of await Promise.all(annotationPromises)) {
if (!annotation) {
continue;
}
if (annotation instanceof PopupAnnotation) {
(popupAnnotations ||= []).push(annotation);
continue;
}
sortedAnnotations.push(annotation);
}
if (popupAnnotations) {
sortedAnnotations.push(...popupAnnotations);
}
const sortedAnnotations = [];
let popupAnnotations;
// Ensure that PopupAnnotations are handled last, since they depend on
// their parent Annotation in the display layer; fixes issue 11362.
for (const annotation of annotations) {
if (!annotation) {
continue;
}
if (annotation instanceof PopupAnnotation) {
(popupAnnotations ||= []).push(annotation);
continue;
}
sortedAnnotations.push(annotation);
}
if (popupAnnotations) {
sortedAnnotations.push(...popupAnnotations);
}
return sortedAnnotations;
});
return sortedAnnotations;
});
return shadow(this, "_parsedAnnotations", parsedAnnotations);
return shadow(this, "_parsedAnnotations", promise);
}
get jsActions() {
@ -1704,10 +1720,7 @@ class PDFDocument {
: clearGlobalCaches();
}
/**
* @private
*/
_collectFieldObjects(name, fieldRef, promises) {
#collectFieldObjects(name, fieldRef, promises, annotationGlobals) {
const field = this.xref.fetchIfRef(fieldRef);
if (field.has("T")) {
const partName = stringToPDFString(field.get("T"));
@ -1721,22 +1734,21 @@ class PDFDocument {
AnnotationFactory.create(
this.xref,
fieldRef,
this.pdfManager,
annotationGlobals,
this._localIdFactory,
/* collectFields */ true,
/* pageRef */ null
)
.then(annotation => annotation?.getFieldObject())
.catch(function (reason) {
warn(`_collectFieldObjects: "${reason}".`);
warn(`#collectFieldObjects: "${reason}".`);
return null;
})
);
if (field.has("Kids")) {
const kids = field.get("Kids");
for (const kid of kids) {
this._collectFieldObjects(name, kid, promises);
for (const kid of field.get("Kids")) {
this.#collectFieldObjects(name, kid, promises, annotationGlobals);
}
}
}
@ -1746,29 +1758,41 @@ class PDFDocument {
return shadow(this, "fieldObjects", Promise.resolve(null));
}
const allFields = Object.create(null);
const fieldPromises = new Map();
for (const fieldRef of this.catalog.acroForm.get("Fields")) {
this._collectFieldObjects("", fieldRef, fieldPromises);
}
const promise = this.pdfManager
.ensureDoc("annotationGlobals")
.then(async annotationGlobals => {
if (!annotationGlobals) {
return null;
}
const allPromises = [];
for (const [name, promises] of fieldPromises) {
allPromises.push(
Promise.all(promises).then(fields => {
fields = fields.filter(field => !!field);
if (fields.length > 0) {
allFields[name] = fields;
}
})
);
}
const allFields = Object.create(null);
const fieldPromises = new Map();
for (const fieldRef of this.catalog.acroForm.get("Fields")) {
this.#collectFieldObjects(
"",
fieldRef,
fieldPromises,
annotationGlobals
);
}
return shadow(
this,
"fieldObjects",
Promise.all(allPromises).then(() => allFields)
);
const allPromises = [];
for (const [name, promises] of fieldPromises) {
allPromises.push(
Promise.all(promises).then(fields => {
fields = fields.filter(field => !!field);
if (fields.length > 0) {
allFields[name] = fields;
}
})
);
}
await Promise.all(allPromises);
return allFields;
});
return shadow(this, "fieldObjects", promise);
}
get hasJSActions() {
@ -1818,6 +1842,14 @@ class PDFDocument {
}
return shadow(this, "calculationOrderIds", ids);
}
get annotationGlobals() {
return shadow(
this,
"annotationGlobals",
AnnotationFactory.createGlobals(this.pdfManager)
);
}
}
export { Page, PDFDocument };

View File

@ -16,7 +16,6 @@
import {
createValidAbsoluteUrl,
FeatureTest,
shadow,
unreachable,
warn,
} from "../shared/util.js";
@ -62,8 +61,7 @@ class BasePdfManager {
}
get docBaseUrl() {
const catalog = this.pdfDocument.catalog;
return shadow(this, "docBaseUrl", catalog.baseUrl || this._docBaseUrl);
return this._docBaseUrl;
}
ensureDoc(prop, args) {

File diff suppressed because it is too large Load Diff