Don't store complex data in PDFDocument.formInfo, and replace the fields object with a hasFields boolean instead

*This patch is based on a couple of smaller things that I noticed when working on PR 12479.*

 - Don't store the /Fields on the `formInfo` getter, since that feels like overloading it with unintended (and too complex) data, and utilize a `hasFields` boolean instead.
   This functionality was originally added in PR 12271, to help determine what kind of form data a PDF document contains, and I think that we should ensure that the return value of `formInfo` only consists of "simple" data.
   With these changes the `fieldObjects` getter instead has to look-up the /Fields manually, however that shouldn't be a problem since the access is guarded by a `formInfo.hasFields` check which ensures that the data both exists and is valid. Furthermore, most documents doesn't even have any /AcroForm data anyway.

 - Determine the `hasFields` property *first*, to ensure that it's always correct even if there's errors when checking e.g. the /XFA or /SigFlags entires, since the `fieldObjects` getter depends on it.

 - Simplify a loop in `fieldObjects`, since the object being accessed is a `Map` and those have built-in iteration support.

 - Use a higher logging level for errors in the `formInfo` getter, and include the actual error message, since that'd have helped with fixing PR 12479 a lot quicker.

 - Update the JSDoc comment in `src/display/api.js` to list the return values correctly, and also slightly extend/improve the description.
This commit is contained in:
Jonas Jenwald 2020-10-16 12:20:44 +02:00
parent b710fbcb00
commit 3351d3476d
3 changed files with 22 additions and 24 deletions

View File

@ -708,20 +708,23 @@ class PDFDocument {
}
get formInfo() {
const formInfo = { hasAcroForm: false, hasXfa: false, fields: null };
const formInfo = { hasFields: false, hasAcroForm: false, hasXfa: false };
const acroForm = this.catalog.acroForm;
if (!acroForm) {
return shadow(this, "formInfo", formInfo);
}
try {
const fields = acroForm.get("Fields");
const hasFields = Array.isArray(fields) && fields.length > 0;
formInfo.hasFields = hasFields; // Used by the `fieldObjects` getter.
// The document contains XFA data if the `XFA` entry is a non-empty
// array or stream.
const xfa = acroForm.get("XFA");
const hasXfa =
formInfo.hasXfa =
(Array.isArray(xfa) && xfa.length > 0) ||
(isStream(xfa) && !xfa.isEmpty);
formInfo.hasXfa = hasXfa;
// The document contains AcroForm data if the `Fields` entry is a
// non-empty array and it doesn't consist of only document signatures.
@ -730,20 +733,15 @@ class PDFDocument {
// store (invisible) document signatures. This can be detected using
// the first bit of the `SigFlags` integer (see Table 219 in the
// specification).
const fields = acroForm.get("Fields");
const hasFields = Array.isArray(fields) && fields.length > 0;
const sigFlags = acroForm.get("SigFlags");
const hasOnlyDocumentSignatures =
!!(sigFlags & 0x1) && this._hasOnlyDocumentSignatures(fields);
formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
if (hasFields) {
formInfo.fields = fields;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch form information.");
warn(`Cannot fetch form information: "${ex}".`);
}
return shadow(this, "formInfo", formInfo);
}
@ -976,19 +974,18 @@ class PDFDocument {
}
get fieldObjects() {
const formInfo = this.formInfo;
if (!formInfo.fields) {
if (!this.formInfo.hasFields) {
return shadow(this, "fieldObjects", Promise.resolve(null));
}
const allFields = Object.create(null);
const fieldPromises = new Map();
for (const fieldRef of formInfo.fields) {
for (const fieldRef of this.catalog.acroForm.get("Fields")) {
this._collectFieldObjects("", fieldRef, fieldPromises);
}
const allPromises = [];
for (const [name, promises] of fieldPromises.entries()) {
for (const [name, promises] of fieldPromises) {
allPromises.push(
Promise.all(promises).then(fields => {
fields = fields.filter(field => field !== null);

View File

@ -878,8 +878,9 @@ class PDFDocumentProxy {
}
/**
* @returns {Promise<Array<Object>>} A promise that is resolved with an
* {Array<Object>} containing field data for the JS sandbox.
* @returns {Promise<Array<Object> | null>} A promise that is resolved with an
* {Array<Object>} containing /AcroForm field data for the JS sandbox,
* or `null` when no field data is present in the PDF file.
*/
getFieldObjects() {
return this._transport.getFieldObjects();

View File

@ -64,7 +64,7 @@ describe("document", function () {
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
fields: null,
hasFields: false,
});
});
@ -77,7 +77,7 @@ describe("document", function () {
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
fields: null,
hasFields: false,
});
acroForm.set("XFA", ["foo", "bar"]);
@ -85,7 +85,7 @@ describe("document", function () {
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: true,
fields: null,
hasFields: false,
});
acroForm.set("XFA", new StringStream(""));
@ -93,7 +93,7 @@ describe("document", function () {
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
fields: null,
hasFields: false,
});
acroForm.set("XFA", new StringStream("non-empty"));
@ -101,7 +101,7 @@ describe("document", function () {
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: true,
fields: null,
hasFields: false,
});
});
@ -114,7 +114,7 @@ describe("document", function () {
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
fields: null,
hasFields: false,
});
acroForm.set("Fields", ["foo", "bar"]);
@ -122,7 +122,7 @@ describe("document", function () {
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: true,
hasXfa: false,
fields: ["foo", "bar"],
hasFields: true,
});
// If the first bit of the `SigFlags` entry is set and the `Fields` array
@ -133,7 +133,7 @@ describe("document", function () {
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: true,
hasXfa: false,
fields: ["foo", "bar"],
hasFields: true,
});
const annotationDict = new Dict();
@ -156,7 +156,7 @@ describe("document", function () {
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
fields: [kidsRef],
hasFields: true,
});
});
});