Redo the form type detection logic and include unit tests
Good form type detection is important to get reliable telemetry and to only show the fallback bar if a form cannot be filled out by the user. PDF.js only supports AcroForm data, so XFA data is explicitly unsupported (tracked in issue #2373). However, the previous form type detection couldn't separate AcroForm and XFA well enough, causing form type telemetry to be incorrect sometimes and the fallback bar to be shown for forms that could in fact be filled out by the user.

The solution in this commit was found by studying the specification and the form documents that are available to us. In a nutshell the rules are:

- There is XFA data if the `XFA` entry is a non-empty array or stream.
- There is AcroForm data if the `Fields` entry is a non-empty array and it doesn't consist of only document signatures.

The document signatures part was not handled in the old code, causing a document with only XFA data to also be marked as having AcroForm data. Moreover, the old code didn't check all the data types.

Now that AcroForm and XFA can be distinguished, the viewer is configured to only show the fallback bar for documents that only have XFA data. If a document also has AcroForm data, the viewer can use that to render the form. We have not found documents where the XFA data was necessary in that case.

Finally, we include unit tests to ensure that all cases are covered and move the form type detection out of the `parse` function so that it's only executed if the document information is actually requested (potentially making initial parsing a tiny bit faster).
This commit is contained in:
parent
f0bf62ff54
commit
280207c740
@ -582,16 +582,6 @@ class PDFDocument {
|
||||
if (this.catalog.version) {
|
||||
this._version = this.catalog.version;
|
||||
}
|
||||
|
||||
// Check if AcroForms are present in the document.
|
||||
this._hasAcroForm = !!this.catalog.acroForm;
|
||||
if (this._hasAcroForm) {
|
||||
this.xfa = this.catalog.acroForm.get("XFA");
|
||||
const fields = this.catalog.acroForm.get("Fields");
|
||||
if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
|
||||
this._hasAcroForm = false; // No fields and no XFA, so it's not a form.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
get linearization() {
|
||||
@ -697,6 +687,69 @@ class PDFDocument {
|
||||
return shadow(this, "numPages", num);
|
||||
}
|
||||
|
||||
/**
|
||||
* @private
|
||||
*/
|
||||
_hasOnlyDocumentSignatures(fields, recursionDepth = 0) {
|
||||
const RECURSION_LIMIT = 10;
|
||||
return fields.every(field => {
|
||||
field = this.xref.fetchIfRef(field);
|
||||
if (field.has("Kids")) {
|
||||
if (++recursionDepth > RECURSION_LIMIT) {
|
||||
warn("_hasOnlyDocumentSignatures: maximum recursion depth reached");
|
||||
return false;
|
||||
}
|
||||
return this._hasOnlyDocumentSignatures(
|
||||
field.get("Kids"),
|
||||
recursionDepth
|
||||
);
|
||||
}
|
||||
const isSignature = isName(field.get("FT"), "Sig");
|
||||
const rectangle = field.get("Rect");
|
||||
const isInvisible =
|
||||
Array.isArray(rectangle) && rectangle.every(value => value === 0);
|
||||
return isSignature && isInvisible;
|
||||
});
|
||||
}
|
||||
|
||||
get formInfo() {
|
||||
const formInfo = { hasAcroForm: false, hasXfa: false };
|
||||
const acroForm = this.catalog.acroForm;
|
||||
if (!acroForm) {
|
||||
return shadow(this, "formInfo", formInfo);
|
||||
}
|
||||
|
||||
try {
|
||||
// The document contains XFA data if the `XFA` entry is a non-empty
|
||||
// array or stream.
|
||||
const xfa = acroForm.get("XFA");
|
||||
const hasXfa =
|
||||
(Array.isArray(xfa) && xfa.length > 0) ||
|
||||
(isStream(xfa) && !xfa.isEmpty);
|
||||
formInfo.hasXfa = hasXfa;
|
||||
|
||||
// The document contains AcroForm data if the `Fields` entry is a
|
||||
// non-empty array and it doesn't consist of only document signatures.
|
||||
// This second check is required for files that don't actually contain
|
||||
// AcroForm data (only XFA data), but that use the `Fields` entry to
|
||||
// store (invisible) document signatures. This can be detected using
|
||||
// the first bit of the `SigFlags` integer (see Table 219 in the
|
||||
// specification).
|
||||
const fields = acroForm.get("Fields");
|
||||
const hasFields = Array.isArray(fields) && fields.length > 0;
|
||||
const sigFlags = acroForm.get("SigFlags");
|
||||
const hasOnlyDocumentSignatures =
|
||||
!!(sigFlags & 0x1) && this._hasOnlyDocumentSignatures(fields);
|
||||
formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
info("Cannot fetch form information.");
|
||||
}
|
||||
return shadow(this, "formInfo", formInfo);
|
||||
}
|
||||
|
||||
get documentInfo() {
|
||||
const DocumentInfoValidators = {
|
||||
Title: isString,
|
||||
@ -722,8 +775,8 @@ class PDFDocument {
|
||||
const docInfo = {
|
||||
PDFFormatVersion: version,
|
||||
IsLinearized: !!this.linearization,
|
||||
IsAcroFormPresent: this._hasAcroForm,
|
||||
IsXFAPresent: !!this.xfa,
|
||||
IsAcroFormPresent: this.formInfo.hasAcroForm,
|
||||
IsXFAPresent: this.formInfo.hasXfa,
|
||||
IsCollectionPresent: !!this.catalog.collection,
|
||||
};
|
||||
|
||||
|
@ -13,7 +13,10 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { createIdFactory } from "./test_utils.js";
|
||||
import { createIdFactory, XRefMock } from "./test_utils.js";
|
||||
import { Dict, Name, Ref } from "../../src/core/primitives.js";
|
||||
import { PDFDocument } from "../../src/core/document.js";
|
||||
import { StringStream } from "../../src/core/stream.js";
|
||||
|
||||
describe("document", function () {
|
||||
describe("Page", function () {
|
||||
@ -40,4 +43,111 @@ describe("document", function () {
|
||||
expect(idFactory1.getDocId()).toEqual("g_d0");
|
||||
});
|
||||
});
|
||||
|
||||
describe("PDFDocument", function () {
  const pdfManager = {
    get docId() {
      return "d0";
    },
  };
  const stream = new StringStream("Dummy_PDF_data");

  // Builds a document whose catalog only exposes the given AcroForm
  // dictionary. When `xref` is provided it replaces the cross-reference
  // table of the new document, so that references stored in the
  // `Fields`/`Kids` arrays can be resolved by the code under test.
  function getDocument(acroForm, xref = null) {
    const pdfDocument = new PDFDocument(pdfManager, stream);
    if (xref) {
      pdfDocument.xref = xref;
    }
    pdfDocument.catalog = { acroForm };
    return pdfDocument;
  }

  it("should get form info when no form data is present", function () {
    const pdfDocument = getDocument(null);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });

  it("should get form info when XFA is present", function () {
    const acroForm = new Dict();

    // The `XFA` entry can only be a non-empty array or stream.
    acroForm.set("XFA", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });

    acroForm.set("XFA", new StringStream(""));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", new StringStream("non-empty"));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });
  });

  it("should get form info when AcroForm is present", function () {
    const acroForm = new Dict();

    // The `Fields` entry can only be a non-empty array.
    acroForm.set("Fields", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("Fields", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is not set, the `Fields`
    // array is not inspected for document signatures, so there is AcroForm
    // data.
    acroForm.set("Fields", ["foo", "bar"]);
    acroForm.set("SigFlags", 2);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is set and the `Fields` array
    // only contains document signatures, then there is no AcroForm data.
    const annotationDict = new Dict();
    annotationDict.set("FT", Name.get("Sig"));
    annotationDict.set("Rect", [0, 0, 0, 0]);
    const annotationRef = Ref.get(11, 0);

    const kidsDict = new Dict();
    kidsDict.set("Kids", [annotationRef]);
    const kidsRef = Ref.get(10, 0);

    // The mock must be installed on the document that is actually queried;
    // otherwise the references cannot be resolved and the expectation below
    // would only pass through the error-handling path.
    const xref = new XRefMock([
      { ref: annotationRef, data: annotationDict },
      { ref: kidsRef, data: kidsDict },
    ]);

    acroForm.set("Fields", [kidsRef]);
    acroForm.set("SigFlags", 3);
    pdfDocument = getDocument(acroForm, xref);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });
});
|
||||
});
|
||||
|
12
web/app.js
12
web/app.js
@ -1426,14 +1426,14 @@ const PDFViewerApplication = {
|
||||
this.setTitle(contentDispositionFilename);
|
||||
}
|
||||
|
||||
if (info.IsXFAPresent) {
|
||||
if (info.IsXFAPresent && !info.IsAcroFormPresent) {
|
||||
console.warn("Warning: XFA is not supported");
|
||||
this._delayedFallback(UNSUPPORTED_FEATURES.forms);
|
||||
} else if (
|
||||
info.IsAcroFormPresent &&
|
||||
(info.IsAcroFormPresent || info.IsXFAPresent) &&
|
||||
!this.pdfViewer.renderInteractiveForms
|
||||
) {
|
||||
console.warn("Warning: AcroForm support is not enabled");
|
||||
console.warn("Warning: Interactive form support is not enabled");
|
||||
this._delayedFallback(UNSUPPORTED_FEATURES.forms);
|
||||
}
|
||||
|
||||
@ -1454,8 +1454,10 @@ const PDFViewerApplication = {
|
||||
});
|
||||
}
|
||||
let formType = null;
|
||||
if (info.IsAcroFormPresent) {
|
||||
formType = info.IsXFAPresent ? "xfa" : "acroform";
|
||||
if (info.IsXFAPresent) {
|
||||
formType = "xfa";
|
||||
} else if (info.IsAcroFormPresent) {
|
||||
formType = "acroform";
|
||||
}
|
||||
this.externalServices.reportTelemetry({
|
||||
type: "documentInfo",
|
||||
|
Loading…
x
Reference in New Issue
Block a user