Merge pull request #12271 from timvandermeij/acroform-type-detection

Improve AcroForm/XFA form type detection
This commit is contained in:
Tim van der Meij 2020-08-26 00:18:00 +02:00 committed by GitHub
commit 4ffdbe6ec9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 260 additions and 74 deletions

View File

@ -51,7 +51,7 @@ class AnnotationFactory {
* instance.
*/
static create(xref, ref, pdfManager, idFactory) {
return pdfManager.ensureDoc("acroForm").then(acroForm => {
return pdfManager.ensureCatalog("acroForm").then(acroForm => {
return pdfManager.ensure(this, "_create", [
xref,
ref,

View File

@ -552,6 +552,7 @@ class PDFDocument {
this.stream = stream;
this.xref = new XRef(stream, pdfManager);
this._pagePromises = [];
this._version = null;
const idCounters = {
font: 0,
@ -572,42 +573,15 @@ class PDFDocument {
}
parse(recoveryMode) {
this.setup(recoveryMode);
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.pdfManager, this.xref);
const version = this.catalog.catDict.get("Version");
if (isName(version)) {
this.pdfFormatVersion = version.name;
}
// Check if AcroForms are present in the document.
try {
this.acroForm = this.catalog.catDict.get("AcroForm");
if (this.acroForm) {
this.xfa = this.acroForm.get("XFA");
const fields = this.acroForm.get("Fields");
if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
this.acroForm = null; // No fields and no XFA, so it's not a form.
}
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no AcroForms are present");
this.acroForm = null;
}
// Check if a Collection dictionary is present in the document.
try {
const collection = this.catalog.catDict.get("Collection");
if (isDict(collection) && collection.size > 0) {
this.collection = collection;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch Collection dictionary.");
// The `checkHeader` method is called before this method and parses the
// version from the header. The specification states in section 7.5.2
// that the version from the catalog, if present, should overwrite the
// version from the header.
if (this.catalog.version) {
this._version = this.catalog.version;
}
}
@ -693,9 +667,9 @@ class PDFDocument {
}
version += String.fromCharCode(ch);
}
if (!this.pdfFormatVersion) {
if (!this._version) {
// Remove the "%PDF-" prefix.
this.pdfFormatVersion = version.substring(5);
this._version = version.substring(5);
}
}
@ -703,17 +677,75 @@ class PDFDocument {
this.xref.setStartXRef(this.startXRef);
}
/**
 * Parses the cross-reference data and then instantiates the document
 * catalog on top of it.
 *
 * @param {boolean} recoveryMode - Forwarded as-is to `this.xref.parse`.
 *   NOTE(review): presumably a boolean flag; confirm against `XRef.parse`.
 */
setup(recoveryMode) {
// The `Catalog` constructor receives `this.xref`, hence the cross-reference
// data is parsed first.
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.pdfManager, this.xref);
}
get numPages() {
const linearization = this.linearization;
const num = linearization ? linearization.numPages : this.catalog.numPages;
return shadow(this, "numPages", num);
}
/**
* @private
*/
_hasOnlyDocumentSignatures(fields, recursionDepth = 0) {
const RECURSION_LIMIT = 10;
return fields.every(field => {
field = this.xref.fetchIfRef(field);
if (field.has("Kids")) {
if (++recursionDepth > RECURSION_LIMIT) {
warn("_hasOnlyDocumentSignatures: maximum recursion depth reached");
return false;
}
return this._hasOnlyDocumentSignatures(
field.get("Kids"),
recursionDepth
);
}
const isSignature = isName(field.get("FT"), "Sig");
const rectangle = field.get("Rect");
const isInvisible =
Array.isArray(rectangle) && rectangle.every(value => value === 0);
return isSignature && isInvisible;
});
}
get formInfo() {
const formInfo = { hasAcroForm: false, hasXfa: false };
const acroForm = this.catalog.acroForm;
if (!acroForm) {
return shadow(this, "formInfo", formInfo);
}
try {
// The document contains XFA data if the `XFA` entry is a non-empty
// array or stream.
const xfa = acroForm.get("XFA");
const hasXfa =
(Array.isArray(xfa) && xfa.length > 0) ||
(isStream(xfa) && !xfa.isEmpty);
formInfo.hasXfa = hasXfa;
// The document contains AcroForm data if the `Fields` entry is a
// non-empty array and it doesn't consist of only document signatures.
// This second check is required for files that don't actually contain
// AcroForm data (only XFA data), but that use the `Fields` entry to
// store (invisible) document signatures. This can be detected using
// the first bit of the `SigFlags` integer (see Table 219 in the
// specification).
const fields = acroForm.get("Fields");
const hasFields = Array.isArray(fields) && fields.length > 0;
const sigFlags = acroForm.get("SigFlags");
const hasOnlyDocumentSignatures =
!!(sigFlags & 0x1) && this._hasOnlyDocumentSignatures(fields);
formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch form information.");
}
return shadow(this, "formInfo", formInfo);
}
get documentInfo() {
const DocumentInfoValidators = {
Title: isString,
@ -727,7 +759,7 @@ class PDFDocument {
Trapped: isName,
};
let version = this.pdfFormatVersion;
let version = this._version;
if (
typeof version !== "string" ||
!PDF_HEADER_VERSION_REGEXP.test(version)
@ -739,9 +771,9 @@ class PDFDocument {
const docInfo = {
PDFFormatVersion: version,
IsLinearized: !!this.linearization,
IsAcroFormPresent: !!this.acroForm,
IsXFAPresent: !!this.xfa,
IsCollectionPresent: !!this.collection,
IsAcroFormPresent: this.formInfo.hasAcroForm,
IsXFAPresent: this.formInfo.hasXfa,
IsCollectionPresent: !!this.catalog.collection,
};
let infoDict;

View File

@ -65,8 +65,8 @@ class Catalog {
this.pdfManager = pdfManager;
this.xref = xref;
this.catDict = xref.getCatalogObj();
if (!isDict(this.catDict)) {
this._catDict = xref.getCatalogObj();
if (!isDict(this._catDict)) {
throw new FormatError("Catalog object is not a dictionary.");
}
@ -76,8 +76,48 @@ class Catalog {
this.pageKidsCountCache = new RefSetCache();
}
get version() {
const version = this._catDict.get("Version");
if (!isName(version)) {
return shadow(this, "version", null);
}
return shadow(this, "version", version.name);
}
get collection() {
let collection = null;
try {
const obj = this._catDict.get("Collection");
if (isDict(obj) && obj.size > 0) {
collection = obj;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch Collection entry; assuming no collection is present.");
}
return shadow(this, "collection", collection);
}
get acroForm() {
let acroForm = null;
try {
const obj = this._catDict.get("AcroForm");
if (isDict(obj) && obj.size > 0) {
acroForm = obj;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no forms are present.");
}
return shadow(this, "acroForm", acroForm);
}
get metadata() {
const streamRef = this.catDict.getRaw("Metadata");
const streamRef = this._catDict.getRaw("Metadata");
if (!isRef(streamRef)) {
return shadow(this, "metadata", null);
}
@ -112,7 +152,7 @@ class Catalog {
}
get toplevelPagesDict() {
const pagesObj = this.catDict.get("Pages");
const pagesObj = this._catDict.get("Pages");
if (!isDict(pagesObj)) {
throw new FormatError("Invalid top-level pages dictionary.");
}
@ -136,7 +176,7 @@ class Catalog {
* @private
*/
_readDocumentOutline() {
let obj = this.catDict.get("Outlines");
let obj = this._catDict.get("Outlines");
if (!isDict(obj)) {
return null;
}
@ -257,7 +297,7 @@ class Catalog {
get optionalContentConfig() {
let config = null;
try {
const properties = this.catDict.get("OCProperties");
const properties = this._catDict.get("OCProperties");
if (!properties) {
return shadow(this, "optionalContentConfig", null);
}
@ -370,12 +410,12 @@ class Catalog {
* @private
*/
_readDests() {
const obj = this.catDict.get("Names");
const obj = this._catDict.get("Names");
if (obj && obj.has("Dests")) {
return new NameTree(obj.getRaw("Dests"), this.xref);
} else if (this.catDict.has("Dests")) {
} else if (this._catDict.has("Dests")) {
// Simple destination dictionary.
return this.catDict.get("Dests");
return this._catDict.get("Dests");
}
return undefined;
}
@ -397,7 +437,7 @@ class Catalog {
* @private
*/
_readPageLabels() {
const obj = this.catDict.getRaw("PageLabels");
const obj = this._catDict.getRaw("PageLabels");
if (!obj) {
return null;
}
@ -497,7 +537,7 @@ class Catalog {
}
get pageLayout() {
const obj = this.catDict.get("PageLayout");
const obj = this._catDict.get("PageLayout");
// Purposely use a non-standard default value, rather than 'SinglePage', to
// allow differentiating between `undefined` and /SinglePage since that does
// affect the Scroll mode (continuous/non-continuous) used in Adobe Reader.
@ -518,7 +558,7 @@ class Catalog {
}
get pageMode() {
const obj = this.catDict.get("PageMode");
const obj = this._catDict.get("PageMode");
let pageMode = "UseNone"; // Default value.
if (isName(obj)) {
@ -556,7 +596,7 @@ class Catalog {
NumCopies: Number.isInteger,
};
const obj = this.catDict.get("ViewerPreferences");
const obj = this._catDict.get("ViewerPreferences");
let prefs = null;
if (isDict(obj)) {
@ -681,7 +721,7 @@ class Catalog {
* NOTE: "JavaScript" actions are, for now, handled by `get javaScript` below.
*/
get openAction() {
const obj = this.catDict.get("OpenAction");
const obj = this._catDict.get("OpenAction");
let openAction = null;
if (isDict(obj)) {
@ -714,7 +754,7 @@ class Catalog {
}
get attachments() {
const obj = this.catDict.get("Names");
const obj = this._catDict.get("Names");
let attachments = null;
if (obj && obj.has("EmbeddedFiles")) {
@ -732,7 +772,7 @@ class Catalog {
}
get javaScript() {
const obj = this.catDict.get("Names");
const obj = this._catDict.get("Names");
let javaScript = null;
function appendIfJavaScriptDict(jsDict) {
@ -768,7 +808,7 @@ class Catalog {
}
// Append OpenAction "JavaScript" actions to the JavaScript array.
const openAction = this.catDict.get("OpenAction");
const openAction = this._catDict.get("OpenAction");
if (isDict(openAction) && isName(openAction.get("S"), "JavaScript")) {
appendIfJavaScriptDict(openAction);
}
@ -813,7 +853,7 @@ class Catalog {
getPageDict(pageIndex) {
const capability = createPromiseCapability();
const nodesToVisit = [this.catDict.getRaw("Pages")];
const nodesToVisit = [this._catDict.getRaw("Pages")];
const visitedNodes = new RefSet();
const xref = this.xref,
pageKidsCountCache = this.pageKidsCountCache;

View File

@ -41,7 +41,9 @@ describe("annotation", function () {
constructor(params) {
this.docBaseUrl = params.docBaseUrl || null;
this.pdfDocument = {
acroForm: new Dict(),
catalog: {
acroForm: new Dict(),
},
};
}
@ -56,8 +58,8 @@ describe("annotation", function () {
});
}
ensureDoc(prop, args) {
return this.ensure(this.pdfDocument, prop, args);
ensureCatalog(prop, args) {
return this.ensure(this.pdfDocument.catalog, prop, args);
}
}

View File

@ -13,7 +13,10 @@
* limitations under the License.
*/
import { createIdFactory } from "./test_utils.js";
import { createIdFactory, XRefMock } from "./test_utils.js";
import { Dict, Name, Ref } from "../../src/core/primitives.js";
import { PDFDocument } from "../../src/core/document.js";
import { StringStream } from "../../src/core/stream.js";
describe("document", function () {
describe("Page", function () {
@ -40,4 +43,111 @@ describe("document", function () {
expect(idFactory1.getDocId()).toEqual("g_d0");
});
});
describe("PDFDocument", function () {
  const pdfManager = {
    get docId() {
      return "d0";
    },
  };
  const stream = new StringStream("Dummy_PDF_data");

  /**
   * Creates a `PDFDocument` whose catalog only exposes the given `acroForm`.
   *
   * @param {Dict|null} acroForm - The value to expose as `catalog.acroForm`.
   * @param {Object} [xref] - Optional replacement for the `xref` instance
   *   that the `PDFDocument` constructor creates; needed by tests in which
   *   references have to be resolved.
   * @returns {PDFDocument}
   */
  function getDocument(acroForm, xref = null) {
    const pdfDocument = new PDFDocument(pdfManager, stream);
    pdfDocument.catalog = { acroForm };
    if (xref) {
      pdfDocument.xref = xref;
    }
    return pdfDocument;
  }

  it("should get form info when no form data is present", function () {
    const pdfDocument = getDocument(null);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });

  it("should get form info when XFA is present", function () {
    const acroForm = new Dict();

    // The `XFA` entry can only be a non-empty array or stream.
    acroForm.set("XFA", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });

    acroForm.set("XFA", new StringStream(""));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", new StringStream("non-empty"));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });
  });

  it("should get form info when AcroForm is present", function () {
    const acroForm = new Dict();

    // The `Fields` entry can only be a non-empty array.
    acroForm.set("Fields", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("Fields", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is *not* set, the `Fields`
    // array is never inspected for document signatures.
    acroForm.set("Fields", ["foo", "bar"]);
    acroForm.set("SigFlags", 2);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is set and the `Fields` array
    // only contains document signatures, then there is no AcroForm data.
    const annotationDict = new Dict();
    annotationDict.set("FT", Name.get("Sig"));
    annotationDict.set("Rect", [0, 0, 0, 0]);
    const annotationRef = Ref.get(11, 0);

    const kidsDict = new Dict();
    kidsDict.set("Kids", [annotationRef]);
    const kidsRef = Ref.get(10, 0);

    const xref = new XRefMock([
      { ref: annotationRef, data: annotationDict },
      { ref: kidsRef, data: kidsDict },
    ]);

    acroForm.set("Fields", [kidsRef]);
    acroForm.set("SigFlags", 3);
    // The mock must be installed on the document that is actually inspected;
    // assigning it to a previously created instance would leave the new
    // document with the `xref` created by the constructor, and the
    // references above would never be resolved through the mock.
    pdfDocument = getDocument(acroForm, xref);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });
});
});

View File

@ -1426,14 +1426,14 @@ const PDFViewerApplication = {
this.setTitle(contentDispositionFilename);
}
if (info.IsXFAPresent) {
if (info.IsXFAPresent && !info.IsAcroFormPresent) {
console.warn("Warning: XFA is not supported");
this._delayedFallback(UNSUPPORTED_FEATURES.forms);
} else if (
info.IsAcroFormPresent &&
(info.IsAcroFormPresent || info.IsXFAPresent) &&
!this.pdfViewer.renderInteractiveForms
) {
console.warn("Warning: AcroForm support is not enabled");
console.warn("Warning: Interactive form support is not enabled");
this._delayedFallback(UNSUPPORTED_FEATURES.forms);
}
@ -1454,8 +1454,10 @@ const PDFViewerApplication = {
});
}
let formType = null;
if (info.IsAcroFormPresent) {
formType = info.IsXFAPresent ? "xfa" : "acroform";
if (info.IsXFAPresent) {
formType = "xfa";
} else if (info.IsAcroFormPresent) {
formType = "acroform";
}
this.externalServices.reportTelemetry({
type: "documentInfo",