Merge pull request #12271 from timvandermeij/acroform-type-detection

Improve AcroForm/XFA form type detection
This commit is contained in:
Tim van der Meij 2020-08-26 00:18:00 +02:00 committed by GitHub
commit 4ffdbe6ec9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 260 additions and 74 deletions

View File

@ -51,7 +51,7 @@ class AnnotationFactory {
* instance. * instance.
*/ */
static create(xref, ref, pdfManager, idFactory) { static create(xref, ref, pdfManager, idFactory) {
return pdfManager.ensureDoc("acroForm").then(acroForm => { return pdfManager.ensureCatalog("acroForm").then(acroForm => {
return pdfManager.ensure(this, "_create", [ return pdfManager.ensure(this, "_create", [
xref, xref,
ref, ref,

View File

@ -552,6 +552,7 @@ class PDFDocument {
this.stream = stream; this.stream = stream;
this.xref = new XRef(stream, pdfManager); this.xref = new XRef(stream, pdfManager);
this._pagePromises = []; this._pagePromises = [];
this._version = null;
const idCounters = { const idCounters = {
font: 0, font: 0,
@ -572,42 +573,15 @@ class PDFDocument {
} }
parse(recoveryMode) { parse(recoveryMode) {
this.setup(recoveryMode); this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.pdfManager, this.xref);
const version = this.catalog.catDict.get("Version"); // The `checkHeader` method is called before this method and parses the
if (isName(version)) { // version from the header. The specification states in section 7.5.2
this.pdfFormatVersion = version.name; // that the version from the catalog, if present, should overwrite the
} // version from the header.
if (this.catalog.version) {
// Check if AcroForms are present in the document. this._version = this.catalog.version;
try {
this.acroForm = this.catalog.catDict.get("AcroForm");
if (this.acroForm) {
this.xfa = this.acroForm.get("XFA");
const fields = this.acroForm.get("Fields");
if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
this.acroForm = null; // No fields and no XFA, so it's not a form.
}
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no AcroForms are present");
this.acroForm = null;
}
// Check if a Collection dictionary is present in the document.
try {
const collection = this.catalog.catDict.get("Collection");
if (isDict(collection) && collection.size > 0) {
this.collection = collection;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch Collection dictionary.");
} }
} }
@ -693,9 +667,9 @@ class PDFDocument {
} }
version += String.fromCharCode(ch); version += String.fromCharCode(ch);
} }
if (!this.pdfFormatVersion) { if (!this._version) {
// Remove the "%PDF-" prefix. // Remove the "%PDF-" prefix.
this.pdfFormatVersion = version.substring(5); this._version = version.substring(5);
} }
} }
@ -703,17 +677,75 @@ class PDFDocument {
this.xref.setStartXRef(this.startXRef); this.xref.setStartXRef(this.startXRef);
} }
setup(recoveryMode) {
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.pdfManager, this.xref);
}
get numPages() { get numPages() {
const linearization = this.linearization; const linearization = this.linearization;
const num = linearization ? linearization.numPages : this.catalog.numPages; const num = linearization ? linearization.numPages : this.catalog.numPages;
return shadow(this, "numPages", num); return shadow(this, "numPages", num);
} }
/**
* @private
*/
_hasOnlyDocumentSignatures(fields, recursionDepth = 0) {
const RECURSION_LIMIT = 10;
return fields.every(field => {
field = this.xref.fetchIfRef(field);
if (field.has("Kids")) {
if (++recursionDepth > RECURSION_LIMIT) {
warn("_hasOnlyDocumentSignatures: maximum recursion depth reached");
return false;
}
return this._hasOnlyDocumentSignatures(
field.get("Kids"),
recursionDepth
);
}
const isSignature = isName(field.get("FT"), "Sig");
const rectangle = field.get("Rect");
const isInvisible =
Array.isArray(rectangle) && rectangle.every(value => value === 0);
return isSignature && isInvisible;
});
}
get formInfo() {
const formInfo = { hasAcroForm: false, hasXfa: false };
const acroForm = this.catalog.acroForm;
if (!acroForm) {
return shadow(this, "formInfo", formInfo);
}
try {
// The document contains XFA data if the `XFA` entry is a non-empty
// array or stream.
const xfa = acroForm.get("XFA");
const hasXfa =
(Array.isArray(xfa) && xfa.length > 0) ||
(isStream(xfa) && !xfa.isEmpty);
formInfo.hasXfa = hasXfa;
// The document contains AcroForm data if the `Fields` entry is a
// non-empty array and it doesn't consist of only document signatures.
// This second check is required for files that don't actually contain
// AcroForm data (only XFA data), but that use the `Fields` entry to
// store (invisible) document signatures. This can be detected using
// the first bit of the `SigFlags` integer (see Table 219 in the
// specification).
const fields = acroForm.get("Fields");
const hasFields = Array.isArray(fields) && fields.length > 0;
const sigFlags = acroForm.get("SigFlags");
const hasOnlyDocumentSignatures =
!!(sigFlags & 0x1) && this._hasOnlyDocumentSignatures(fields);
formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch form information.");
}
return shadow(this, "formInfo", formInfo);
}
get documentInfo() { get documentInfo() {
const DocumentInfoValidators = { const DocumentInfoValidators = {
Title: isString, Title: isString,
@ -727,7 +759,7 @@ class PDFDocument {
Trapped: isName, Trapped: isName,
}; };
let version = this.pdfFormatVersion; let version = this._version;
if ( if (
typeof version !== "string" || typeof version !== "string" ||
!PDF_HEADER_VERSION_REGEXP.test(version) !PDF_HEADER_VERSION_REGEXP.test(version)
@ -739,9 +771,9 @@ class PDFDocument {
const docInfo = { const docInfo = {
PDFFormatVersion: version, PDFFormatVersion: version,
IsLinearized: !!this.linearization, IsLinearized: !!this.linearization,
IsAcroFormPresent: !!this.acroForm, IsAcroFormPresent: this.formInfo.hasAcroForm,
IsXFAPresent: !!this.xfa, IsXFAPresent: this.formInfo.hasXfa,
IsCollectionPresent: !!this.collection, IsCollectionPresent: !!this.catalog.collection,
}; };
let infoDict; let infoDict;

View File

@ -65,8 +65,8 @@ class Catalog {
this.pdfManager = pdfManager; this.pdfManager = pdfManager;
this.xref = xref; this.xref = xref;
this.catDict = xref.getCatalogObj(); this._catDict = xref.getCatalogObj();
if (!isDict(this.catDict)) { if (!isDict(this._catDict)) {
throw new FormatError("Catalog object is not a dictionary."); throw new FormatError("Catalog object is not a dictionary.");
} }
@ -76,8 +76,48 @@ class Catalog {
this.pageKidsCountCache = new RefSetCache(); this.pageKidsCountCache = new RefSetCache();
} }
get version() {
const version = this._catDict.get("Version");
if (!isName(version)) {
return shadow(this, "version", null);
}
return shadow(this, "version", version.name);
}
get collection() {
let collection = null;
try {
const obj = this._catDict.get("Collection");
if (isDict(obj) && obj.size > 0) {
collection = obj;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch Collection entry; assuming no collection is present.");
}
return shadow(this, "collection", collection);
}
get acroForm() {
let acroForm = null;
try {
const obj = this._catDict.get("AcroForm");
if (isDict(obj) && obj.size > 0) {
acroForm = obj;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no forms are present.");
}
return shadow(this, "acroForm", acroForm);
}
get metadata() { get metadata() {
const streamRef = this.catDict.getRaw("Metadata"); const streamRef = this._catDict.getRaw("Metadata");
if (!isRef(streamRef)) { if (!isRef(streamRef)) {
return shadow(this, "metadata", null); return shadow(this, "metadata", null);
} }
@ -112,7 +152,7 @@ class Catalog {
} }
get toplevelPagesDict() { get toplevelPagesDict() {
const pagesObj = this.catDict.get("Pages"); const pagesObj = this._catDict.get("Pages");
if (!isDict(pagesObj)) { if (!isDict(pagesObj)) {
throw new FormatError("Invalid top-level pages dictionary."); throw new FormatError("Invalid top-level pages dictionary.");
} }
@ -136,7 +176,7 @@ class Catalog {
* @private * @private
*/ */
_readDocumentOutline() { _readDocumentOutline() {
let obj = this.catDict.get("Outlines"); let obj = this._catDict.get("Outlines");
if (!isDict(obj)) { if (!isDict(obj)) {
return null; return null;
} }
@ -257,7 +297,7 @@ class Catalog {
get optionalContentConfig() { get optionalContentConfig() {
let config = null; let config = null;
try { try {
const properties = this.catDict.get("OCProperties"); const properties = this._catDict.get("OCProperties");
if (!properties) { if (!properties) {
return shadow(this, "optionalContentConfig", null); return shadow(this, "optionalContentConfig", null);
} }
@ -370,12 +410,12 @@ class Catalog {
* @private * @private
*/ */
_readDests() { _readDests() {
const obj = this.catDict.get("Names"); const obj = this._catDict.get("Names");
if (obj && obj.has("Dests")) { if (obj && obj.has("Dests")) {
return new NameTree(obj.getRaw("Dests"), this.xref); return new NameTree(obj.getRaw("Dests"), this.xref);
} else if (this.catDict.has("Dests")) { } else if (this._catDict.has("Dests")) {
// Simple destination dictionary. // Simple destination dictionary.
return this.catDict.get("Dests"); return this._catDict.get("Dests");
} }
return undefined; return undefined;
} }
@ -397,7 +437,7 @@ class Catalog {
* @private * @private
*/ */
_readPageLabels() { _readPageLabels() {
const obj = this.catDict.getRaw("PageLabels"); const obj = this._catDict.getRaw("PageLabels");
if (!obj) { if (!obj) {
return null; return null;
} }
@ -497,7 +537,7 @@ class Catalog {
} }
get pageLayout() { get pageLayout() {
const obj = this.catDict.get("PageLayout"); const obj = this._catDict.get("PageLayout");
// Purposely use a non-standard default value, rather than 'SinglePage', to // Purposely use a non-standard default value, rather than 'SinglePage', to
// allow differentiating between `undefined` and /SinglePage since that does // allow differentiating between `undefined` and /SinglePage since that does
// affect the Scroll mode (continuous/non-continuous) used in Adobe Reader. // affect the Scroll mode (continuous/non-continuous) used in Adobe Reader.
@ -518,7 +558,7 @@ class Catalog {
} }
get pageMode() { get pageMode() {
const obj = this.catDict.get("PageMode"); const obj = this._catDict.get("PageMode");
let pageMode = "UseNone"; // Default value. let pageMode = "UseNone"; // Default value.
if (isName(obj)) { if (isName(obj)) {
@ -556,7 +596,7 @@ class Catalog {
NumCopies: Number.isInteger, NumCopies: Number.isInteger,
}; };
const obj = this.catDict.get("ViewerPreferences"); const obj = this._catDict.get("ViewerPreferences");
let prefs = null; let prefs = null;
if (isDict(obj)) { if (isDict(obj)) {
@ -681,7 +721,7 @@ class Catalog {
* NOTE: "JavaScript" actions are, for now, handled by `get javaScript` below. * NOTE: "JavaScript" actions are, for now, handled by `get javaScript` below.
*/ */
get openAction() { get openAction() {
const obj = this.catDict.get("OpenAction"); const obj = this._catDict.get("OpenAction");
let openAction = null; let openAction = null;
if (isDict(obj)) { if (isDict(obj)) {
@ -714,7 +754,7 @@ class Catalog {
} }
get attachments() { get attachments() {
const obj = this.catDict.get("Names"); const obj = this._catDict.get("Names");
let attachments = null; let attachments = null;
if (obj && obj.has("EmbeddedFiles")) { if (obj && obj.has("EmbeddedFiles")) {
@ -732,7 +772,7 @@ class Catalog {
} }
get javaScript() { get javaScript() {
const obj = this.catDict.get("Names"); const obj = this._catDict.get("Names");
let javaScript = null; let javaScript = null;
function appendIfJavaScriptDict(jsDict) { function appendIfJavaScriptDict(jsDict) {
@ -768,7 +808,7 @@ class Catalog {
} }
// Append OpenAction "JavaScript" actions to the JavaScript array. // Append OpenAction "JavaScript" actions to the JavaScript array.
const openAction = this.catDict.get("OpenAction"); const openAction = this._catDict.get("OpenAction");
if (isDict(openAction) && isName(openAction.get("S"), "JavaScript")) { if (isDict(openAction) && isName(openAction.get("S"), "JavaScript")) {
appendIfJavaScriptDict(openAction); appendIfJavaScriptDict(openAction);
} }
@ -813,7 +853,7 @@ class Catalog {
getPageDict(pageIndex) { getPageDict(pageIndex) {
const capability = createPromiseCapability(); const capability = createPromiseCapability();
const nodesToVisit = [this.catDict.getRaw("Pages")]; const nodesToVisit = [this._catDict.getRaw("Pages")];
const visitedNodes = new RefSet(); const visitedNodes = new RefSet();
const xref = this.xref, const xref = this.xref,
pageKidsCountCache = this.pageKidsCountCache; pageKidsCountCache = this.pageKidsCountCache;

View File

@ -41,7 +41,9 @@ describe("annotation", function () {
constructor(params) { constructor(params) {
this.docBaseUrl = params.docBaseUrl || null; this.docBaseUrl = params.docBaseUrl || null;
this.pdfDocument = { this.pdfDocument = {
catalog: {
acroForm: new Dict(), acroForm: new Dict(),
},
}; };
} }
@ -56,8 +58,8 @@ describe("annotation", function () {
}); });
} }
ensureDoc(prop, args) { ensureCatalog(prop, args) {
return this.ensure(this.pdfDocument, prop, args); return this.ensure(this.pdfDocument.catalog, prop, args);
} }
} }

View File

@ -13,7 +13,10 @@
* limitations under the License. * limitations under the License.
*/ */
import { createIdFactory } from "./test_utils.js"; import { createIdFactory, XRefMock } from "./test_utils.js";
import { Dict, Name, Ref } from "../../src/core/primitives.js";
import { PDFDocument } from "../../src/core/document.js";
import { StringStream } from "../../src/core/stream.js";
describe("document", function () { describe("document", function () {
describe("Page", function () { describe("Page", function () {
@ -40,4 +43,111 @@ describe("document", function () {
expect(idFactory1.getDocId()).toEqual("g_d0"); expect(idFactory1.getDocId()).toEqual("g_d0");
}); });
}); });
// Unit-tests for `PDFDocument.formInfo`, using a minimal `pdfManager` stub
// and a hand-built `catalog` object instead of a parsed PDF file.
describe("PDFDocument", function () {
  const pdfManager = {
    get docId() {
      return "d0";
    },
  };
  const stream = new StringStream("Dummy_PDF_data");

  // Create a document whose catalog only exposes the given `acroForm`.
  // When `xref` is provided it replaces the document's own `xref`, so that
  // references stored in the form dictionary can be resolved by the test.
  function getDocument(acroForm, xref = null) {
    const pdfDocument = new PDFDocument(pdfManager, stream);
    pdfDocument.catalog = { acroForm };
    if (xref) {
      pdfDocument.xref = xref;
    }
    return pdfDocument;
  }

  it("should get form info when no form data is present", function () {
    const pdfDocument = getDocument(null);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });

  it("should get form info when XFA is present", function () {
    const acroForm = new Dict();

    // The `XFA` entry can only be a non-empty array or stream.
    acroForm.set("XFA", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });

    acroForm.set("XFA", new StringStream(""));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", new StringStream("non-empty"));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });
  });

  it("should get form info when AcroForm is present", function () {
    const acroForm = new Dict();

    // The `Fields` entry can only be a non-empty array.
    acroForm.set("Fields", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("Fields", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is *not* set, the fields
    // are not inspected for document signatures.
    acroForm.set("Fields", ["foo", "bar"]);
    acroForm.set("SigFlags", 2);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is set and the `Fields` array
    // only contains document signatures, then there is no AcroForm data.
    const annotationDict = new Dict();
    annotationDict.set("FT", Name.get("Sig"));
    annotationDict.set("Rect", [0, 0, 0, 0]);
    const annotationRef = Ref.get(11, 0);

    const kidsDict = new Dict();
    kidsDict.set("Kids", [annotationRef]);
    const kidsRef = Ref.get(10, 0);

    // The mock must be installed on the document that is actually
    // inspected below; assigning it to a previously created document
    // would leave the references unresolvable.
    const xref = new XRefMock([
      { ref: annotationRef, data: annotationDict },
      { ref: kidsRef, data: kidsDict },
    ]);

    acroForm.set("Fields", [kidsRef]);
    acroForm.set("SigFlags", 3);
    pdfDocument = getDocument(acroForm, xref);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });
});
}); });

View File

@ -1426,14 +1426,14 @@ const PDFViewerApplication = {
this.setTitle(contentDispositionFilename); this.setTitle(contentDispositionFilename);
} }
if (info.IsXFAPresent) { if (info.IsXFAPresent && !info.IsAcroFormPresent) {
console.warn("Warning: XFA is not supported"); console.warn("Warning: XFA is not supported");
this._delayedFallback(UNSUPPORTED_FEATURES.forms); this._delayedFallback(UNSUPPORTED_FEATURES.forms);
} else if ( } else if (
info.IsAcroFormPresent && (info.IsAcroFormPresent || info.IsXFAPresent) &&
!this.pdfViewer.renderInteractiveForms !this.pdfViewer.renderInteractiveForms
) { ) {
console.warn("Warning: AcroForm support is not enabled"); console.warn("Warning: Interactive form support is not enabled");
this._delayedFallback(UNSUPPORTED_FEATURES.forms); this._delayedFallback(UNSUPPORTED_FEATURES.forms);
} }
@ -1454,8 +1454,10 @@ const PDFViewerApplication = {
}); });
} }
let formType = null; let formType = null;
if (info.IsAcroFormPresent) { if (info.IsXFAPresent) {
formType = info.IsXFAPresent ? "xfa" : "acroform"; formType = "xfa";
} else if (info.IsAcroFormPresent) {
formType = "acroform";
} }
this.externalServices.reportTelemetry({ this.externalServices.reportTelemetry({
type: "documentInfo", type: "documentInfo",