Merge pull request #12271 from timvandermeij/acroform-type-detection
Improve AcroForm/XFA form type detection
This commit is contained in:
commit
4ffdbe6ec9
@ -51,7 +51,7 @@ class AnnotationFactory {
|
||||
* instance.
|
||||
*/
|
||||
static create(xref, ref, pdfManager, idFactory) {
|
||||
return pdfManager.ensureDoc("acroForm").then(acroForm => {
|
||||
return pdfManager.ensureCatalog("acroForm").then(acroForm => {
|
||||
return pdfManager.ensure(this, "_create", [
|
||||
xref,
|
||||
ref,
|
||||
|
@ -552,6 +552,7 @@ class PDFDocument {
|
||||
this.stream = stream;
|
||||
this.xref = new XRef(stream, pdfManager);
|
||||
this._pagePromises = [];
|
||||
this._version = null;
|
||||
|
||||
const idCounters = {
|
||||
font: 0,
|
||||
@ -572,42 +573,15 @@ class PDFDocument {
|
||||
}
|
||||
|
||||
parse(recoveryMode) {
|
||||
this.setup(recoveryMode);
|
||||
this.xref.parse(recoveryMode);
|
||||
this.catalog = new Catalog(this.pdfManager, this.xref);
|
||||
|
||||
const version = this.catalog.catDict.get("Version");
|
||||
if (isName(version)) {
|
||||
this.pdfFormatVersion = version.name;
|
||||
}
|
||||
|
||||
// Check if AcroForms are present in the document.
|
||||
try {
|
||||
this.acroForm = this.catalog.catDict.get("AcroForm");
|
||||
if (this.acroForm) {
|
||||
this.xfa = this.acroForm.get("XFA");
|
||||
const fields = this.acroForm.get("Fields");
|
||||
if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
|
||||
this.acroForm = null; // No fields and no XFA, so it's not a form.
|
||||
}
|
||||
}
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
info("Cannot fetch AcroForm entry; assuming no AcroForms are present");
|
||||
this.acroForm = null;
|
||||
}
|
||||
|
||||
// Check if a Collection dictionary is present in the document.
|
||||
try {
|
||||
const collection = this.catalog.catDict.get("Collection");
|
||||
if (isDict(collection) && collection.size > 0) {
|
||||
this.collection = collection;
|
||||
}
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
info("Cannot fetch Collection dictionary.");
|
||||
// The `checkHeader` method is called before this method and parses the
|
||||
// version from the header. The specification states in section 7.5.2
|
||||
// that the version from the catalog, if present, should overwrite the
|
||||
// version from the header.
|
||||
if (this.catalog.version) {
|
||||
this._version = this.catalog.version;
|
||||
}
|
||||
}
|
||||
|
||||
@ -693,9 +667,9 @@ class PDFDocument {
|
||||
}
|
||||
version += String.fromCharCode(ch);
|
||||
}
|
||||
if (!this.pdfFormatVersion) {
|
||||
if (!this._version) {
|
||||
// Remove the "%PDF-" prefix.
|
||||
this.pdfFormatVersion = version.substring(5);
|
||||
this._version = version.substring(5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -703,17 +677,75 @@ class PDFDocument {
|
||||
this.xref.setStartXRef(this.startXRef);
|
||||
}
|
||||
|
||||
setup(recoveryMode) {
|
||||
this.xref.parse(recoveryMode);
|
||||
this.catalog = new Catalog(this.pdfManager, this.xref);
|
||||
}
|
||||
|
||||
get numPages() {
|
||||
const linearization = this.linearization;
|
||||
const num = linearization ? linearization.numPages : this.catalog.numPages;
|
||||
return shadow(this, "numPages", num);
|
||||
}
|
||||
|
||||
/**
|
||||
* @private
|
||||
*/
|
||||
_hasOnlyDocumentSignatures(fields, recursionDepth = 0) {
|
||||
const RECURSION_LIMIT = 10;
|
||||
return fields.every(field => {
|
||||
field = this.xref.fetchIfRef(field);
|
||||
if (field.has("Kids")) {
|
||||
if (++recursionDepth > RECURSION_LIMIT) {
|
||||
warn("_hasOnlyDocumentSignatures: maximum recursion depth reached");
|
||||
return false;
|
||||
}
|
||||
return this._hasOnlyDocumentSignatures(
|
||||
field.get("Kids"),
|
||||
recursionDepth
|
||||
);
|
||||
}
|
||||
const isSignature = isName(field.get("FT"), "Sig");
|
||||
const rectangle = field.get("Rect");
|
||||
const isInvisible =
|
||||
Array.isArray(rectangle) && rectangle.every(value => value === 0);
|
||||
return isSignature && isInvisible;
|
||||
});
|
||||
}
|
||||
|
||||
get formInfo() {
|
||||
const formInfo = { hasAcroForm: false, hasXfa: false };
|
||||
const acroForm = this.catalog.acroForm;
|
||||
if (!acroForm) {
|
||||
return shadow(this, "formInfo", formInfo);
|
||||
}
|
||||
|
||||
try {
|
||||
// The document contains XFA data if the `XFA` entry is a non-empty
|
||||
// array or stream.
|
||||
const xfa = acroForm.get("XFA");
|
||||
const hasXfa =
|
||||
(Array.isArray(xfa) && xfa.length > 0) ||
|
||||
(isStream(xfa) && !xfa.isEmpty);
|
||||
formInfo.hasXfa = hasXfa;
|
||||
|
||||
// The document contains AcroForm data if the `Fields` entry is a
|
||||
// non-empty array and it doesn't consist of only document signatures.
|
||||
// This second check is required for files that don't actually contain
|
||||
// AcroForm data (only XFA data), but that use the `Fields` entry to
|
||||
// store (invisible) document signatures. This can be detected using
|
||||
// the first bit of the `SigFlags` integer (see Table 219 in the
|
||||
// specification).
|
||||
const fields = acroForm.get("Fields");
|
||||
const hasFields = Array.isArray(fields) && fields.length > 0;
|
||||
const sigFlags = acroForm.get("SigFlags");
|
||||
const hasOnlyDocumentSignatures =
|
||||
!!(sigFlags & 0x1) && this._hasOnlyDocumentSignatures(fields);
|
||||
formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
info("Cannot fetch form information.");
|
||||
}
|
||||
return shadow(this, "formInfo", formInfo);
|
||||
}
|
||||
|
||||
get documentInfo() {
|
||||
const DocumentInfoValidators = {
|
||||
Title: isString,
|
||||
@ -727,7 +759,7 @@ class PDFDocument {
|
||||
Trapped: isName,
|
||||
};
|
||||
|
||||
let version = this.pdfFormatVersion;
|
||||
let version = this._version;
|
||||
if (
|
||||
typeof version !== "string" ||
|
||||
!PDF_HEADER_VERSION_REGEXP.test(version)
|
||||
@ -739,9 +771,9 @@ class PDFDocument {
|
||||
const docInfo = {
|
||||
PDFFormatVersion: version,
|
||||
IsLinearized: !!this.linearization,
|
||||
IsAcroFormPresent: !!this.acroForm,
|
||||
IsXFAPresent: !!this.xfa,
|
||||
IsCollectionPresent: !!this.collection,
|
||||
IsAcroFormPresent: this.formInfo.hasAcroForm,
|
||||
IsXFAPresent: this.formInfo.hasXfa,
|
||||
IsCollectionPresent: !!this.catalog.collection,
|
||||
};
|
||||
|
||||
let infoDict;
|
||||
|
@ -65,8 +65,8 @@ class Catalog {
|
||||
this.pdfManager = pdfManager;
|
||||
this.xref = xref;
|
||||
|
||||
this.catDict = xref.getCatalogObj();
|
||||
if (!isDict(this.catDict)) {
|
||||
this._catDict = xref.getCatalogObj();
|
||||
if (!isDict(this._catDict)) {
|
||||
throw new FormatError("Catalog object is not a dictionary.");
|
||||
}
|
||||
|
||||
@ -76,8 +76,48 @@ class Catalog {
|
||||
this.pageKidsCountCache = new RefSetCache();
|
||||
}
|
||||
|
||||
get version() {
|
||||
const version = this._catDict.get("Version");
|
||||
if (!isName(version)) {
|
||||
return shadow(this, "version", null);
|
||||
}
|
||||
return shadow(this, "version", version.name);
|
||||
}
|
||||
|
||||
get collection() {
|
||||
let collection = null;
|
||||
try {
|
||||
const obj = this._catDict.get("Collection");
|
||||
if (isDict(obj) && obj.size > 0) {
|
||||
collection = obj;
|
||||
}
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
info("Cannot fetch Collection entry; assuming no collection is present.");
|
||||
}
|
||||
return shadow(this, "collection", collection);
|
||||
}
|
||||
|
||||
get acroForm() {
|
||||
let acroForm = null;
|
||||
try {
|
||||
const obj = this._catDict.get("AcroForm");
|
||||
if (isDict(obj) && obj.size > 0) {
|
||||
acroForm = obj;
|
||||
}
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
info("Cannot fetch AcroForm entry; assuming no forms are present.");
|
||||
}
|
||||
return shadow(this, "acroForm", acroForm);
|
||||
}
|
||||
|
||||
get metadata() {
|
||||
const streamRef = this.catDict.getRaw("Metadata");
|
||||
const streamRef = this._catDict.getRaw("Metadata");
|
||||
if (!isRef(streamRef)) {
|
||||
return shadow(this, "metadata", null);
|
||||
}
|
||||
@ -112,7 +152,7 @@ class Catalog {
|
||||
}
|
||||
|
||||
get toplevelPagesDict() {
|
||||
const pagesObj = this.catDict.get("Pages");
|
||||
const pagesObj = this._catDict.get("Pages");
|
||||
if (!isDict(pagesObj)) {
|
||||
throw new FormatError("Invalid top-level pages dictionary.");
|
||||
}
|
||||
@ -136,7 +176,7 @@ class Catalog {
|
||||
* @private
|
||||
*/
|
||||
_readDocumentOutline() {
|
||||
let obj = this.catDict.get("Outlines");
|
||||
let obj = this._catDict.get("Outlines");
|
||||
if (!isDict(obj)) {
|
||||
return null;
|
||||
}
|
||||
@ -257,7 +297,7 @@ class Catalog {
|
||||
get optionalContentConfig() {
|
||||
let config = null;
|
||||
try {
|
||||
const properties = this.catDict.get("OCProperties");
|
||||
const properties = this._catDict.get("OCProperties");
|
||||
if (!properties) {
|
||||
return shadow(this, "optionalContentConfig", null);
|
||||
}
|
||||
@ -370,12 +410,12 @@ class Catalog {
|
||||
* @private
|
||||
*/
|
||||
_readDests() {
|
||||
const obj = this.catDict.get("Names");
|
||||
const obj = this._catDict.get("Names");
|
||||
if (obj && obj.has("Dests")) {
|
||||
return new NameTree(obj.getRaw("Dests"), this.xref);
|
||||
} else if (this.catDict.has("Dests")) {
|
||||
} else if (this._catDict.has("Dests")) {
|
||||
// Simple destination dictionary.
|
||||
return this.catDict.get("Dests");
|
||||
return this._catDict.get("Dests");
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
@ -397,7 +437,7 @@ class Catalog {
|
||||
* @private
|
||||
*/
|
||||
_readPageLabels() {
|
||||
const obj = this.catDict.getRaw("PageLabels");
|
||||
const obj = this._catDict.getRaw("PageLabels");
|
||||
if (!obj) {
|
||||
return null;
|
||||
}
|
||||
@ -497,7 +537,7 @@ class Catalog {
|
||||
}
|
||||
|
||||
get pageLayout() {
|
||||
const obj = this.catDict.get("PageLayout");
|
||||
const obj = this._catDict.get("PageLayout");
|
||||
// Purposely use a non-standard default value, rather than 'SinglePage', to
|
||||
// allow differentiating between `undefined` and /SinglePage since that does
|
||||
// affect the Scroll mode (continuous/non-continuous) used in Adobe Reader.
|
||||
@ -518,7 +558,7 @@ class Catalog {
|
||||
}
|
||||
|
||||
get pageMode() {
|
||||
const obj = this.catDict.get("PageMode");
|
||||
const obj = this._catDict.get("PageMode");
|
||||
let pageMode = "UseNone"; // Default value.
|
||||
|
||||
if (isName(obj)) {
|
||||
@ -556,7 +596,7 @@ class Catalog {
|
||||
NumCopies: Number.isInteger,
|
||||
};
|
||||
|
||||
const obj = this.catDict.get("ViewerPreferences");
|
||||
const obj = this._catDict.get("ViewerPreferences");
|
||||
let prefs = null;
|
||||
|
||||
if (isDict(obj)) {
|
||||
@ -681,7 +721,7 @@ class Catalog {
|
||||
* NOTE: "JavaScript" actions are, for now, handled by `get javaScript` below.
|
||||
*/
|
||||
get openAction() {
|
||||
const obj = this.catDict.get("OpenAction");
|
||||
const obj = this._catDict.get("OpenAction");
|
||||
let openAction = null;
|
||||
|
||||
if (isDict(obj)) {
|
||||
@ -714,7 +754,7 @@ class Catalog {
|
||||
}
|
||||
|
||||
get attachments() {
|
||||
const obj = this.catDict.get("Names");
|
||||
const obj = this._catDict.get("Names");
|
||||
let attachments = null;
|
||||
|
||||
if (obj && obj.has("EmbeddedFiles")) {
|
||||
@ -732,7 +772,7 @@ class Catalog {
|
||||
}
|
||||
|
||||
get javaScript() {
|
||||
const obj = this.catDict.get("Names");
|
||||
const obj = this._catDict.get("Names");
|
||||
|
||||
let javaScript = null;
|
||||
function appendIfJavaScriptDict(jsDict) {
|
||||
@ -768,7 +808,7 @@ class Catalog {
|
||||
}
|
||||
|
||||
// Append OpenAction "JavaScript" actions to the JavaScript array.
|
||||
const openAction = this.catDict.get("OpenAction");
|
||||
const openAction = this._catDict.get("OpenAction");
|
||||
if (isDict(openAction) && isName(openAction.get("S"), "JavaScript")) {
|
||||
appendIfJavaScriptDict(openAction);
|
||||
}
|
||||
@ -813,7 +853,7 @@ class Catalog {
|
||||
|
||||
getPageDict(pageIndex) {
|
||||
const capability = createPromiseCapability();
|
||||
const nodesToVisit = [this.catDict.getRaw("Pages")];
|
||||
const nodesToVisit = [this._catDict.getRaw("Pages")];
|
||||
const visitedNodes = new RefSet();
|
||||
const xref = this.xref,
|
||||
pageKidsCountCache = this.pageKidsCountCache;
|
||||
|
@ -41,7 +41,9 @@ describe("annotation", function () {
|
||||
constructor(params) {
|
||||
this.docBaseUrl = params.docBaseUrl || null;
|
||||
this.pdfDocument = {
|
||||
acroForm: new Dict(),
|
||||
catalog: {
|
||||
acroForm: new Dict(),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@ -56,8 +58,8 @@ describe("annotation", function () {
|
||||
});
|
||||
}
|
||||
|
||||
ensureDoc(prop, args) {
|
||||
return this.ensure(this.pdfDocument, prop, args);
|
||||
ensureCatalog(prop, args) {
|
||||
return this.ensure(this.pdfDocument.catalog, prop, args);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,10 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { createIdFactory } from "./test_utils.js";
|
||||
import { createIdFactory, XRefMock } from "./test_utils.js";
|
||||
import { Dict, Name, Ref } from "../../src/core/primitives.js";
|
||||
import { PDFDocument } from "../../src/core/document.js";
|
||||
import { StringStream } from "../../src/core/stream.js";
|
||||
|
||||
describe("document", function () {
|
||||
describe("Page", function () {
|
||||
@ -40,4 +43,111 @@ describe("document", function () {
|
||||
expect(idFactory1.getDocId()).toEqual("g_d0");
|
||||
});
|
||||
});
|
||||
|
||||
describe("PDFDocument", function () {
  const pdfManager = {
    get docId() {
      return "d0";
    },
  };
  const stream = new StringStream("Dummy_PDF_data");

  // Builds a `PDFDocument` whose catalog only exposes the given `acroForm`
  // value. When `xref` is provided it replaces the document's own XRef, so
  // that references stored in the form dictionary resolve against the mock.
  function getDocument(acroForm, xref = null) {
    const pdfDocument = new PDFDocument(pdfManager, stream);
    pdfDocument.catalog = { acroForm };
    if (xref) {
      pdfDocument.xref = xref;
    }
    return pdfDocument;
  }

  it("should get form info when no form data is present", function () {
    const pdfDocument = getDocument(null);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });

  it("should get form info when XFA is present", function () {
    const acroForm = new Dict();

    // The `XFA` entry can only be a non-empty array or stream.
    acroForm.set("XFA", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });

    acroForm.set("XFA", new StringStream(""));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("XFA", new StringStream("non-empty"));
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: true,
    });
  });

  it("should get form info when AcroForm is present", function () {
    const acroForm = new Dict();

    // The `Fields` entry can only be a non-empty array.
    acroForm.set("Fields", []);
    let pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });

    acroForm.set("Fields", ["foo", "bar"]);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is not set, the `Fields`
    // array is not inspected for document signatures and the document is
    // still reported as containing AcroForm data.
    acroForm.set("Fields", ["foo", "bar"]);
    acroForm.set("SigFlags", 2);
    pdfDocument = getDocument(acroForm);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: true,
      hasXfa: false,
    });

    // If the first bit of the `SigFlags` entry is set and the `Fields` array
    // only contains document signatures, then there is no AcroForm data.
    const annotationDict = new Dict();
    annotationDict.set("FT", Name.get("Sig"));
    annotationDict.set("Rect", [0, 0, 0, 0]);
    const annotationRef = Ref.get(11, 0);

    const kidsDict = new Dict();
    kidsDict.set("Kids", [annotationRef]);
    const kidsRef = Ref.get(10, 0);

    const xref = new XRefMock([
      { ref: annotationRef, data: annotationDict },
      { ref: kidsRef, data: kidsDict },
    ]);

    acroForm.set("Fields", [kidsRef]);
    acroForm.set("SigFlags", 3);
    // The mock must be installed on the document under test; otherwise the
    // references cannot be resolved and the expectation below would only
    // hold because the lookup failure is swallowed by `formInfo`.
    pdfDocument = getDocument(acroForm, xref);
    expect(pdfDocument.formInfo).toEqual({
      hasAcroForm: false,
      hasXfa: false,
    });
  });
});
|
||||
});
|
||||
|
12
web/app.js
12
web/app.js
@ -1426,14 +1426,14 @@ const PDFViewerApplication = {
|
||||
this.setTitle(contentDispositionFilename);
|
||||
}
|
||||
|
||||
if (info.IsXFAPresent) {
|
||||
if (info.IsXFAPresent && !info.IsAcroFormPresent) {
|
||||
console.warn("Warning: XFA is not supported");
|
||||
this._delayedFallback(UNSUPPORTED_FEATURES.forms);
|
||||
} else if (
|
||||
info.IsAcroFormPresent &&
|
||||
(info.IsAcroFormPresent || info.IsXFAPresent) &&
|
||||
!this.pdfViewer.renderInteractiveForms
|
||||
) {
|
||||
console.warn("Warning: AcroForm support is not enabled");
|
||||
console.warn("Warning: Interactive form support is not enabled");
|
||||
this._delayedFallback(UNSUPPORTED_FEATURES.forms);
|
||||
}
|
||||
|
||||
@ -1454,8 +1454,10 @@ const PDFViewerApplication = {
|
||||
});
|
||||
}
|
||||
let formType = null;
|
||||
if (info.IsAcroFormPresent) {
|
||||
formType = info.IsXFAPresent ? "xfa" : "acroform";
|
||||
if (info.IsXFAPresent) {
|
||||
formType = "xfa";
|
||||
} else if (info.IsAcroFormPresent) {
|
||||
formType = "acroform";
|
||||
}
|
||||
this.externalServices.reportTelemetry({
|
||||
type: "documentInfo",
|
||||
|
Loading…
Reference in New Issue
Block a user