From 935d95b4629ce70363d6a4cf67951ae77fae7905 Mon Sep 17 00:00:00 2001
From: Tim van der Meij <timvandermeij@gmail.com>
Date: Sat, 22 Aug 2020 22:21:38 +0200
Subject: [PATCH 1/6] Move the version logic from the document to the catalog

The `Version` entry is part of the catalog, not of the document, so its
logic should be placed there instead. The document should look in the
catalog to fetch it, and not have knowledge of `catDict`, which is a
member internal to the catalog.

Moreover, make the version member private on the document instance. It's
only used internally and was also never intended to be public. For users
it's exposed by the `getMetadata` API endpoint as `PDFFormatVersion`.

Finally, add a comment that clarifies how the version from the header
and the version from the catalog are treated.
---
 src/core/document.js | 16 ++++++++++------
 src/core/obj.js      |  8 ++++++++
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/core/document.js b/src/core/document.js
index 404b046da..3076490dc 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -552,6 +552,7 @@ class PDFDocument {
     this.stream = stream;
     this.xref = new XRef(stream, pdfManager);
     this._pagePromises = [];
+    this._version = null;
 
     const idCounters = {
       font: 0,
@@ -574,9 +575,12 @@ class PDFDocument {
   parse(recoveryMode) {
     this.setup(recoveryMode);
 
-    const version = this.catalog.catDict.get("Version");
-    if (isName(version)) {
-      this.pdfFormatVersion = version.name;
+    // The `checkHeader` method is called before this method and parses the
+    // version from the header. The specification states in section 7.5.2
+    // that the version from the catalog, if present, should overwrite the
+    // version from the header.
+    if (this.catalog.version) {
+      this._version = this.catalog.version;
     }
 
     // Check if AcroForms are present in the document.
@@ -693,9 +697,9 @@ class PDFDocument {
       }
       version += String.fromCharCode(ch);
     }
-    if (!this.pdfFormatVersion) {
+    if (!this._version) {
       // Remove the "%PDF-" prefix.
-      this.pdfFormatVersion = version.substring(5);
+      this._version = version.substring(5);
     }
   }
 
@@ -727,7 +731,7 @@ class PDFDocument {
       Trapped: isName,
     };
 
-    let version = this.pdfFormatVersion;
+    let version = this._version;
     if (
       typeof version !== "string" ||
       !PDF_HEADER_VERSION_REGEXP.test(version)
diff --git a/src/core/obj.js b/src/core/obj.js
index 3eb437fbe..47dcd902b 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -76,6 +76,14 @@ class Catalog {
     this.pageKidsCountCache = new RefSetCache();
   }
 
+  get version() {
+    const version = this.catDict.get("Version");
+    if (!isName(version)) {
+      return shadow(this, "version", null);
+    }
+    return shadow(this, "version", version.name);
+  }
+
   get metadata() {
     const streamRef = this.catDict.getRaw("Metadata");
     if (!isRef(streamRef)) {

From b41a2f4d5a7e6c1acdac49c92869a12be18dd45c Mon Sep 17 00:00:00 2001
From: Tim van der Meij <timvandermeij@gmail.com>
Date: Sat, 22 Aug 2020 22:47:15 +0200
Subject: [PATCH 2/6] Move the collection logic from the document to the
 catalog

The `Collection` entry is part of the catalog, not of the document, so
its logic should be placed there instead. The document should look in the
catalog to fetch it, and not have knowledge of `catDict`, which is a
member internal to the catalog.

Moreover, remove the collection member from the document instance. It's
only used internally and was also never intended to be public. For users
it's exposed by the `getMetadata` API endpoint as `IsCollectionPresent`.
Moving this out of the `parse` function makes sure that the getter is
only executed if the document information is actually requested
(potentially making initial parsing a tiny bit faster).
---
 src/core/document.js | 15 +--------------
 src/core/obj.js      | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/core/document.js b/src/core/document.js
index 3076490dc..4513ae71d 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -600,19 +600,6 @@ class PDFDocument {
       info("Cannot fetch AcroForm entry; assuming no AcroForms are present");
       this.acroForm = null;
     }
-
-    // Check if a Collection dictionary is present in the document.
-    try {
-      const collection = this.catalog.catDict.get("Collection");
-      if (isDict(collection) && collection.size > 0) {
-        this.collection = collection;
-      }
-    } catch (ex) {
-      if (ex instanceof MissingDataException) {
-        throw ex;
-      }
-      info("Cannot fetch Collection dictionary.");
-    }
   }
 
   get linearization() {
@@ -745,7 +732,7 @@ class PDFDocument {
       IsLinearized: !!this.linearization,
       IsAcroFormPresent: !!this.acroForm,
       IsXFAPresent: !!this.xfa,
-      IsCollectionPresent: !!this.collection,
+      IsCollectionPresent: !!this.catalog.collection,
     };
 
     let infoDict;
diff --git a/src/core/obj.js b/src/core/obj.js
index 47dcd902b..eaf885a7f 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -84,6 +84,22 @@ class Catalog {
     return shadow(this, "version", version.name);
   }
 
+  get collection() {
+    let collection = null;
+    try {
+      const obj = this.catDict.get("Collection");
+      if (isDict(obj) && obj.size > 0) {
+        collection = obj;
+      }
+    } catch (ex) {
+      if (ex instanceof MissingDataException) {
+        throw ex;
+      }
+      info("Cannot fetch Collection entry; assuming no collection is present.");
+    }
+    return shadow(this, "collection", collection);
+  }
+
   get metadata() {
     const streamRef = this.catDict.getRaw("Metadata");
     if (!isRef(streamRef)) {

From f20f0bcc78c43dac8b100192419b6b3111bd5696 Mon Sep 17 00:00:00 2001
From: Tim van der Meij <timvandermeij@gmail.com>
Date: Sat, 22 Aug 2020 23:33:19 +0200
Subject: [PATCH 3/6] Move the AcroForm logic from the document to the catalog

The `AcroForm` entry is part of the catalog, not of the document, so its
logic should be placed there instead. The document should look in the
catalog to fetch it, and not have knowledge of `catDict`, which is a
member internal to the catalog.

Moreover, make the AcroForm member private on the document instance. It's
only used internally and was also never intended to be public. For users
it's exposed by the `getMetadata` API endpoint as `IsAcroFormPresent`.
Only a boolean is exposed, so we now also only store the boolean on the
document instance.

Finally, the annotation code needs access to the full AcroForm
dictionary, so it's updated to fetch the data from the catalog instead
of the document that now only holds the boolean.
---
 src/core/annotation.js       |  2 +-
 src/core/document.js         | 22 +++++++---------------
 src/core/obj.js              | 16 ++++++++++++++++
 test/unit/annotation_spec.js |  8 +++++---
 4 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/src/core/annotation.js b/src/core/annotation.js
index 5fe3d8654..8c1b8eaa3 100644
--- a/src/core/annotation.js
+++ b/src/core/annotation.js
@@ -51,7 +51,7 @@ class AnnotationFactory {
    *   instance.
    */
   static create(xref, ref, pdfManager, idFactory) {
-    return pdfManager.ensureDoc("acroForm").then(acroForm => {
+    return pdfManager.ensureCatalog("acroForm").then(acroForm => {
       return pdfManager.ensure(this, "_create", [
         xref,
         ref,
diff --git a/src/core/document.js b/src/core/document.js
index 4513ae71d..98ef34638 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -584,21 +584,13 @@ class PDFDocument {
     }
 
     // Check if AcroForms are present in the document.
-    try {
-      this.acroForm = this.catalog.catDict.get("AcroForm");
-      if (this.acroForm) {
-        this.xfa = this.acroForm.get("XFA");
-        const fields = this.acroForm.get("Fields");
-        if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
-          this.acroForm = null; // No fields and no XFA, so it's not a form.
-        }
+    this._hasAcroForm = !!this.catalog.acroForm;
+    if (this._hasAcroForm) {
+      this.xfa = this.catalog.acroForm.get("XFA");
+      const fields = this.catalog.acroForm.get("Fields");
+      if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
+        this._hasAcroForm = false; // No fields and no XFA, so it's not a form.
       }
-    } catch (ex) {
-      if (ex instanceof MissingDataException) {
-        throw ex;
-      }
-      info("Cannot fetch AcroForm entry; assuming no AcroForms are present");
-      this.acroForm = null;
     }
   }
 
@@ -730,7 +722,7 @@ class PDFDocument {
     const docInfo = {
       PDFFormatVersion: version,
       IsLinearized: !!this.linearization,
-      IsAcroFormPresent: !!this.acroForm,
+      IsAcroFormPresent: this._hasAcroForm,
       IsXFAPresent: !!this.xfa,
       IsCollectionPresent: !!this.catalog.collection,
     };
diff --git a/src/core/obj.js b/src/core/obj.js
index eaf885a7f..e7d8303a0 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -100,6 +100,22 @@ class Catalog {
     return shadow(this, "collection", collection);
   }
 
+  get acroForm() {
+    let acroForm = null;
+    try {
+      const obj = this.catDict.get("AcroForm");
+      if (isDict(obj) && obj.size > 0) {
+        acroForm = obj;
+      }
+    } catch (ex) {
+      if (ex instanceof MissingDataException) {
+        throw ex;
+      }
+      info("Cannot fetch AcroForm entry; assuming no forms are present.");
+    }
+    return shadow(this, "acroForm", acroForm);
+  }
+
   get metadata() {
     const streamRef = this.catDict.getRaw("Metadata");
     if (!isRef(streamRef)) {
diff --git a/test/unit/annotation_spec.js b/test/unit/annotation_spec.js
index f948a926d..d7fef39bf 100644
--- a/test/unit/annotation_spec.js
+++ b/test/unit/annotation_spec.js
@@ -41,7 +41,9 @@ describe("annotation", function () {
     constructor(params) {
       this.docBaseUrl = params.docBaseUrl || null;
       this.pdfDocument = {
-        acroForm: new Dict(),
+        catalog: {
+          acroForm: new Dict(),
+        },
       };
     }
 
@@ -56,8 +58,8 @@ describe("annotation", function () {
       });
     }
 
-    ensureDoc(prop, args) {
-      return this.ensure(this.pdfDocument, prop, args);
+    ensureCatalog(prop, args) {
+      return this.ensure(this.pdfDocument.catalog, prop, args);
     }
   }
 

From f0bf62ff54199745d902546cbb73ca1922e7daf1 Mon Sep 17 00:00:00 2001
From: Tim van der Meij <timvandermeij@gmail.com>
Date: Sat, 22 Aug 2020 23:38:50 +0200
Subject: [PATCH 4/6] Mark the `catDict` member as private in the `Catalog`
 class

`catDict` is no longer accessed outside of this file, and it should
never be accessed from elsewhere since it's internal to the catalog. If
data from it is needed elsewhere, the catalog should provide a getter for
it that can do basic data integrity checks and abstract away any
unnecessary details.
---
 src/core/obj.js | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/core/obj.js b/src/core/obj.js
index e7d8303a0..f4b9d2e85 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -65,8 +65,8 @@ class Catalog {
     this.pdfManager = pdfManager;
     this.xref = xref;
 
-    this.catDict = xref.getCatalogObj();
-    if (!isDict(this.catDict)) {
+    this._catDict = xref.getCatalogObj();
+    if (!isDict(this._catDict)) {
       throw new FormatError("Catalog object is not a dictionary.");
     }
 
@@ -77,7 +77,7 @@ class Catalog {
   }
 
   get version() {
-    const version = this.catDict.get("Version");
+    const version = this._catDict.get("Version");
     if (!isName(version)) {
       return shadow(this, "version", null);
     }
@@ -87,7 +87,7 @@ class Catalog {
   get collection() {
     let collection = null;
     try {
-      const obj = this.catDict.get("Collection");
+      const obj = this._catDict.get("Collection");
       if (isDict(obj) && obj.size > 0) {
         collection = obj;
       }
@@ -103,7 +103,7 @@ class Catalog {
   get acroForm() {
     let acroForm = null;
     try {
-      const obj = this.catDict.get("AcroForm");
+      const obj = this._catDict.get("AcroForm");
       if (isDict(obj) && obj.size > 0) {
         acroForm = obj;
       }
@@ -117,7 +117,7 @@ class Catalog {
   }
 
   get metadata() {
-    const streamRef = this.catDict.getRaw("Metadata");
+    const streamRef = this._catDict.getRaw("Metadata");
     if (!isRef(streamRef)) {
       return shadow(this, "metadata", null);
     }
@@ -152,7 +152,7 @@ class Catalog {
   }
 
   get toplevelPagesDict() {
-    const pagesObj = this.catDict.get("Pages");
+    const pagesObj = this._catDict.get("Pages");
     if (!isDict(pagesObj)) {
       throw new FormatError("Invalid top-level pages dictionary.");
     }
@@ -176,7 +176,7 @@ class Catalog {
    * @private
    */
   _readDocumentOutline() {
-    let obj = this.catDict.get("Outlines");
+    let obj = this._catDict.get("Outlines");
     if (!isDict(obj)) {
       return null;
     }
@@ -297,7 +297,7 @@ class Catalog {
   get optionalContentConfig() {
     let config = null;
     try {
-      const properties = this.catDict.get("OCProperties");
+      const properties = this._catDict.get("OCProperties");
       if (!properties) {
         return shadow(this, "optionalContentConfig", null);
       }
@@ -410,12 +410,12 @@ class Catalog {
    * @private
    */
   _readDests() {
-    const obj = this.catDict.get("Names");
+    const obj = this._catDict.get("Names");
     if (obj && obj.has("Dests")) {
       return new NameTree(obj.getRaw("Dests"), this.xref);
-    } else if (this.catDict.has("Dests")) {
+    } else if (this._catDict.has("Dests")) {
       // Simple destination dictionary.
-      return this.catDict.get("Dests");
+      return this._catDict.get("Dests");
     }
     return undefined;
   }
@@ -437,7 +437,7 @@ class Catalog {
    * @private
    */
   _readPageLabels() {
-    const obj = this.catDict.getRaw("PageLabels");
+    const obj = this._catDict.getRaw("PageLabels");
     if (!obj) {
       return null;
     }
@@ -537,7 +537,7 @@ class Catalog {
   }
 
   get pageLayout() {
-    const obj = this.catDict.get("PageLayout");
+    const obj = this._catDict.get("PageLayout");
     // Purposely use a non-standard default value, rather than 'SinglePage', to
     // allow differentiating between `undefined` and /SinglePage since that does
     // affect the Scroll mode (continuous/non-continuous) used in Adobe Reader.
@@ -558,7 +558,7 @@ class Catalog {
   }
 
   get pageMode() {
-    const obj = this.catDict.get("PageMode");
+    const obj = this._catDict.get("PageMode");
     let pageMode = "UseNone"; // Default value.
 
     if (isName(obj)) {
@@ -596,7 +596,7 @@ class Catalog {
       NumCopies: Number.isInteger,
     };
 
-    const obj = this.catDict.get("ViewerPreferences");
+    const obj = this._catDict.get("ViewerPreferences");
     let prefs = null;
 
     if (isDict(obj)) {
@@ -721,7 +721,7 @@ class Catalog {
    * NOTE: "JavaScript" actions are, for now, handled by `get javaScript` below.
    */
   get openAction() {
-    const obj = this.catDict.get("OpenAction");
+    const obj = this._catDict.get("OpenAction");
     let openAction = null;
 
     if (isDict(obj)) {
@@ -754,7 +754,7 @@ class Catalog {
   }
 
   get attachments() {
-    const obj = this.catDict.get("Names");
+    const obj = this._catDict.get("Names");
     let attachments = null;
 
     if (obj && obj.has("EmbeddedFiles")) {
@@ -772,7 +772,7 @@ class Catalog {
   }
 
   get javaScript() {
-    const obj = this.catDict.get("Names");
+    const obj = this._catDict.get("Names");
 
     let javaScript = null;
     function appendIfJavaScriptDict(jsDict) {
@@ -808,7 +808,7 @@ class Catalog {
     }
 
     // Append OpenAction "JavaScript" actions to the JavaScript array.
-    const openAction = this.catDict.get("OpenAction");
+    const openAction = this._catDict.get("OpenAction");
     if (isDict(openAction) && isName(openAction.get("S"), "JavaScript")) {
       appendIfJavaScriptDict(openAction);
     }
@@ -853,7 +853,7 @@ class Catalog {
 
   getPageDict(pageIndex) {
     const capability = createPromiseCapability();
-    const nodesToVisit = [this.catDict.getRaw("Pages")];
+    const nodesToVisit = [this._catDict.getRaw("Pages")];
     const visitedNodes = new RefSet();
     const xref = this.xref,
       pageKidsCountCache = this.pageKidsCountCache;

From 280207c7402342ad7a1334dc40a077967f1080ef Mon Sep 17 00:00:00 2001
From: Tim van der Meij <timvandermeij@gmail.com>
Date: Sun, 23 Aug 2020 14:04:49 +0200
Subject: [PATCH 5/6] Redo the form type detection logic and include unit tests

Good form type detection is important to get reliable telemetry and to
only show the fallback bar if a form cannot be filled out by the user.

PDF.js only supports AcroForm data, so XFA data is explicitly unsupported
(tracked in issue #2373). However, the previous form type detection
couldn't separate AcroForm and XFA well enough, causing form type
telemetry to be incorrect sometimes and the fallback bar to be shown for
forms that could in fact be filled out by the user.

The solution in this commit was found by studying the specification and
the form documents that are available to us. In a nutshell the rules are:

- There is XFA data if the `XFA` entry is a non-empty array or stream.
- There is AcroForm data if the `Fields` entry is a non-empty array and
  it doesn't consist of only document signatures.

The document signatures part was not handled in the old code, causing a
document with only XFA data to also be marked as having AcroForm data.
Moreover, the old code didn't check all the data types.

Now that AcroForm and XFA can be distinguished, the viewer is configured
to only show the fallback bar for documents that only have XFA data. If
a document also has AcroForm data, the viewer can use that to render the
form. We have not found documents where the XFA data was necessary in
that case.

Finally, we include unit tests to ensure that all cases are covered and
move the form type detection out of the `parse` function so that it's
only executed if the document information is actually requested
(potentially making initial parsing a tiny bit faster).
---
 src/core/document.js       |  77 +++++++++++++++++++++----
 test/unit/document_spec.js | 112 ++++++++++++++++++++++++++++++++++++-
 web/app.js                 |  12 ++--
 3 files changed, 183 insertions(+), 18 deletions(-)

diff --git a/src/core/document.js b/src/core/document.js
index 98ef34638..c970399da 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -582,16 +582,6 @@ class PDFDocument {
     if (this.catalog.version) {
       this._version = this.catalog.version;
     }
-
-    // Check if AcroForms are present in the document.
-    this._hasAcroForm = !!this.catalog.acroForm;
-    if (this._hasAcroForm) {
-      this.xfa = this.catalog.acroForm.get("XFA");
-      const fields = this.catalog.acroForm.get("Fields");
-      if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
-        this._hasAcroForm = false; // No fields and no XFA, so it's not a form.
-      }
-    }
   }
 
   get linearization() {
@@ -697,6 +687,69 @@ class PDFDocument {
     return shadow(this, "numPages", num);
   }
 
+  /**
+   * Whether all fields, including nested `Kids`, are invisible signatures.
+   * @private
+   */
+  _hasOnlyDocumentSignatures(fields, recursionDepth = 0) {
+    const RECURSION_LIMIT = 10;
+    return fields.every(field => {
+      field = this.xref.fetchIfRef(field);
+      if (field.has("Kids")) {
+        if (recursionDepth >= RECURSION_LIMIT) {
+          warn("_hasOnlyDocumentSignatures: maximum recursion depth reached");
+          return false;
+        }
+        return this._hasOnlyDocumentSignatures(
+          field.get("Kids"),
+          recursionDepth + 1
+        );
+      }
+      const isSignature = isName(field.get("FT"), "Sig");
+      const rect = field.get("Rect");
+      const isInvisible = Array.isArray(rect) && rect.every(v => v === 0);
+      return isSignature && isInvisible;
+    });
+  }
+
+  /**
+   * Which form types (AcroForm and/or XFA) the document contains.
+   */
+  get formInfo() {
+    const formInfo = { hasAcroForm: false, hasXfa: false };
+    const acroForm = this.catalog.acroForm;
+    if (!acroForm) {
+      return shadow(this, "formInfo", formInfo);
+    }
+
+    try {
+      // The document contains XFA data if the `XFA` entry is a non-empty
+      // array or stream.
+      const xfa = acroForm.get("XFA");
+      formInfo.hasXfa =
+        (Array.isArray(xfa) && xfa.length > 0) ||
+        (isStream(xfa) && !xfa.isEmpty);
+
+      // The document contains AcroForm data if the `Fields` entry is a
+      // non-empty array that doesn't consist of only document signatures.
+      // Files with only XFA data may still use `Fields` to store (invisible)
+      // document signatures; the first bit of the `SigFlags` integer (see
+      // Table 219 in the specification) signals that signatures exist. The
+      // fields are only scanned when `Fields` really is a non-empty array.
+      const fields = acroForm.get("Fields");
+      const hasFields = Array.isArray(fields) && fields.length > 0;
+      const hasSigFlag = !!(acroForm.get("SigFlags") & 0x1);
+      const hasOnlyDocumentSignatures =
+        hasFields && hasSigFlag && this._hasOnlyDocumentSignatures(fields);
+      formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
+    } catch (ex) {
+      if (ex instanceof MissingDataException) {
+        throw ex;
+      }
+      info("Cannot fetch form information.");
+    }
+    return shadow(this, "formInfo", formInfo);
+  }
+
   get documentInfo() {
     const DocumentInfoValidators = {
       Title: isString,
@@ -722,8 +775,8 @@ class PDFDocument {
     const docInfo = {
       PDFFormatVersion: version,
       IsLinearized: !!this.linearization,
-      IsAcroFormPresent: this._hasAcroForm,
-      IsXFAPresent: !!this.xfa,
+      IsAcroFormPresent: this.formInfo.hasAcroForm,
+      IsXFAPresent: this.formInfo.hasXfa,
       IsCollectionPresent: !!this.catalog.collection,
     };
 
diff --git a/test/unit/document_spec.js b/test/unit/document_spec.js
index 503a3ce95..0586898d7 100644
--- a/test/unit/document_spec.js
+++ b/test/unit/document_spec.js
@@ -13,7 +13,10 @@
  * limitations under the License.
  */
 
-import { createIdFactory } from "./test_utils.js";
+import { createIdFactory, XRefMock } from "./test_utils.js";
+import { Dict, Name, Ref } from "../../src/core/primitives.js";
+import { PDFDocument } from "../../src/core/document.js";
+import { StringStream } from "../../src/core/stream.js";
 
 describe("document", function () {
   describe("Page", function () {
@@ -40,4 +43,111 @@ describe("document", function () {
       expect(idFactory1.getDocId()).toEqual("g_d0");
     });
   });
+
+  describe("PDFDocument", function () {
+    const pdfManager = {
+      get docId() {
+        return "d0";
+      },
+    };
+    const stream = new StringStream("Dummy_PDF_data");
+
+    function getDocument(acroForm) {
+      const pdfDocument = new PDFDocument(pdfManager, stream);
+      pdfDocument.catalog = { acroForm };
+      return pdfDocument;
+    }
+
+    it("should get form info when no form data is present", function () {
+      const pdfDocument = getDocument(null);
+      expect(pdfDocument.formInfo).toEqual({
+        hasAcroForm: false,
+        hasXfa: false,
+      });
+    });
+
+    it("should get form info when XFA is present", function () {
+      const acroForm = new Dict();
+
+      // The `XFA` entry can only be a non-empty array or stream.
+      acroForm.set("XFA", []);
+      let pdfDocument = getDocument(acroForm);
+      expect(pdfDocument.formInfo).toEqual({
+        hasAcroForm: false,
+        hasXfa: false,
+      });
+
+      acroForm.set("XFA", ["foo", "bar"]);
+      pdfDocument = getDocument(acroForm);
+      expect(pdfDocument.formInfo).toEqual({
+        hasAcroForm: false,
+        hasXfa: true,
+      });
+
+      acroForm.set("XFA", new StringStream(""));
+      pdfDocument = getDocument(acroForm);
+      expect(pdfDocument.formInfo).toEqual({
+        hasAcroForm: false,
+        hasXfa: false,
+      });
+
+      acroForm.set("XFA", new StringStream("non-empty"));
+      pdfDocument = getDocument(acroForm);
+      expect(pdfDocument.formInfo).toEqual({
+        hasAcroForm: false,
+        hasXfa: true,
+      });
+    });
+
+    it("should get form info when AcroForm is present", function () {
+      const acroForm = new Dict();
+
+      // The `Fields` entry can only be a non-empty array.
+      acroForm.set("Fields", []);
+      let pdfDocument = getDocument(acroForm);
+      expect(pdfDocument.formInfo).toEqual({
+        hasAcroForm: false,
+        hasXfa: false,
+      });
+
+      acroForm.set("Fields", ["foo", "bar"]);
+      pdfDocument = getDocument(acroForm);
+      expect(pdfDocument.formInfo).toEqual({
+        hasAcroForm: true,
+        hasXfa: false,
+      });
+
+      // If the first bit of the `SigFlags` entry is set and the `Fields` array
+      // only contains document signatures, then there is no AcroForm data.
+      acroForm.set("Fields", ["foo", "bar"]);
+      acroForm.set("SigFlags", 2);
+      pdfDocument = getDocument(acroForm);
+      expect(pdfDocument.formInfo).toEqual({
+        hasAcroForm: true,
+        hasXfa: false,
+      });
+
+      const annotationDict = new Dict();
+      annotationDict.set("FT", Name.get("Sig"));
+      annotationDict.set("Rect", [0, 0, 0, 0]);
+      const annotationRef = Ref.get(11, 0);
+
+      const kidsDict = new Dict();
+      kidsDict.set("Kids", [annotationRef]);
+      const kidsRef = Ref.get(10, 0);
+
+      acroForm.set("Fields", [kidsRef]);
+      acroForm.set("SigFlags", 3);
+      pdfDocument = getDocument(acroForm);
+      // The mock must be set on the document under test to resolve the refs.
+      pdfDocument.xref = new XRefMock([
+        { ref: annotationRef, data: annotationDict },
+        { ref: kidsRef, data: kidsDict },
+      ]);
+      expect(pdfDocument.formInfo).toEqual({
+        hasAcroForm: false,
+        hasXfa: false,
+      });
+    });
+  });
 });
diff --git a/web/app.js b/web/app.js
index 9bbe26deb..1ba89133d 100644
--- a/web/app.js
+++ b/web/app.js
@@ -1426,14 +1426,14 @@ const PDFViewerApplication = {
       this.setTitle(contentDispositionFilename);
     }
 
-    if (info.IsXFAPresent) {
+    if (info.IsXFAPresent && !info.IsAcroFormPresent) {
       console.warn("Warning: XFA is not supported");
       this._delayedFallback(UNSUPPORTED_FEATURES.forms);
     } else if (
-      info.IsAcroFormPresent &&
+      (info.IsAcroFormPresent || info.IsXFAPresent) &&
       !this.pdfViewer.renderInteractiveForms
     ) {
-      console.warn("Warning: AcroForm support is not enabled");
+      console.warn("Warning: Interactive form support is not enabled");
       this._delayedFallback(UNSUPPORTED_FEATURES.forms);
     }
 
@@ -1454,8 +1454,10 @@ const PDFViewerApplication = {
       });
     }
     let formType = null;
-    if (info.IsAcroFormPresent) {
-      formType = info.IsXFAPresent ? "xfa" : "acroform";
+    if (info.IsXFAPresent) {
+      formType = "xfa";
+    } else if (info.IsAcroFormPresent) {
+      formType = "acroform";
     }
     this.externalServices.reportTelemetry({
       type: "documentInfo",

From 0f229d537f96ed2e6a945959a5e7b4a2865d6c16 Mon Sep 17 00:00:00 2001
From: Tim van der Meij <timvandermeij@gmail.com>
Date: Tue, 25 Aug 2020 23:22:21 +0200
Subject: [PATCH 6/6] Inline the `setup` method in the `parse` method in
 `src/core/document.js`

Now that the `parse` method is simplified we can inline the `setup`
method in the `parse` method since it's only two lines of code. This
avoids some indirection.
---
 src/core/document.js | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/core/document.js b/src/core/document.js
index c970399da..cd459f6f7 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -573,7 +573,8 @@ class PDFDocument {
   }
 
   parse(recoveryMode) {
-    this.setup(recoveryMode);
+    this.xref.parse(recoveryMode);
+    this.catalog = new Catalog(this.pdfManager, this.xref);
 
     // The `checkHeader` method is called before this method and parses the
     // version from the header. The specification states in section 7.5.2
@@ -676,11 +677,6 @@ class PDFDocument {
     this.xref.setStartXRef(this.startXRef);
   }
 
-  setup(recoveryMode) {
-    this.xref.parse(recoveryMode);
-    this.catalog = new Catalog(this.pdfManager, this.xref);
-  }
-
   get numPages() {
     const linearization = this.linearization;
     const num = linearization ? linearization.numPages : this.catalog.numPages;