[Acroform] Use the full path to find the node in the XFA datasets where to store the value

I noticed several 'Path not found' errors caused by a field called #subform[2]. According to the XFA specs, a leading hash denotes a class of elements in the template tree. When we're looking for a node in the datasets tree, it doesn't make sense to search for a class. Hence, path elements starting with a hash are simply skipped.
2023-02-22 22:08:21 +01:00 · 2023-02-22 22:08:21 +01:00 · 3a21423386
commit 3a21423386
parent e676c9388d
8 changed files with 135 additions and 5 deletions
--- a/src/core/annotation.js
+++ b/src/core/annotation.js
@ -1125,7 +1125,12 @@ class Annotation {
      }

      if (loopDict.has("T")) {
-        fieldName.unshift(stringToPDFString(loopDict.get("T")));
+        const t = stringToPDFString(loopDict.get("T"));
+        if (!t.startsWith("#")) {
+          // If it starts with a # then it's a class which is not a concept for
+          // datasets elements (https://www.pdfa.org/norm-refs/XFA-3_3.pdf#page=96).
+          fieldName.unshift(t);
+        }
      }
    }
    return fieldName.join(".");
@ -1860,7 +1865,7 @@ class WidgetAnnotation extends Annotation {
    }

    const xfa = {
-      path: stringToPDFString(dict.get("T") || ""),
+      path: this.data.fieldName,
      value,
    };

@ -2787,7 +2792,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
    }

    const xfa = {
-      path: stringToPDFString(dict.get("T") || ""),
+      path: this.data.fieldName,
      value: value ? this.data.exportValue : "",
    };

@ -2850,7 +2855,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
    }

    const xfa = {
-      path: stringToPDFString(dict.get("T") || ""),
+      path: this.data.fieldName,
      value: value ? this.data.buttonValue : "",
    };

--- a/src/core/worker.js
+++ b/src/core/worker.js
@ -831,6 +831,13 @@ class WorkerMessageHandler {
      setupDoc(docParams);
      docParams = null; // we don't need docParams anymore -- saving memory.
    });
+
+    if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
+      handler.on("GetXFADatasets", function (data) {
+        return pdfManager.ensureDoc("xfaDatasets");
+      });
+    }
+
    return workerHandlerName;
  }

--- a/src/core/writer.js
+++ b/src/core/writer.js
@ -139,7 +139,12 @@ function writeXFADataForAcroform(str, newRefs) {
    if (!path) {
      continue;
    }
-    const node = xml.documentElement.searchNode(parseXFAPath(path), 0);
+    const nodePath = parseXFAPath(path);
+    let node = xml.documentElement.searchNode(nodePath, 0);
+    if (!node && nodePath.length > 1) {
+      // If we're lucky the last element in the path will identify the node.
+      node = xml.documentElement.searchNode([nodePath.at(-1)], 0);
+    }
    if (node) {
      if (Array.isArray(value)) {
        node.childNodes = value.map(val => new SimpleDOMNode("value", val));
--- a/src/display/api.js
+++ b/src/display/api.js
@ -782,6 +782,15 @@ class PDFDocumentProxy {
  constructor(pdfInfo, transport) {
    this._pdfInfo = pdfInfo;
    this._transport = transport;
+
+    if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
+      // For testing purposes.
+      Object.defineProperty(this, "getXFADatasets", {
+        value: () => {
+          return this._transport.getXFADatasets();
+        },
+      });
+    }
  }

  /**
@ -2349,6 +2358,15 @@ class WorkerTransport {
    this.downloadInfoCapability = createPromiseCapability();

    this.setupMessageHandler();
+
+    if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
+      // For testing purposes.
+      Object.defineProperty(this, "getXFADatasets", {
+        value: () => {
+          return this.messageHandler.sendWithPromise("GetXFADatasets", null);
+        },
+      });
+    }
  }

  #cacheSimpleMethod(name, data = null) {
--- a/test/pdfs/f1040_2022.pdf.link
+++ b/test/pdfs/f1040_2022.pdf.link
@ -0,0 +1,2 @@
+https://web.archive.org/web/20230218015355/https://www.irs.gov/pub/irs-pdf/f1040.pdf
+
--- a/test/pdfs/issue16081.pdf.link
+++ b/test/pdfs/issue16081.pdf.link
@ -0,0 +1,2 @@
+https://github.com/mozilla/pdf.js/files/10808293/pptc153.pdf
+
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@ -7364,5 +7364,19 @@
      "rounds": 1,
      "type": "eq",
      "annotations": true
+   },
+   {
+      "id": "issue16081",
+      "file": "pdfs/issue16081.pdf",
+      "md5": "e890d55c9527a116431ebef3efb960ff",
+      "link": true,
+      "type": "other"
+   },
+   {
+      "id": "f1040_2022.pdf",
+      "file": "pdfs/f1040_2022.pdf",
+      "md5": "1eec7137e471a3d68a20855a04293b5b",
+      "link": true,
+      "type": "other"
   }
 ]
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@ -82,6 +82,21 @@ describe("api", function () {
      .join("");
  }

+  function getNamedNodeInXML(node, path) { // Test helper: walk `path` (dot-separated node names) downwards from `node` and return the node reached.
+    for (const component of path.split(".")) {
+      if (!node.childNodes) { // Nothing to descend into: stop and return the node reached so far.
+        break;
+      }
+      for (const child of node.childNodes) {
+        if (child.nodeName === component) { // The first child whose name matches is selected.
+          node = child;
+          break;
+        }
+      } // If no child matched, `node` is unchanged and the next component is looked up from the same node.
+    }
+    return node;
+  }
+
  describe("getDocument", function () {
    it("creates pdf doc from URL-string", async function () {
      const urlStr = TEST_PDFS_PATH + basicApiFileName;
@ -1903,6 +1918,68 @@ describe("api", function () {
      await loadingTask.destroy();
    });

+    it("write a value in an annotation, save the pdf and check the value in xfa datasets (1)", async function () { // Round trip: set a value, save, reload, then read it back from the XFA datasets.
+      if (isNodeJS) {
+        pending("Linked test-cases are not supported in Node.js.");
+      }
+
+      let loadingTask = getDocument(buildGetDocumentParams("issue16081.pdf"));
+      let pdfDoc = await loadingTask.promise;
+      const value = "Hello World";
+
+      pdfDoc.annotationStorage.setValue("2055R", { value }); // Store the new value under the annotation storage key "2055R".
+
+      const data = await pdfDoc.saveDocument();
+      await loadingTask.destroy();
+
+      loadingTask = getDocument(data); // Reload the document from the data returned by saveDocument().
+      pdfDoc = await loadingTask.promise;
+      const datasets = await pdfDoc.getXFADatasets();
+
+      const surName = getNamedNodeInXML( // Follow the full dotted path down to the text node of Surname.
+        datasets.node,
+        "xfa:data.PPTC_153.Page1.PersonalInformation.TitleAndNameInformation.PersonalInfo.Surname.#text"
+      );
+      expect(surName.nodeValue).toEqual(value);
+
+      await loadingTask.destroy();
+    });
+
+    it("write a value in an annotation, save the pdf and check the value in xfa datasets (2)", async function () { // Round trip with two fields: set values, save, reload, then read them back from the XFA datasets.
+      if (isNodeJS) {
+        pending("Linked test-cases are not supported in Node.js.");
+      }
+
+      // In this file the paths to the fields are wrong, but the last path
+      // element is unique, so we can guess which node is meant.
+      let loadingTask = getDocument(buildGetDocumentParams("f1040_2022.pdf"));
+      let pdfDoc = await loadingTask.promise;
+
+      pdfDoc.annotationStorage.setValue("1573R", { value: "hello" }); // Value later expected in node f1_02.
+      pdfDoc.annotationStorage.setValue("1577R", { value: "world" }); // Value later expected in node f1_06.
+
+      const data = await pdfDoc.saveDocument();
+      await loadingTask.destroy();
+
+      loadingTask = getDocument(data); // Reload the document from the data returned by saveDocument().
+      pdfDoc = await loadingTask.promise;
+      const datasets = await pdfDoc.getXFADatasets();
+
+      const firstName = getNamedNodeInXML(
+        datasets.node,
+        "xfa:data.topmostSubform.f1_02.#text"
+      );
+      expect(firstName.nodeValue).toEqual("hello");
+
+      const lastName = getNamedNodeInXML(
+        datasets.node,
+        "xfa:data.topmostSubform.f1_06.#text"
+      );
+      expect(lastName.nodeValue).toEqual("world");
+
+      await loadingTask.destroy();
+    });
+
    describe("Cross-origin", function () {
      let loadingTask;
      function _checkCanLoad(expectSuccess, filename, options) {
				`@ -0,0 +1,2 @@`
				`https://web.archive.org/web/20230218015355/https://www.irs.gov/pub/irs-pdf/f1040.pdf`
				`@ -0,0 +1,2 @@`
				`https://github.com/mozilla/pdf.js/files/10808293/pptc153.pdf`