diff --git a/src/core/annotation.js b/src/core/annotation.js index 1c3490f2b..5a1bf56a0 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -1125,7 +1125,12 @@ class Annotation { } if (loopDict.has("T")) { - fieldName.unshift(stringToPDFString(loopDict.get("T"))); + const t = stringToPDFString(loopDict.get("T")); + if (!t.startsWith("#")) { + // If it starts with a # then it's a class which is not a concept for + // datasets elements (https://www.pdfa.org/norm-refs/XFA-3_3.pdf#page=96). + fieldName.unshift(t); + } } } return fieldName.join("."); @@ -1860,7 +1865,7 @@ class WidgetAnnotation extends Annotation { } const xfa = { - path: stringToPDFString(dict.get("T") || ""), + path: this.data.fieldName, value, }; @@ -2787,7 +2792,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation { } const xfa = { - path: stringToPDFString(dict.get("T") || ""), + path: this.data.fieldName, value: value ? this.data.exportValue : "", }; @@ -2850,7 +2855,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation { } const xfa = { - path: stringToPDFString(dict.get("T") || ""), + path: this.data.fieldName, value: value ? this.data.buttonValue : "", }; diff --git a/src/core/worker.js b/src/core/worker.js index 484759f4e..13661b528 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -831,6 +831,13 @@ class WorkerMessageHandler { setupDoc(docParams); docParams = null; // we don't need docParams anymore -- saving memory. }); + + if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { + handler.on("GetXFADatasets", function (data) { + return pdfManager.ensureDoc("xfaDatasets"); + }); + } + return workerHandlerName; } diff --git a/src/core/writer.js b/src/core/writer.js index 2fe03aaa3..2f3e629c3 100644 --- a/src/core/writer.js +++ b/src/core/writer.js @@ -139,7 +139,12 @@ function writeXFADataForAcroform(str, newRefs) { if (!path) { continue; } - const node = xml.documentElement.searchNode(parseXFAPath(path), 0); + const nodePath = parseXFAPath(path); + let node = xml.documentElement.searchNode(nodePath, 0); + if (!node && nodePath.length > 1) { + // If we're lucky the last element in the path will identify the node. + node = xml.documentElement.searchNode([nodePath.at(-1)], 0); + } if (node) { if (Array.isArray(value)) { node.childNodes = value.map(val => new SimpleDOMNode("value", val)); diff --git a/src/display/api.js b/src/display/api.js index e578f15fc..4d07cb27f 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -782,6 +782,15 @@ class PDFDocumentProxy { constructor(pdfInfo, transport) { this._pdfInfo = pdfInfo; this._transport = transport; + + if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { + // For testing purposes. + Object.defineProperty(this, "getXFADatasets", { + value: () => { + return this._transport.getXFADatasets(); + }, + }); + } } /** @@ -2349,6 +2358,15 @@ class WorkerTransport { this.downloadInfoCapability = createPromiseCapability(); this.setupMessageHandler(); + + if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { + // For testing purposes. + Object.defineProperty(this, "getXFADatasets", { + value: () => { + return this.messageHandler.sendWithPromise("GetXFADatasets", null); + }, + }); + } } #cacheSimpleMethod(name, data = null) { diff --git a/test/pdfs/f1040_2022.pdf.link b/test/pdfs/f1040_2022.pdf.link new file mode 100644 index 000000000..52f989524 --- /dev/null +++ b/test/pdfs/f1040_2022.pdf.link @@ -0,0 +1,2 @@ +https://web.archive.org/web/20230218015355/https://www.irs.gov/pub/irs-pdf/f1040.pdf + diff --git a/test/pdfs/issue16081.pdf.link b/test/pdfs/issue16081.pdf.link new file mode 100644 index 000000000..860122adb --- /dev/null +++ b/test/pdfs/issue16081.pdf.link @@ -0,0 +1,2 @@ +https://github.com/mozilla/pdf.js/files/10808293/pptc153.pdf + diff --git a/test/test_manifest.json b/test/test_manifest.json index 2137c1ce7..d2daad3a1 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -7364,5 +7364,19 @@ "rounds": 1, "type": "eq", "annotations": true + }, + { + "id": "issue16081", + "file": "pdfs/issue16081.pdf", + "md5": "e890d55c9527a116431ebef3efb960ff", + "link": true, + "type": "other" + }, + { + "id": "f1040_2022.pdf", + "file": "pdfs/f1040_2022.pdf", + "md5": "1eec7137e471a3d68a20855a04293b5b", + "link": true, + "type": "other" } ] diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index a6063a364..00d428749 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -82,6 +82,21 @@ describe("api", function () { .join(""); } + function getNamedNodeInXML(node, path) { + for (const component of path.split(".")) { + if (!node.childNodes) { + break; + } + for (const child of node.childNodes) { + if (child.nodeName === component) { + node = child; + break; + } + } + } + return node; + } + describe("getDocument", function () { it("creates pdf doc from URL-string", async function () { const urlStr = TEST_PDFS_PATH + basicApiFileName; @@ -1903,6 +1918,68 @@ describe("api", function () { await loadingTask.destroy(); }); + it("write a value in an annotation, save the pdf and check the value in xfa datasets (1)", async function () { + if (isNodeJS) { + pending("Linked test-cases are not supported in Node.js."); + } + + let loadingTask = getDocument(buildGetDocumentParams("issue16081.pdf")); + let pdfDoc = await loadingTask.promise; + const value = "Hello World"; + + pdfDoc.annotationStorage.setValue("2055R", { value }); + + const data = await pdfDoc.saveDocument(); + await loadingTask.destroy(); + + loadingTask = getDocument(data); + pdfDoc = await loadingTask.promise; + const datasets = await pdfDoc.getXFADatasets(); + + const surName = getNamedNodeInXML( + datasets.node, + "xfa:data.PPTC_153.Page1.PersonalInformation.TitleAndNameInformation.PersonalInfo.Surname.#text" + ); + expect(surName.nodeValue).toEqual(value); + + await loadingTask.destroy(); + }); + + it("write a value in an annotation, save the pdf and check the value in xfa datasets (2)", async function () { + if (isNodeJS) { + pending("Linked test-cases are not supported in Node.js."); + } + + // In this file the path to the fields are wrong but the last path element + // is unique so we can guess what the node is. + let loadingTask = getDocument(buildGetDocumentParams("f1040_2022.pdf")); + let pdfDoc = await loadingTask.promise; + + pdfDoc.annotationStorage.setValue("1573R", { value: "hello" }); + pdfDoc.annotationStorage.setValue("1577R", { value: "world" }); + + const data = await pdfDoc.saveDocument(); + await loadingTask.destroy(); + + loadingTask = getDocument(data); + pdfDoc = await loadingTask.promise; + const datasets = await pdfDoc.getXFADatasets(); + + const firstName = getNamedNodeInXML( + datasets.node, + "xfa:data.topmostSubform.f1_02.#text" + ); + expect(firstName.nodeValue).toEqual("hello"); + + const lastName = getNamedNodeInXML( + datasets.node, + "xfa:data.topmostSubform.f1_06.#text" + ); + expect(lastName.nodeValue).toEqual("world"); + + await loadingTask.destroy(); + }); + describe("Cross-origin", function () { let loadingTask; function _checkCanLoad(expectSuccess, filename, options) {