[Acroform] Use the full path to find the node in the XFA datasets where to store the value

I noticed several 'Path not found' errors because of a field called #subform[2].
From the XFA specs, the hash is used for a class of elements in the template tree.
When we're looking for a node in the datasets tree, it doesn't make sense to search
for a class. Hence path elements starting with a hash are simply skipped.
This commit is contained in:
Calixte Denizet 2023-02-22 22:08:21 +01:00
parent e676c9388d
commit 3a21423386
8 changed files with 135 additions and 5 deletions

View File

@ -1125,7 +1125,12 @@ class Annotation {
}
if (loopDict.has("T")) {
fieldName.unshift(stringToPDFString(loopDict.get("T")));
const t = stringToPDFString(loopDict.get("T"));
if (!t.startsWith("#")) {
// If it starts with a # then it's a class which is not a concept for
// datasets elements (https://www.pdfa.org/norm-refs/XFA-3_3.pdf#page=96).
fieldName.unshift(t);
}
}
}
return fieldName.join(".");
@ -1860,7 +1865,7 @@ class WidgetAnnotation extends Annotation {
}
const xfa = {
path: stringToPDFString(dict.get("T") || ""),
path: this.data.fieldName,
value,
};
@ -2787,7 +2792,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
}
const xfa = {
path: stringToPDFString(dict.get("T") || ""),
path: this.data.fieldName,
value: value ? this.data.exportValue : "",
};
@ -2850,7 +2855,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
}
const xfa = {
path: stringToPDFString(dict.get("T") || ""),
path: this.data.fieldName,
value: value ? this.data.buttonValue : "",
};

View File

@ -831,6 +831,13 @@ class WorkerMessageHandler {
setupDoc(docParams);
docParams = null; // we don't need docParams anymore -- saving memory.
});
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
handler.on("GetXFADatasets", function (data) {
return pdfManager.ensureDoc("xfaDatasets");
});
}
return workerHandlerName;
}

View File

@ -139,7 +139,12 @@ function writeXFADataForAcroform(str, newRefs) {
if (!path) {
continue;
}
const node = xml.documentElement.searchNode(parseXFAPath(path), 0);
const nodePath = parseXFAPath(path);
let node = xml.documentElement.searchNode(nodePath, 0);
if (!node && nodePath.length > 1) {
// If we're lucky the last element in the path will identify the node.
node = xml.documentElement.searchNode([nodePath.at(-1)], 0);
}
if (node) {
if (Array.isArray(value)) {
node.childNodes = value.map(val => new SimpleDOMNode("value", val));

View File

@ -782,6 +782,15 @@ class PDFDocumentProxy {
/**
 * Keep references to the document info and to the transport object.
 *
 * When `PDFJSDev` is undefined, or when it reports a "TESTING" build, a
 * `getXFADatasets` property is additionally defined on the instance; it
 * simply forwards to `getXFADatasets` on the transport.
 *
 * @param pdfInfo - Stored as `this._pdfInfo`.
 * @param transport - Stored as `this._transport`; must provide
 *   `getXFADatasets()` in the builds where the testing hook is installed.
 */
constructor(pdfInfo, transport) {
this._pdfInfo = pdfInfo;
this._transport = transport;
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes.
// Only `value` is given in the descriptor, so the property is
// non-enumerable, non-writable and non-configurable.
Object.defineProperty(this, "getXFADatasets", {
value: () => {
return this._transport.getXFADatasets();
},
});
}
}
/**
@ -2349,6 +2358,15 @@ class WorkerTransport {
this.downloadInfoCapability = createPromiseCapability();
this.setupMessageHandler();
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes.
Object.defineProperty(this, "getXFADatasets", {
value: () => {
return this.messageHandler.sendWithPromise("GetXFADatasets", null);
},
});
}
}
#cacheSimpleMethod(name, data = null) {

View File

@ -0,0 +1,2 @@
https://web.archive.org/web/20230218015355/https://www.irs.gov/pub/irs-pdf/f1040.pdf

View File

@ -0,0 +1,2 @@
https://github.com/mozilla/pdf.js/files/10808293/pptc153.pdf

View File

@ -7364,5 +7364,19 @@
"rounds": 1,
"type": "eq",
"annotations": true
},
{
"id": "issue16081",
"file": "pdfs/issue16081.pdf",
"md5": "e890d55c9527a116431ebef3efb960ff",
"link": true,
"type": "other"
},
{
"id": "f1040_2022.pdf",
"file": "pdfs/f1040_2022.pdf",
"md5": "1eec7137e471a3d68a20855a04293b5b",
"link": true,
"type": "other"
}
]

View File

@ -82,6 +82,21 @@ describe("api", function () {
.join("");
}
/**
 * Walk down an XML-like tree following a dot-separated list of node names.
 *
 * At each step the first child whose `nodeName` equals the current path
 * component is selected. Unlike a best-effort lookup, a component that cannot
 * be resolved is never skipped: the search stops and `null` is returned, so a
 * caller cannot end up asserting against an unrelated node.
 *
 * @param {Object} node - The node to start from. Nodes are expected to expose
 *   `nodeName` and, when they have children, an iterable `childNodes`.
 * @param {string} path - Dot-separated node names, e.g. "a.b.#text". An empty
 *   path designates `node` itself.
 * @returns {Object|null} The node designated by `path`, or `null` when one of
 *   the components has no matching child.
 */
function getNamedNodeInXML(node, path) {
  if (path === "") {
    return node;
  }

  let current = node;
  for (const component of path.split(".")) {
    const children = current.childNodes;
    if (!children) {
      // There are still components to resolve but nowhere to look for them.
      return null;
    }

    let match = null;
    for (const child of children) {
      if (child.nodeName === component) {
        match = child;
        break;
      }
    }
    if (!match) {
      // Don't silently ignore this component and keep searching from the
      // same node: that could resolve the path to the wrong element.
      return null;
    }
    current = match;
  }
  return current;
}
describe("getDocument", function () {
it("creates pdf doc from URL-string", async function () {
const urlStr = TEST_PDFS_PATH + basicApiFileName;
@ -1903,6 +1918,68 @@ describe("api", function () {
await loadingTask.destroy();
});
it("write a value in an annotation, save the pdf and check the value in xfa datasets (1)", async function () {
  // This test relies on a linked PDF, so it is marked as pending in Node.js.
  if (isNodeJS) {
    pending("Linked test-cases are not supported in Node.js.");
  }

  const expectedText = "Hello World";

  // Load the original document, set the value and serialize the result.
  const originalTask = getDocument(buildGetDocumentParams("issue16081.pdf"));
  const originalDoc = await originalTask.promise;
  originalDoc.annotationStorage.setValue("2055R", { value: expectedText });
  const savedData = await originalDoc.saveDocument();
  await originalTask.destroy();

  // Reload the saved bytes and look the value up in the XFA datasets.
  const reloadedTask = getDocument(savedData);
  const reloadedDoc = await reloadedTask.promise;
  const xfaDatasets = await reloadedDoc.getXFADatasets();
  const surnameText = getNamedNodeInXML(
    xfaDatasets.node,
    "xfa:data.PPTC_153.Page1.PersonalInformation.TitleAndNameInformation.PersonalInfo.Surname.#text"
  );
  expect(surnameText.nodeValue).toEqual(expectedText);

  await reloadedTask.destroy();
});
it("write a value in an annotation, save the pdf and check the value in xfa datasets (2)", async function () {
  // This test relies on a linked PDF, so it is marked as pending in Node.js.
  if (isNodeJS) {
    pending("Linked test-cases are not supported in Node.js.");
  }

  // In this file the paths to the fields are wrong, but the last path element
  // is unique, so the node can still be guessed.
  const originalTask = getDocument(buildGetDocumentParams("f1040_2022.pdf"));
  const originalDoc = await originalTask.promise;
  const { annotationStorage } = originalDoc;
  annotationStorage.setValue("1573R", { value: "hello" });
  annotationStorage.setValue("1577R", { value: "world" });
  const savedData = await originalDoc.saveDocument();
  await originalTask.destroy();

  // Reload the saved bytes and check both values in the XFA datasets.
  const reloadedTask = getDocument(savedData);
  const reloadedDoc = await reloadedTask.promise;
  const xfaDatasets = await reloadedDoc.getXFADatasets();

  const firstNameText = getNamedNodeInXML(
    xfaDatasets.node,
    "xfa:data.topmostSubform.f1_02.#text"
  );
  expect(firstNameText.nodeValue).toEqual("hello");

  const lastNameText = getNamedNodeInXML(
    xfaDatasets.node,
    "xfa:data.topmostSubform.f1_06.#text"
  );
  expect(lastNameText.nodeValue).toEqual("world");

  await reloadedTask.destroy();
});
describe("Cross-origin", function () {
let loadingTask;
function _checkCanLoad(expectSuccess, filename, options) {