Save form data in XFA datasets when pdf is a mix of acroforms and xfa (#12344)

* Move display/xml_parser.js in shared to use it in worker

* Save form data in XFA datasets when pdf is a mix of acroforms and xfa

Co-authored-by: Brendan Dahl <brendan.dahl@gmail.com>
This commit is contained in:
calixteman 2020-09-09 00:13:52 +02:00 committed by GitHub
parent 622e2fbd3a
commit 68b99c59ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 416 additions and 19 deletions

View File

@ -1073,6 +1073,7 @@ class WidgetAnnotation extends Annotation {
return null; return null;
} }
const value = annotationStorage[this.data.id];
const bbox = [ const bbox = [
0, 0,
0, 0,
@ -1080,11 +1081,15 @@ class WidgetAnnotation extends Annotation {
this.data.rect[3] - this.data.rect[1], this.data.rect[3] - this.data.rect[1],
]; ];
const xfa = {
path: stringToPDFString(dict.get("T") || ""),
value,
};
const newRef = evaluator.xref.getNewRef(); const newRef = evaluator.xref.getNewRef();
const AP = new Dict(evaluator.xref); const AP = new Dict(evaluator.xref);
AP.set("N", newRef); AP.set("N", newRef);
const value = annotationStorage[this.data.id];
const encrypt = evaluator.xref.encrypt; const encrypt = evaluator.xref.encrypt;
let originalTransform = null; let originalTransform = null;
let newTransform = null; let newTransform = null;
@ -1120,9 +1125,9 @@ class WidgetAnnotation extends Annotation {
return [ return [
// data for the original object // data for the original object
// V field changed + reference for new AP // V field changed + reference for new AP
{ ref: this.ref, data: bufferOriginal.join("") }, { ref: this.ref, data: bufferOriginal.join(""), xfa },
// data for the new AP // data for the new AP
{ ref: newRef, data: bufferNew.join("") }, { ref: newRef, data: bufferNew.join(""), xfa: null },
]; ];
} }
@ -1521,6 +1526,11 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
return null; return null;
} }
const xfa = {
path: stringToPDFString(dict.get("T") || ""),
value: value ? this.data.exportValue : "",
};
const name = Name.get(value ? this.data.exportValue : "Off"); const name = Name.get(value ? this.data.exportValue : "Off");
dict.set("V", name); dict.set("V", name);
dict.set("AS", name); dict.set("AS", name);
@ -1539,7 +1549,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
writeDict(dict, buffer, originalTransform); writeDict(dict, buffer, originalTransform);
buffer.push("\nendobj\n"); buffer.push("\nendobj\n");
return [{ ref: this.ref, data: buffer.join("") }]; return [{ ref: this.ref, data: buffer.join(""), xfa }];
} }
async _saveRadioButton(evaluator, task, annotationStorage) { async _saveRadioButton(evaluator, task, annotationStorage) {
@ -1555,6 +1565,11 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
return null; return null;
} }
const xfa = {
path: stringToPDFString(dict.get("T") || ""),
value: value ? this.data.buttonValue : "",
};
const name = Name.get(value ? this.data.buttonValue : "Off"); const name = Name.get(value ? this.data.buttonValue : "Off");
let parentBuffer = null; let parentBuffer = null;
const encrypt = evaluator.xref.encrypt; const encrypt = evaluator.xref.encrypt;
@ -1593,9 +1608,13 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
writeDict(dict, buffer, originalTransform); writeDict(dict, buffer, originalTransform);
buffer.push("\nendobj\n"); buffer.push("\nendobj\n");
const newRefs = [{ ref: this.ref, data: buffer.join("") }]; const newRefs = [{ ref: this.ref, data: buffer.join(""), xfa }];
if (parentBuffer !== null) { if (parentBuffer !== null) {
newRefs.push({ ref: this.parent, data: parentBuffer.join("") }); newRefs.push({
ref: this.parent,
data: parentBuffer.join(""),
xfa: null,
});
} }
return newRefs; return newRefs;

View File

@ -32,7 +32,7 @@ import {
VerbosityLevel, VerbosityLevel,
warn, warn,
} from "../shared/util.js"; } from "../shared/util.js";
import { clearPrimitiveCaches, Ref } from "./primitives.js"; import { clearPrimitiveCaches, Dict, isDict, Ref } from "./primitives.js";
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js"; import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
import { incrementalUpdate } from "./writer.js"; import { incrementalUpdate } from "./writer.js";
import { isNodeJS } from "../shared/is_node.js"; import { isNodeJS } from "../shared/is_node.js";
@ -521,7 +521,10 @@ class WorkerMessageHandler {
filename, filename,
}) { }) {
pdfManager.requestLoadedStream(); pdfManager.requestLoadedStream();
const promises = [pdfManager.onLoadedStream()]; const promises = [
pdfManager.onLoadedStream(),
pdfManager.ensureCatalog("acroForm"),
];
const document = pdfManager.pdfDocument; const document = pdfManager.pdfDocument;
for (let pageIndex = 0; pageIndex < numPages; pageIndex++) { for (let pageIndex = 0; pageIndex < numPages; pageIndex++) {
promises.push( promises.push(
@ -532,7 +535,7 @@ class WorkerMessageHandler {
); );
} }
return Promise.all(promises).then(([stream, ...refs]) => { return Promise.all(promises).then(([stream, acroForm, ...refs]) => {
let newRefs = []; let newRefs = [];
for (const ref of refs) { for (const ref of refs) {
newRefs = ref newRefs = ref
@ -545,6 +548,20 @@ class WorkerMessageHandler {
return stream.bytes; return stream.bytes;
} }
acroForm = isDict(acroForm) ? acroForm : Dict.empty;
const xfa = acroForm.get("XFA") || [];
let xfaDatasets = null;
if (Array.isArray(xfa)) {
for (let i = 0, ii = xfa.length; i < ii; i += 2) {
if (xfa[i] === "datasets") {
xfaDatasets = xfa[i + 1];
}
}
} else {
// TODO: Support XFA streams.
warn("Unsupported XFA type.");
}
const xref = document.xref; const xref = document.xref;
let newXrefInfo = Object.create(null); let newXrefInfo = Object.create(null);
if (xref.trailer) { if (xref.trailer) {
@ -572,7 +589,13 @@ class WorkerMessageHandler {
} }
xref.resetNewRef(); xref.resetNewRef();
return incrementalUpdate(stream.bytes, newXrefInfo, newRefs); return incrementalUpdate(
stream.bytes,
newXrefInfo,
newRefs,
xref,
xfaDatasets
);
}); });
}); });

View File

@ -14,8 +14,14 @@
*/ */
/* eslint no-var: error */ /* eslint no-var: error */
import { bytesToString, escapeString } from "../shared/util.js"; import {
bytesToString,
escapeString,
parseXFAPath,
warn,
} from "../shared/util.js";
import { Dict, isDict, isName, isRef, isStream, Name } from "./primitives.js"; import { Dict, isDict, isName, isRef, isStream, Name } from "./primitives.js";
import { SimpleDOMNode, SimpleXMLParser } from "../shared/xml_parser.js";
import { calculateMD5 } from "./crypto.js"; import { calculateMD5 } from "./crypto.js";
function writeDict(dict, buffer, transform) { function writeDict(dict, buffer, transform) {
@ -123,7 +129,55 @@ function computeMD5(filesize, xrefInfo) {
return bytesToString(calculateMD5(array)); return bytesToString(calculateMD5(array));
} }
function incrementalUpdate(originalData, xrefInfo, newRefs) { function updateXFA(datasetsRef, newRefs, xref) {
// updateXFA: copies the form values carried by the `xfa` entries of `newRefs`
// into the XFA datasets XML, then appends the rewritten datasets object to
// `newRefs`. `newRefs` is mutated in place and nothing is returned.
// Nothing to do when there is no datasets reference or no xref to fetch it
// from.
if (datasetsRef === null || xref === null) {
return;
}
const datasets = xref.fetchIfRef(datasetsRef);
// NOTE(review): assumes the fetched object exposes getBytes() (i.e. is a
// stream) — there is no guard here; confirm against callers.
const str = bytesToString(datasets.getBytes());
// Attributes are kept on the parsed nodes so that they are written back out
// when the tree is dumped below.
const xml = new SimpleXMLParser(/* hasAttributes */ true).parseFromString(
str
);
for (const { xfa } of newRefs) {
// Entries without XFA data (falsy `xfa`) are skipped.
if (!xfa) {
continue;
}
const { path, value } = xfa;
// An empty path cannot be matched against the XML tree.
if (!path) {
continue;
}
const node = xml.documentElement.searchNode(parseXFAPath(path), 0);
if (node) {
// Replace whatever the node contained with a single text node holding the
// new value.
node.childNodes = [new SimpleDOMNode("#text", value)];
} else {
warn(`Node not found for path: ${path}`);
}
}
// Serialize the (possibly modified) tree back to a string.
const buffer = [];
xml.documentElement.dump(buffer);
let updatedXml = buffer.join("");
const encrypt = xref.encrypt;
if (encrypt) {
// The cipher transform is created from the object number and generation of
// the datasets reference.
const transform = encrypt.createCipherTransform(
datasetsRef.num,
datasetsRef.gen
);
updatedXml = transform.encryptString(updatedXml);
}
// The object keeps the number/generation of the original datasets reference.
// NOTE(review): /Length is the JS string length, which presumably relies on
// the payload holding one byte per character — TODO confirm.
const data =
`${datasetsRef.num} ${datasetsRef.gen} obj\n` +
`<< /Type /EmbeddedFile /Length ${updatedXml.length}>>\nstream\n` +
updatedXml +
"\nendstream\nendobj\n";
newRefs.push({ ref: datasetsRef, data });
}
function incrementalUpdate(originalData, xrefInfo, newRefs, xref, datasetsRef) {
updateXFA(datasetsRef, newRefs, xref);
const newXref = new Dict(null); const newXref = new Dict(null);
const refForXrefTable = xrefInfo.newRef; const refForXrefTable = xrefInfo.newRef;

View File

@ -14,7 +14,7 @@
*/ */
import { assert } from "../shared/util.js"; import { assert } from "../shared/util.js";
import { SimpleXMLParser } from "./xml_parser.js"; import { SimpleXMLParser } from "../shared/xml_parser.js";
class Metadata { class Metadata {
constructor(data) { constructor(data) {

View File

@ -910,6 +910,73 @@ const createObjectURL = (function createObjectURLClosure() {
}; };
})(); })();
/**
 * Split a dotted XFA path into its components.
 *
 * AcroForm field names address repeated XFA elements with an array-like
 * suffix, e.g. `foo.bar[nnn]` (see XFA Spec Chapter 3 - Repeated Elements).
 * A component without such a suffix gets position 0.
 *
 * @param {string} path - XFA path name.
 * @returns {Array} - One `{ name, pos }` object per dot-separated component,
 *   in path order.
 */
function parseXFAPath(path) {
  // "<name>[<digits>]" anchored at the end of a single component.
  const indexedComponent = /(.+)\[([0-9]+)\]$/;
  const parsed = [];
  for (const part of path.split(".")) {
    const match = indexedComponent.exec(part);
    if (match === null) {
      parsed.push({ name: part, pos: 0 });
      continue;
    }
    const [, name, index] = match;
    parsed.push({ name, pos: parseInt(index, 10) });
  }
  return parsed;
}
// Named entities for the printable ASCII characters that are escaped in the
// generated XML, keyed by code point.
const XMLEntities = {
  /* < */ 0x3c: "&lt;",
  /* > */ 0x3e: "&gt;",
  /* & */ 0x26: "&amp;",
  /* " */ 0x22: "&quot;",
  /* ' */ 0x27: "&apos;",
};

/**
 * Escape a string so it can be embedded in XML text or attribute values.
 *
 * Printable ASCII (0x20-0x7e) is copied as is, except for the five characters
 * listed in `XMLEntities`, which become named entities. Every other code
 * point is written as an uppercase hexadecimal character reference
 * (`&#xHHHH;`).
 *
 * @param {string} str - The string to escape.
 * @returns {string} The escaped string; `str` itself when nothing had to be
 *   escaped.
 */
function encodeToXmlString(str) {
  const buffer = [];
  // Start of the pending run of characters that need no escaping.
  let start = 0;
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.codePointAt(i);
    if (0x20 <= char && char <= 0x7e) {
      // ascii
      const entity = XMLEntities[char];
      if (entity) {
        if (start < i) {
          buffer.push(str.substring(start, i));
        }
        buffer.push(entity);
        start = i + 1;
      }
    } else {
      if (start < i) {
        buffer.push(str.substring(start, i));
      }
      buffer.push(`&#x${char.toString(16).toUpperCase()};`);
      // Only code points above the BMP occupy two UTF-16 units. A lone
      // surrogate (U+D800-U+DFFF) or U+FFFE/U+FFFF is a single unit, so the
      // following character must not be skipped.
      if (char > 0xffff) {
        i++;
      }
      start = i + 1;
    }
  }

  if (buffer.length === 0) {
    return str;
  }
  if (start < str.length) {
    buffer.push(str.substring(start, str.length));
  }
  return buffer.join("");
}
export { export {
BaseException, BaseException,
FONT_IDENTITY_MATRIX, FONT_IDENTITY_MATRIX,
@ -947,6 +1014,7 @@ export {
createPromiseCapability, createPromiseCapability,
createObjectURL, createObjectURL,
escapeString, escapeString,
encodeToXmlString,
getModificationDate, getModificationDate,
getVerbosityLevel, getVerbosityLevel,
info, info,
@ -959,6 +1027,7 @@ export {
createValidAbsoluteUrl, createValidAbsoluteUrl,
IsLittleEndianCached, IsLittleEndianCached,
IsEvalSupportedCached, IsEvalSupportedCached,
parseXFAPath,
removeNullCharacters, removeNullCharacters,
setVerbosityLevel, setVerbosityLevel,
shadow, shadow,

View File

@ -16,6 +16,8 @@
// The code for XMLParserBase copied from // The code for XMLParserBase copied from
// https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts // https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts
import { encodeToXmlString } from "./util.js";
const XMLParserErrorCode = { const XMLParserErrorCode = {
NoError: 0, NoError: 0,
EndOfDocument: -1, EndOfDocument: -1,
@ -48,9 +50,9 @@ class XMLParserBase {
_resolveEntities(s) { _resolveEntities(s) {
return s.replace(/&([^;]+);/g, (all, entity) => { return s.replace(/&([^;]+);/g, (all, entity) => {
if (entity.substring(0, 2) === "#x") { if (entity.substring(0, 2) === "#x") {
return String.fromCharCode(parseInt(entity.substring(2), 16)); return String.fromCodePoint(parseInt(entity.substring(2), 16));
} else if (entity.substring(0, 1) === "#") { } else if (entity.substring(0, 1) === "#") {
return String.fromCharCode(parseInt(entity.substring(1), 10)); return String.fromCodePoint(parseInt(entity.substring(1), 10));
} }
switch (entity) { switch (entity) {
case "lt": case "lt":
@ -326,14 +328,99 @@ class SimpleDOMNode {
hasChildNodes() { hasChildNodes() {
return this.childNodes && this.childNodes.length > 0; return this.childNodes && this.childNodes.length > 0;
} }
/**
 * Find the node designated by `paths[pos..]` in the subtree rooted at this
 * node (this node itself is also a candidate).
 *
 * The subtree is walked depth-first in document order with an explicit stack
 * of `[parent, childIndex]` pairs. Each path component may match at any
 * depth below the node matched by the previous component.
 *
 * @param {Array} paths - Components with a `name` and a `pos` property.
 * @param {number} pos - Index in `paths` of the component to resolve first.
 * @returns {Object|null} The matching node, or `null` when none is found.
 */
searchNode(paths, pos) {
// Every component has been consumed: this node is the result.
if (pos >= paths.length) {
return this;
}
const component = paths[pos];
const stack = [];
let node = this;
while (true) {
if (component.name === node.nodeName) {
if (component.pos === 0) {
// No index requested: try to resolve the rest of the path from this match
// and keep walking when that fails.
const res = node.searchNode(paths, pos + 1);
if (res !== null) {
return res;
}
} else if (stack.length === 0) {
// The match is the starting node itself, so no parent is available to count
// siblings in.
return null;
} else {
// An index was requested: count the children of the match's parent that
// carry the same name and continue from the one at position
// `component.pos`. Its result is returned as is, even when it is null.
const [parent] = stack.pop();
let siblingPos = 0;
for (const child of parent.childNodes) {
if (component.name === child.nodeName) {
if (siblingPos === component.pos) {
return child.searchNode(paths, pos + 1);
}
siblingPos++;
}
}
// We didn't find the correct sibling
// so just return the first found node
return node.searchNode(paths, pos + 1);
}
}
if (node.childNodes && node.childNodes.length !== 0) {
// Descend to the first child, remembering where we are in the parent.
stack.push([node, 0]);
node = node.childNodes[0];
} else if (stack.length === 0) {
// Leaf starting node without a match: nothing left to visit.
return null;
} else {
// Backtrack to the closest ancestor that still has an unvisited child.
while (stack.length !== 0) {
const [parent, currentPos] = stack.pop();
const newPos = currentPos + 1;
if (newPos < parent.childNodes.length) {
stack.push([parent, newPos]);
node = parent.childNodes[newPos];
break;
}
}
// The stack ran empty without finding another child: the walk is over.
if (stack.length === 0) {
return null;
}
}
}
}
dump(buffer) {
if (this.nodeName === "#text") {
buffer.push(encodeToXmlString(this.nodeValue));
return;
}
buffer.push(`<${this.nodeName}`);
if (this.attributes) {
for (const attribute of this.attributes) {
buffer.push(
` ${attribute.name}=\"${encodeToXmlString(attribute.value)}\"`
);
}
}
if (this.hasChildNodes()) {
buffer.push(">");
for (const child of this.childNodes) {
child.dump(buffer);
}
buffer.push(`</${this.nodeName}>`);
} else if (this.nodeValue) {
buffer.push(`>${encodeToXmlString(this.nodeValue)}</${this.nodeName}>`);
} else {
buffer.push("/>");
}
}
} }
class SimpleXMLParser extends XMLParserBase { class SimpleXMLParser extends XMLParserBase {
constructor() { constructor(hasAttributes = false) {
super(); super();
this._currentFragment = null; this._currentFragment = null;
this._stack = null; this._stack = null;
this._errorCode = XMLParserErrorCode.NoError; this._errorCode = XMLParserErrorCode.NoError;
this._hasAttributes = hasAttributes;
} }
parseFromString(data) { parseFromString(data) {
@ -379,6 +466,9 @@ class SimpleXMLParser extends XMLParserBase {
onBeginElement(name, attributes, isEmpty) { onBeginElement(name, attributes, isEmpty) {
const node = new SimpleDOMNode(name); const node = new SimpleDOMNode(name);
node.childNodes = []; node.childNodes = [];
if (this._hasAttributes) {
node.attributes = attributes;
}
this._currentFragment.push(node); this._currentFragment.push(node);
if (isEmpty) { if (isEmpty) {
return; return;
@ -403,4 +493,4 @@ class SimpleXMLParser extends XMLParserBase {
} }
} }
export { SimpleXMLParser }; export { SimpleDOMNode, SimpleXMLParser };

View File

@ -37,6 +37,7 @@
"ui_utils_spec.js", "ui_utils_spec.js",
"unicode_spec.js", "unicode_spec.js",
"util_spec.js", "util_spec.js",
"writer_spec.js" "writer_spec.js",
"xml_spec.js"
] ]
} }

View File

@ -81,6 +81,7 @@ function initializePDFJS(callback) {
"pdfjs-test/unit/unicode_spec.js", "pdfjs-test/unit/unicode_spec.js",
"pdfjs-test/unit/util_spec.js", "pdfjs-test/unit/util_spec.js",
"pdfjs-test/unit/writer_spec.js", "pdfjs-test/unit/writer_spec.js",
"pdfjs-test/unit/xml_spec.js",
].map(function (moduleName) { ].map(function (moduleName) {
// eslint-disable-next-line no-unsanitized/method // eslint-disable-next-line no-unsanitized/method
return SystemJS.import(moduleName); return SystemJS.import(moduleName);

View File

@ -17,6 +17,7 @@ import {
bytesToString, bytesToString,
createPromiseCapability, createPromiseCapability,
createValidAbsoluteUrl, createValidAbsoluteUrl,
encodeToXmlString,
escapeString, escapeString,
getModificationDate, getModificationDate,
isArrayBuffer, isArrayBuffer,
@ -24,6 +25,7 @@ import {
isNum, isNum,
isSameOrigin, isSameOrigin,
isString, isString,
parseXFAPath,
removeNullCharacters, removeNullCharacters,
string32, string32,
stringToBytes, stringToBytes,
@ -331,4 +333,32 @@ describe("util", function () {
expect(getModificationDate(date)).toEqual("31410610020653"); expect(getModificationDate(date)).toEqual("31410610020653");
}); });
}); });
describe("parseXFAPath", function () {
  // Components without an index must get position 0; indexed ones must get
  // the number found between the brackets.
  it("should get a correctly parsed path", function () {
    const expectedComponents = [
      { name: "foo", pos: 0 },
      { name: "bar", pos: 12 },
      { name: "oof", pos: 3 },
      { name: "rab", pos: 0 },
      { name: "FOO", pos: 123 },
      { name: "BAR", pos: 456 },
    ];
    const parsed = parseXFAPath("foo.bar[12].oof[3].rab.FOO[123].BAR[456]");
    expect(parsed).toEqual(expectedComponents);
  });
});
describe("encodeToXmlString", function () {
  // Mixes the five named entities with non-ASCII characters, including one
  // outside the BMP, to cover both escaping paths.
  it("should get a correctly encoded string with some entities", function () {
    const encoded = encodeToXmlString("\"\u0397ell😂' & <W😂rld>");
    const expected =
      "&quot;&#x397;ell&#x1F602;&apos; &amp; &lt;W&#x1F602;rld&gt;";
    expect(encoded).toEqual(expected);
  });

  // A string made only of plain printable ASCII must come back unchanged.
  it("should get a correctly encoded basic ascii string", function () {
    const plain = "hello world";
    const encoded = encodeToXmlString(plain);
    expect(encoded).toEqual(plain);
  });
});
}); });

View File

@ -37,7 +37,7 @@ describe("Writer", function () {
info: {}, info: {},
}; };
let data = incrementalUpdate(originalData, xrefInfo, newRefs); let data = incrementalUpdate(originalData, xrefInfo, newRefs, null, null);
data = bytesToString(data); data = bytesToString(data);
const expected = const expected =

110
test/unit/xml_spec.js Normal file
View File

@ -0,0 +1,110 @@
/* Copyright 2020 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { parseXFAPath } from "../../src/shared/util.js";
import { SimpleXMLParser } from "../../src/shared/xml_parser.js";
// Unit tests for the searchNode() and dump() methods of the nodes produced by
// SimpleXMLParser (parser created with attributes enabled).
describe("XML", function () {
describe("searchNode", function () {
it("should search a node with a given path in xml tree", function () {
const xml = `
<a>
<b>
<c a="123"/>
<d/>
<e>
<f>
<g a="321"/>
</f>
</e>
<c a="456"/>
<c a="789"/>
<h/>
<c a="101112"/>
</b>
<h>
<i/>
<j/>
<k>
<g a="654"/>
</k>
</h>
<b>
<g a="987"/>
<h/>
<g a="121110"/>
</b>
</a>`;
const root = new SimpleXMLParser(true).parseFromString(xml)
.documentElement;
// Resolve `path` from the root and return the value of the first attribute
// of the node that was found.
function getAttr(path) {
return root.searchNode(parseXFAPath(path), 0).attributes[0].value;
}
// Components may match at any depth: the first <g> in document order sits
// under the first <b>, nested in <e><f>.
expect(getAttr("b.g")).toEqual("321");
expect(getAttr("e.f.g")).toEqual("321");
expect(getAttr("e.g")).toEqual("321");
expect(getAttr("g")).toEqual("321");
expect(getAttr("h.g")).toEqual("654");
// An explicit index selects among same-named children of the same parent.
expect(getAttr("b[0].g")).toEqual("321");
expect(getAttr("b[1].g")).toEqual("987");
expect(getAttr("b[1].g[0]")).toEqual("987");
expect(getAttr("b[1].g[1]")).toEqual("121110");
// Other elements in between (<d>, <e>, <h>) do not count towards the index.
expect(getAttr("c")).toEqual("123");
expect(getAttr("c[1]")).toEqual("456");
expect(getAttr("c[2]")).toEqual("789");
expect(getAttr("c[3]")).toEqual("101112");
});
// NOTE(review): this case exercises dump(), although it is nested in the
// "searchNode" suite.
it("should dump a xml tree", function () {
let xml = `
<a>
<b>
<c a="123"/>
<d>hello</d>
<e>
<f>
<g a="321"/>
</f>
</e>
<c a="456"/>
<c a="789"/>
<h/>
<c a="101112"/>
</b>
<h>
<i/>
<j/>
<k>
W&#x1F602;rld
<g a="654"/>
</k>
</h>
<b>
<g a="987"/>
<h/>
<g a="121110"/>
</b>
</a>`;
// All whitespace is removed before parsing so that the dumped output can be
// compared with the input verbatim.
// NOTE(review): this also removes the space between tag names and
// attributes (`<c a="123"/>` becomes `<ca="123"/>`), so attribute
// serialization is presumably not exercised here — TODO confirm.
xml = xml.replace(/\s+/g, "");
const root = new SimpleXMLParser(true).parseFromString(xml)
.documentElement;
const buffer = [];
root.dump(buffer);
expect(buffer.join("").replace(/\s+/g, "")).toEqual(xml);
});
});
});