diff --git a/src/core/annotation.js b/src/core/annotation.js index 6263608fc..bddaeab99 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -1073,6 +1073,7 @@ class WidgetAnnotation extends Annotation { return null; } + const value = annotationStorage[this.data.id]; const bbox = [ 0, 0, @@ -1080,11 +1081,15 @@ class WidgetAnnotation extends Annotation { this.data.rect[3] - this.data.rect[1], ]; + const xfa = { + path: stringToPDFString(dict.get("T") || ""), + value, + }; + const newRef = evaluator.xref.getNewRef(); const AP = new Dict(evaluator.xref); AP.set("N", newRef); - const value = annotationStorage[this.data.id]; const encrypt = evaluator.xref.encrypt; let originalTransform = null; let newTransform = null; @@ -1120,9 +1125,9 @@ class WidgetAnnotation extends Annotation { return [ // data for the original object // V field changed + reference for new AP - { ref: this.ref, data: bufferOriginal.join("") }, + { ref: this.ref, data: bufferOriginal.join(""), xfa }, // data for the new AP - { ref: newRef, data: bufferNew.join("") }, + { ref: newRef, data: bufferNew.join(""), xfa: null }, ]; } @@ -1521,6 +1526,11 @@ class ButtonWidgetAnnotation extends WidgetAnnotation { return null; } + const xfa = { + path: stringToPDFString(dict.get("T") || ""), + value: value ? this.data.exportValue : "", + }; + const name = Name.get(value ? this.data.exportValue : "Off"); dict.set("V", name); dict.set("AS", name); @@ -1539,7 +1549,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation { writeDict(dict, buffer, originalTransform); buffer.push("\nendobj\n"); - return [{ ref: this.ref, data: buffer.join("") }]; + return [{ ref: this.ref, data: buffer.join(""), xfa }]; } async _saveRadioButton(evaluator, task, annotationStorage) { @@ -1555,6 +1565,11 @@ class ButtonWidgetAnnotation extends WidgetAnnotation { return null; } + const xfa = { + path: stringToPDFString(dict.get("T") || ""), + value: value ? 
this.data.buttonValue : "", + }; + const name = Name.get(value ? this.data.buttonValue : "Off"); let parentBuffer = null; const encrypt = evaluator.xref.encrypt; @@ -1593,9 +1608,13 @@ class ButtonWidgetAnnotation extends WidgetAnnotation { writeDict(dict, buffer, originalTransform); buffer.push("\nendobj\n"); - const newRefs = [{ ref: this.ref, data: buffer.join("") }]; + const newRefs = [{ ref: this.ref, data: buffer.join(""), xfa }]; if (parentBuffer !== null) { - newRefs.push({ ref: this.parent, data: parentBuffer.join("") }); + newRefs.push({ + ref: this.parent, + data: parentBuffer.join(""), + xfa: null, + }); } return newRefs; diff --git a/src/core/worker.js b/src/core/worker.js index d11070e6a..9d9f18b82 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -32,7 +32,7 @@ import { VerbosityLevel, warn, } from "../shared/util.js"; -import { clearPrimitiveCaches, Ref } from "./primitives.js"; +import { clearPrimitiveCaches, Dict, isDict, Ref } from "./primitives.js"; import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js"; import { incrementalUpdate } from "./writer.js"; import { isNodeJS } from "../shared/is_node.js"; @@ -521,7 +521,10 @@ class WorkerMessageHandler { filename, }) { pdfManager.requestLoadedStream(); - const promises = [pdfManager.onLoadedStream()]; + const promises = [ + pdfManager.onLoadedStream(), + pdfManager.ensureCatalog("acroForm"), + ]; const document = pdfManager.pdfDocument; for (let pageIndex = 0; pageIndex < numPages; pageIndex++) { promises.push( @@ -532,7 +535,7 @@ class WorkerMessageHandler { ); } - return Promise.all(promises).then(([stream, ...refs]) => { + return Promise.all(promises).then(([stream, acroForm, ...refs]) => { let newRefs = []; for (const ref of refs) { newRefs = ref @@ -545,6 +548,20 @@ class WorkerMessageHandler { return stream.bytes; } + acroForm = isDict(acroForm) ? 
acroForm : Dict.empty; + const xfa = acroForm.get("XFA") || []; + let xfaDatasets = null; + if (Array.isArray(xfa)) { + for (let i = 0, ii = xfa.length; i < ii; i += 2) { + if (xfa[i] === "datasets") { + xfaDatasets = xfa[i + 1]; + } + } + } else { + // TODO: Support XFA streams. + warn("Unsupported XFA type."); + } + const xref = document.xref; let newXrefInfo = Object.create(null); if (xref.trailer) { @@ -572,7 +589,13 @@ class WorkerMessageHandler { } xref.resetNewRef(); - return incrementalUpdate(stream.bytes, newXrefInfo, newRefs); + return incrementalUpdate( + stream.bytes, + newXrefInfo, + newRefs, + xref, + xfaDatasets + ); }); }); diff --git a/src/core/writer.js b/src/core/writer.js index c24c203ee..19442f5ea 100644 --- a/src/core/writer.js +++ b/src/core/writer.js @@ -14,8 +14,14 @@ */ /* eslint no-var: error */ -import { bytesToString, escapeString } from "../shared/util.js"; +import { + bytesToString, + escapeString, + parseXFAPath, + warn, +} from "../shared/util.js"; import { Dict, isDict, isName, isRef, isStream, Name } from "./primitives.js"; +import { SimpleDOMNode, SimpleXMLParser } from "../shared/xml_parser.js"; import { calculateMD5 } from "./crypto.js"; function writeDict(dict, buffer, transform) { @@ -123,7 +129,55 @@ function computeMD5(filesize, xrefInfo) { return bytesToString(calculateMD5(array)); } -function incrementalUpdate(originalData, xrefInfo, newRefs) { +function updateXFA(datasetsRef, newRefs, xref) { + if (datasetsRef === null || xref === null) { + return; + } + const datasets = xref.fetchIfRef(datasetsRef); + const str = bytesToString(datasets.getBytes()); + const xml = new SimpleXMLParser(/* hasAttributes */ true).parseFromString( + str + ); + + for (const { xfa } of newRefs) { + if (!xfa) { + continue; + } + const { path, value } = xfa; + if (!path) { + continue; + } + const node = xml.documentElement.searchNode(parseXFAPath(path), 0); + if (node) { + node.childNodes = [new SimpleDOMNode("#text", value)]; + } else { + 
warn(`Node not found for path: ${path}`); + } + } + const buffer = []; + xml.documentElement.dump(buffer); + let updatedXml = buffer.join(""); + + const encrypt = xref.encrypt; + if (encrypt) { + const transform = encrypt.createCipherTransform( + datasetsRef.num, + datasetsRef.gen + ); + updatedXml = transform.encryptString(updatedXml); + } + const data = + `${datasetsRef.num} ${datasetsRef.gen} obj\n` + + `<< /Type /EmbeddedFile /Length ${updatedXml.length}>>\nstream\n` + + updatedXml + + "\nendstream\nendobj\n"; + + newRefs.push({ ref: datasetsRef, data }); +} + +function incrementalUpdate(originalData, xrefInfo, newRefs, xref, datasetsRef) { + updateXFA(datasetsRef, newRefs, xref); + const newXref = new Dict(null); const refForXrefTable = xrefInfo.newRef; diff --git a/src/display/metadata.js b/src/display/metadata.js index 995518aaf..d29246228 100644 --- a/src/display/metadata.js +++ b/src/display/metadata.js @@ -14,7 +14,7 @@ */ import { assert } from "../shared/util.js"; -import { SimpleXMLParser } from "./xml_parser.js"; +import { SimpleXMLParser } from "../shared/xml_parser.js"; class Metadata { constructor(data) { diff --git a/src/shared/util.js b/src/shared/util.js index fade01b86..cd3a9486b 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -910,6 +910,73 @@ const createObjectURL = (function createObjectURLClosure() { }; })(); +/** + * AcroForm field names use an array like notation to refer to + * repeated XFA elements e.g. foo.bar[nnn]. + * see: XFA Spec Chapter 3 - Repeated Elements + * + * @param {string} path - XFA path name. + * @returns {Array} - Array of Objects with the name and pos of + * each part of the path. 
+ */ +function parseXFAPath(path) { + const positionPattern = /(.+)\[([0-9]+)\]$/; + return path.split(".").map(component => { + const m = component.match(positionPattern); + if (m) { + return { name: m[1], pos: parseInt(m[2], 10) }; + } + return { name: component, pos: 0 }; + }); +} + +const XMLEntities = { + /* < */ 0x3c: "&lt;", + /* > */ 0x3e: "&gt;", + /* & */ 0x26: "&amp;", + /* " */ 0x22: "&quot;", + /* ' */ 0x27: "&apos;", +}; + +function encodeToXmlString(str) { + const buffer = []; + let start = 0; + for (let i = 0, ii = str.length; i < ii; i++) { + const char = str.codePointAt(i); + if (0x20 <= char && char <= 0x7e) { + // ascii + const entity = XMLEntities[char]; + if (entity) { + if (start < i) { + buffer.push(str.substring(start, i)); + } + buffer.push(entity); + start = i + 1; + } + } else { + if (start < i) { + buffer.push(str.substring(start, i)); + } + buffer.push(`&#x${char.toString(16).toUpperCase()};`); + if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) { + // char is represented by two u16 + i++; + } + start = i + 1; + } + } + + if (buffer.length === 0) { + return str; + } + + if (start < str.length) { + buffer.push(str.substring(start, str.length)); + } + + return buffer.join(""); +} + export { BaseException, FONT_IDENTITY_MATRIX, @@ -947,6 +1014,7 @@ export { createPromiseCapability, createObjectURL, escapeString, + encodeToXmlString, getModificationDate, getVerbosityLevel, info, @@ -959,6 +1027,7 @@ export { createValidAbsoluteUrl, IsLittleEndianCached, IsEvalSupportedCached, + parseXFAPath, removeNullCharacters, setVerbosityLevel, shadow, diff --git a/src/display/xml_parser.js b/src/shared/xml_parser.js similarity index 79% rename from src/display/xml_parser.js rename to src/shared/xml_parser.js index 6401a76c7..6cc1af14b 100644 --- a/src/display/xml_parser.js +++ b/src/shared/xml_parser.js @@ -16,6 +16,8 @@ // The code for XMLParserBase copied from // 
https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts +import { encodeToXmlString } from "./util.js"; + const XMLParserErrorCode = { NoError: 0, EndOfDocument: -1, @@ -48,9 +50,9 @@ class XMLParserBase { _resolveEntities(s) { return s.replace(/&([^;]+);/g, (all, entity) => { if (entity.substring(0, 2) === "#x") { - return String.fromCharCode(parseInt(entity.substring(2), 16)); + return String.fromCodePoint(parseInt(entity.substring(2), 16)); } else if (entity.substring(0, 1) === "#") { - return String.fromCharCode(parseInt(entity.substring(1), 10)); + return String.fromCodePoint(parseInt(entity.substring(1), 10)); } switch (entity) { case "lt": @@ -326,14 +328,99 @@ class SimpleDOMNode { hasChildNodes() { return this.childNodes && this.childNodes.length > 0; } + + searchNode(paths, pos) { + if (pos >= paths.length) { + return this; + } + + const component = paths[pos]; + const stack = []; + let node = this; + + while (true) { + if (component.name === node.nodeName) { + if (component.pos === 0) { + const res = node.searchNode(paths, pos + 1); + if (res !== null) { + return res; + } + } else if (stack.length === 0) { + return null; + } else { + const [parent] = stack.pop(); + let siblingPos = 0; + for (const child of parent.childNodes) { + if (component.name === child.nodeName) { + if (siblingPos === component.pos) { + return child.searchNode(paths, pos + 1); + } + siblingPos++; + } + } + // We didn't find the correct sibling + // so just return the first found node + return node.searchNode(paths, pos + 1); + } + } + + if (node.childNodes && node.childNodes.length !== 0) { + stack.push([node, 0]); + node = node.childNodes[0]; + } else if (stack.length === 0) { + return null; + } else { + while (stack.length !== 0) { + const [parent, currentPos] = stack.pop(); + const newPos = currentPos + 1; + if (newPos < parent.childNodes.length) { + stack.push([parent, newPos]); + node = parent.childNodes[newPos]; + break; + } + } 
+ if (stack.length === 0) { + return null; + } + } + } + } + + dump(buffer) { + if (this.nodeName === "#text") { + buffer.push(encodeToXmlString(this.nodeValue)); + return; + } + + buffer.push(`<${this.nodeName}`); + if (this.attributes) { + for (const attribute of this.attributes) { + buffer.push( + ` ${attribute.name}=\"${encodeToXmlString(attribute.value)}\"` + ); + } + } + if (this.hasChildNodes()) { + buffer.push(">"); + for (const child of this.childNodes) { + child.dump(buffer); + } + buffer.push(`</${this.nodeName}>`); + } else if (this.nodeValue) { + buffer.push(`>${encodeToXmlString(this.nodeValue)}</${this.nodeName}>`); + } else { + buffer.push("/>"); + } + } } class SimpleXMLParser extends XMLParserBase { - constructor() { + constructor(hasAttributes = false) { super(); this._currentFragment = null; this._stack = null; this._errorCode = XMLParserErrorCode.NoError; + this._hasAttributes = hasAttributes; } parseFromString(data) { @@ -379,6 +466,9 @@ class SimpleXMLParser extends XMLParserBase { onBeginElement(name, attributes, isEmpty) { const node = new SimpleDOMNode(name); node.childNodes = []; + if (this._hasAttributes) { + node.attributes = attributes; + } this._currentFragment.push(node); if (isEmpty) { return; @@ -403,4 +493,4 @@ class SimpleXMLParser extends XMLParserBase { } } -export { SimpleXMLParser }; +export { SimpleDOMNode, SimpleXMLParser }; diff --git a/test/unit/clitests.json b/test/unit/clitests.json index f68cc5a75..d4766617c 100644 --- a/test/unit/clitests.json +++ b/test/unit/clitests.json @@ -37,6 +37,7 @@ "ui_utils_spec.js", "unicode_spec.js", "util_spec.js", - "writer_spec.js" + "writer_spec.js", + "xml_spec.js" ] } diff --git a/test/unit/jasmine-boot.js b/test/unit/jasmine-boot.js index 0693b7272..d2b92ec02 100644 --- a/test/unit/jasmine-boot.js +++ b/test/unit/jasmine-boot.js @@ -81,6 +81,7 @@ function initializePDFJS(callback) { "pdfjs-test/unit/unicode_spec.js", "pdfjs-test/unit/util_spec.js", "pdfjs-test/unit/writer_spec.js", + "pdfjs-test/unit/xml_spec.js", 
].map(function (moduleName) { // eslint-disable-next-line no-unsanitized/method return SystemJS.import(moduleName); diff --git a/test/unit/util_spec.js b/test/unit/util_spec.js index 96f9772ba..845447ff1 100644 --- a/test/unit/util_spec.js +++ b/test/unit/util_spec.js @@ -17,6 +17,7 @@ import { bytesToString, createPromiseCapability, createValidAbsoluteUrl, + encodeToXmlString, escapeString, getModificationDate, isArrayBuffer, @@ -24,6 +25,7 @@ import { isNum, isSameOrigin, isString, + parseXFAPath, removeNullCharacters, string32, stringToBytes, @@ -331,4 +333,32 @@ describe("util", function () { expect(getModificationDate(date)).toEqual("31410610020653"); }); }); + + describe("parseXFAPath", function () { + it("should get a correctly parsed path", function () { + const path = "foo.bar[12].oof[3].rab.FOO[123].BAR[456]"; + expect(parseXFAPath(path)).toEqual([ + { name: "foo", pos: 0 }, + { name: "bar", pos: 12 }, + { name: "oof", pos: 3 }, + { name: "rab", pos: 0 }, + { name: "FOO", pos: 123 }, + { name: "BAR", pos: 456 }, + ]); + }); + }); + + describe("encodeToXmlString", function () { + it("should get a correctly encoded string with some entities", function () { + const str = "\"\u0397ell😂' & <W😂rld>"; + expect(encodeToXmlString(str)).toEqual( + "&quot;&#x397;ell&#x1F602;&apos; &amp; &lt;W&#x1F602;rld&gt;" + ); + }); + + it("should get a correctly encoded basic ascii string", function () { + const str = "hello world"; + expect(encodeToXmlString(str)).toEqual(str); + }); + }); }); diff --git a/test/unit/writer_spec.js b/test/unit/writer_spec.js index 1491e53cd..2f7196b96 100644 --- a/test/unit/writer_spec.js +++ b/test/unit/writer_spec.js @@ -37,7 +37,7 @@ describe("Writer", function () { info: {}, }; - let data = incrementalUpdate(originalData, xrefInfo, newRefs); + let data = incrementalUpdate(originalData, xrefInfo, newRefs, null, null); data = bytesToString(data); const expected = diff --git a/test/unit/xml_spec.js b/test/unit/xml_spec.js new file mode 100644 index 000000000..9d04e3aa8 --- /dev/null +++ 
b/test/unit/xml_spec.js @@ -0,0 +1,110 @@ +/* Copyright 2020 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { parseXFAPath } from "../../src/shared/util.js"; +import { SimpleXMLParser } from "../../src/shared/xml_parser.js"; + +describe("XML", function () { + describe("searchNode", function () { + it("should search a node with a given path in xml tree", function () { + const xml = ` + + + + + + + + + + + + + + + + + + + + + + + + + + + `; + const root = new SimpleXMLParser(true).parseFromString(xml) + .documentElement; + function getAttr(path) { + return root.searchNode(parseXFAPath(path), 0).attributes[0].value; + } + + expect(getAttr("b.g")).toEqual("321"); + expect(getAttr("e.f.g")).toEqual("321"); + expect(getAttr("e.g")).toEqual("321"); + expect(getAttr("g")).toEqual("321"); + expect(getAttr("h.g")).toEqual("654"); + expect(getAttr("b[0].g")).toEqual("321"); + expect(getAttr("b[1].g")).toEqual("987"); + expect(getAttr("b[1].g[0]")).toEqual("987"); + expect(getAttr("b[1].g[1]")).toEqual("121110"); + expect(getAttr("c")).toEqual("123"); + expect(getAttr("c[1]")).toEqual("456"); + expect(getAttr("c[2]")).toEqual("789"); + expect(getAttr("c[3]")).toEqual("101112"); + }); + + it("should dump a xml tree", function () { + let xml = ` + + + + hello + + + + + + + + + + + + + + + W😂rld + + + + + + + + + `; + xml = xml.replace(/\s+/g, ""); + const root = new SimpleXMLParser(true).parseFromString(xml) + .documentElement; + const 
buffer = []; + root.dump(buffer); + + expect(buffer.join("").replace(/\s+/g, "")).toEqual(xml); + }); + }); +});