diff --git a/src/core/xfa/som.js b/src/core/xfa/som.js
new file mode 100644
index 000000000..d0d394fa1
--- /dev/null
+++ b/src/core/xfa/som.js
@@ -0,0 +1,285 @@
+/* Copyright 2021 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import {
+  $getChildrenByClass,
+  $getChildrenByName,
+  $getParent,
+  XFAObject,
+  XFAObjectArray,
+} from "./xfa_object.js";
+import { warn } from "../../shared/util.js";
+
+// Matches a node name: everything up to the next "." or "[".
+const namePattern = /^[^.[]+/;
+// Matches the content of an index: everything up to the closing "]".
+const indexPattern = /^[^\]]+/;
+// Separator found in front of a name in a SOM expression.
+const operators = {
+  dot: 0, // a.b
+  dotDot: 1, // a..b
+  dotHash: 2, // a.#b
+  dotBracket: 3, // a.[...]
+  dotParen: 4, // a.(...)
+};
+
+// Prefixes resolved from the root object instead of by name lookup.
+const shortcuts = new Map([
+  ["$data", root => root.datasets.data],
+  ["$template", root => root.template],
+  ["$connectionSet", root => root.connectionSet],
+  ["$form", root => root.form],
+  ["$layout", root => root.layout],
+  ["$host", root => root.host],
+  ["$dataWindow", root => root.dataWindow],
+  ["$event", root => root.event],
+  ["!", root => root.datasets],
+  ["$xfa", root => root],
+  ["xfa", root => root],
+]);
+
+// Per node, maps a step of an expression (separator + name) to the children
+// found for it, so that a step is resolved only once for a given node.
+const somCache = new WeakMap();
+
+/**
+ * Convert the text found between brackets into an index.
+ *
+ * @param {string} index - The raw text between "[" and "]".
+ * @returns {number} Infinity for "*", else the base 10 integer read from the
+ *   text, or 0 when no (non-zero) integer can be read.
+ */
+function parseIndex(index) {
+  index = index.trim();
+  if (index === "*") {
+    return Infinity;
+  }
+  return parseInt(index, 10) || 0;
+}
+
+/**
+ * Split a SOM expression into its successive steps.
+ *
+ * @param {string} expr - The expression, e.g. "$template..Description[1].id".
+ * @param {boolean} dotDotAllowed - When false, an expression using ".." is
+ *   rejected.
+ * @returns {Array<Object>|null} One entry per step, or null when the
+ *   expression doesn't start with a name, has an empty index or uses a
+ *   forbidden "..".
+ */
+function parseExpression(expr, dotDotAllowed) {
+  let match = expr.match(namePattern);
+  if (!match) {
+    return null;
+  }
+
+  let [name] = match;
+  const parsed = [
+    {
+      name,
+      cacheName: "." + name,
+      index: 0,
+      js: null,
+      formCalc: null,
+      operator: operators.dot,
+    },
+  ];
+
+  let pos = name.length;
+
+  while (pos < expr.length) {
+    const spos = pos;
+    const char = expr.charAt(pos++);
+    if (char === "[") {
+      match = expr.slice(pos).match(indexPattern);
+      if (!match) {
+        warn("XFA - Invalid index in SOM expression");
+        return null;
+      }
+      parsed[parsed.length - 1].index = parseIndex(match[0]);
+      // Skip the index and the closing bracket.
+      pos += match[0].length + 1;
+      continue;
+    }
+
+    let operator;
+    switch (expr.charAt(pos)) {
+      case ".":
+        if (!dotDotAllowed) {
+          return null;
+        }
+        pos++;
+        operator = operators.dotDot;
+        break;
+      case "#":
+        pos++;
+        operator = operators.dotHash;
+        break;
+      case "[":
+        // TODO: FormCalc expression so need to use the parser
+        operator = operators.dotBracket;
+        break;
+      case "(":
+        // TODO:
+        // Javascript expression: should be a boolean operation with a path
+        // so maybe we can have our own parser for that stuff or
+        // maybe use the formcalc one.
+        operator = operators.dotParen;
+        break;
+      default:
+        operator = operators.dot;
+        break;
+    }
+
+    match = expr.slice(pos).match(namePattern);
+    if (!match) {
+      // No name to read: what has been parsed so far is returned.
+      break;
+    }
+
+    [name] = match;
+    pos += name.length;
+    parsed.push({
+      name,
+      cacheName: expr.slice(spos, pos),
+      operator,
+      index: 0,
+      js: null,
+      formCalc: null,
+    });
+  }
+  return parsed;
+}
+
+/**
+ * Look for the node(s) targeted by a SOM expression.
+ *
+ * @param {XFAObject} root - The object the shortcuts ($data, $template, ...)
+ *   are resolved from.
+ * @param {XFAObject|null} container - The node an unqualified expression is
+ *   relative to; when there is none, the search starts from root.
+ * @param {string} expr - The SOM expression.
+ * @param {boolean} [dotDotAllowed=true] - Whether ".." can be used in expr.
+ * @returns {*} null when expr is invalid or nothing is found, the match when
+ *   there is exactly one, else an array with all the matches.
+ */
+function searchNode(root, container, expr, dotDotAllowed = true) {
+  const parsed = parseExpression(expr, dotDotAllowed);
+  if (!parsed) {
+    return null;
+  }
+  const fn = shortcuts.get(parsed[0].name);
+  let i = 0;
+  let isQualified;
+  if (fn) {
+    const start = fn(root);
+    if (!start) {
+      // The shortcut points at something this root doesn't have.
+      return null;
+    }
+    isQualified = true;
+    root = [start];
+    i = 1;
+  } else {
+    // A missing container (null or undefined) means there is nothing to walk
+    // up from, so the expression is handled as a qualified one.
+    isQualified = !container;
+    root = [container || root];
+  }
+
+  for (let ii = parsed.length; i < ii; i++) {
+    const { name, cacheName, operator, index } = parsed[i];
+    const nodes = [];
+    for (const node of root) {
+      if (!(node instanceof XFAObject)) {
+        continue;
+      }
+
+      let cached = somCache.get(node);
+      if (!cached) {
+        cached = new Map();
+        somCache.set(node, cached);
+      }
+
+      let children = cached.get(cacheName);
+      if (!children) {
+        switch (operator) {
+          case operators.dot:
+            children = node[$getChildrenByName](name, false);
+            break;
+          case operators.dotDot:
+            children = node[$getChildrenByName](name, true);
+            break;
+          case operators.dotHash:
+            children = node[$getChildrenByClass](name);
+            if (children instanceof XFAObjectArray) {
+              children = children.children;
+            } else if (children === undefined || children === null) {
+              // Nothing with this class name: no match instead of a
+              // one-element list holding undefined/null.
+              children = [];
+            } else {
+              children = [children];
+            }
+            break;
+          default:
+            // dotBracket and dotParen (predicates) aren't implemented yet:
+            // they match nothing, rather than leaving children undefined
+            // and throwing on children.length below.
+            children = [];
+            break;
+        }
+        cached.set(cacheName, children);
+      }
+
+      if (children.length > 0) {
+        nodes.push(children);
+      }
+    }
+
+    if (nodes.length === 0 && !isQualified && i === 0) {
+      // We've an unqualified expression and we didn't find anything
+      // so look at container and siblings of container and so on.
+      // http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.364.2157&rep=rep1&type=pdf#page=114
+      const parent = container[$getParent]();
+      container = parent;
+      if (!container) {
+        return null;
+      }
+      // i++ in the loop header brings us back to the first step.
+      i = -1;
+      root = [container];
+      continue;
+    }
+
+    if (isFinite(index)) {
+      root = nodes.filter(node => index < node.length).map(node => node[index]);
+    } else {
+      root = nodes.reduce((acc, node) => acc.concat(node), []);
+    }
+  }
+
+  if (root.length === 0) {
+    return null;
+  }
+
+  if (root.length === 1) {
+    return root[0];
+  }
+
+  return root;
+}
+
+export { searchNode };
diff --git a/src/core/xfa/xfa_object.js b/src/core/xfa/xfa_object.js index 918b7d9f4..e081b71b8 100644 --- a/src/core/xfa/xfa_object.js +++ b/src/core/xfa/xfa_object.js @@ -24,7 +24,13 @@ const $cleanup = Symbol(); const $content = Symbol("content"); const $dump = Symbol(); const $finalize = Symbol(); +const $isDataValue = Symbol(); +const $getAttributeIt = Symbol(); +const $getChildrenByClass = Symbol(); +const $getChildrenByName = Symbol(); +const $getChildrenByNameIt = Symbol(); const $getChildren = Symbol(); +const $getParent = Symbol(); const $isTransparent = Symbol(); const $lastAttribute = Symbol(); const 
$namespaceId = Symbol("namespaceId"); @@ -139,12 +145,16 @@ class XFAObject { return shadow(this, _attributeNames, proto._attributes); } + [$getParent]() { + return this[_parent]; + } + [$getChildren](name = null) { if (!name) { return this[_children]; } - return this[_children].filter(c => c[$nodeName] === name); + return this[name]; } [$dump]() { @@ -363,6 +373,47 @@ class XFAObject { return clone; } + + [$getChildren](name = null) { + if (!name) { + return this[_children]; + } + + return this[_children].filter(c => c[$nodeName] === name); + } + + [$getChildrenByClass](name) { + return this[name]; + } + + [$getChildrenByName](name, allTransparent, first = true) { + return Array.from(this[$getChildrenByNameIt](name, allTransparent, first)); + } + + *[$getChildrenByNameIt](name, allTransparent, first = true) { + if (name === "parent") { + yield this[_parent]; + return; + } + + for (const child of this[_children]) { + if (child[$nodeName] === name) { + yield child; + } + + if (child.name === name) { + yield child; + } + + if (allTransparent || child[$isTransparent]()) { + yield* child[$getChildrenByNameIt](name, allTransparent, false); + } + } + + if (first && this[_attributeNames].has(name)) { + yield new XFAAttribute(this, name, this[name]); + } + } } class XFAObjectArray { @@ -398,10 +449,34 @@ class XFAObjectArray { clone[_children] = this[_children].map(c => c[_clone]()); return clone; } + + get children() { + return this[_children]; + } +} + +class XFAAttribute { + constructor(node, name, value) { + this[_parent] = node; + this[$nodeName] = name; + this[$content] = value; + } + + [$getParent]() { + return this[_parent]; + } + + [$isDataValue]() { + return true; + } + + [$text]() { + return this[$content]; + } } class XmlObject extends XFAObject { - constructor(nsId, name, attributes = Object.create(null)) { + constructor(nsId, name, attributes = null) { super(nsId, name); this[$content] = ""; if (name !== "#text") { @@ -412,6 +487,7 @@ class XmlObject extends 
XFAObject { [$onChild](child) { if (this[$content]) { const node = new XmlObject(this[$namespaceId], "#text"); + node[_parent] = this; node[$content] = this[$content]; this[$content] = ""; this[_children].push(node); @@ -428,6 +504,7 @@ class XmlObject extends XFAObject { [$finalize]() { if (this[$content] && this[_children].length > 0) { const node = new XmlObject(this[$namespaceId], "#text"); + node[_parent] = this; node[$content] = this[$content]; this[_children].push(node); delete this[$content]; @@ -440,6 +517,54 @@ class XmlObject extends XFAObject { } return this[_children].map(c => c[$text]()).join(""); } + + [$getChildren](name = null) { + if (!name) { + return this[_children]; + } + + return this[_children].filter(c => c[$nodeName] === name); + } + + [$getChildrenByClass](name) { + const value = this[_attributes][name]; + if (value !== undefined) { + return value; + } + return this[$getChildren](name); + } + + *[$getChildrenByNameIt](name, allTransparent) { + const value = this[_attributes][name]; + if (value !== undefined) { + yield new XFAAttribute(this, name, value); + } + + for (const child of this[_children]) { + if (child[$nodeName] === name) { + yield child; + } + + if (allTransparent) { + yield* child[$getChildrenByNameIt](name, allTransparent); + } + } + } + + *[$getAttributeIt](name) { + const value = this[_attributes][name]; + if (value !== undefined) { + yield new XFAAttribute(this, name, value); + } + + for (const child of this[_children]) { + yield* child[$getAttributeIt](name); + } + } + + [$isDataValue]() { + return this[_children].length === 0; + } } class ContentObject extends XFAObject { @@ -521,7 +646,13 @@ export { $content, $dump, $finalize, + $getAttributeIt, $getChildren, + $getChildrenByClass, + $getChildrenByName, + $getChildrenByNameIt, + $getParent, + $isDataValue, $isTransparent, $namespaceId, $nodeName, @@ -538,6 +669,7 @@ export { Option10, OptionObject, StringObject, + XFAAttribute, XFAObject, XFAObjectArray, XmlObject, 
diff --git a/test/unit/xfa_parser_spec.js b/test/unit/xfa_parser_spec.js index c5f5f9416..a8eba122a 100644 --- a/test/unit/xfa_parser_spec.js +++ b/test/unit/xfa_parser_spec.js @@ -13,7 +13,14 @@ * limitations under the License. */ -import { $dump, $getChildren, $text } from "../../src/core/xfa/xfa_object.js"; +import { + $dump, + $getChildren, + $getChildrenByClass, + $getChildrenByName, + $text, +} from "../../src/core/xfa/xfa_object.js"; +import { searchNode } from "../../src/core/xfa/som.js"; import { XFAParser } from "../../src/core/xfa/parser.js"; describe("XFAParser", function () { @@ -416,4 +423,240 @@ describe("XFAParser", function () { expect(field.value.text.$content).toEqual("Overriding text"); }); }); + + describe("Search in XFA", function () { + it("should search some nodes in a template object", function () { + const xml = ` + + + + + `; + const root = new XFAParser().parse(xml); + + let found = root[$getChildrenByName]("subform", true); + expect(found.map(x => x.id)).toEqual(["l", "m", "n", "o"]); + + found = root[$getChildrenByName]("Total_Price", true); + expect(found.map(x => x.id)).toEqual(["d", "h", "s", "k"]); + + found = root.template[$getChildrenByName]("Receipt", false); + const receipt = found[0]; + + found = receipt[$getChildrenByName]("Total_Price", false); + expect(found.map(x => x.id)).toEqual(["d", "h", "k"]); + + expect(receipt[$getChildrenByClass]("name")).toEqual("Receipt"); + const subforms = receipt[$getChildrenByClass]("subform"); + expect(subforms.children.map(x => x.id)).toEqual(["m", "n", "o"]); + }); + + it("should search some nodes in a template object using SOM", function () { + const xml = ` + + + + + `; + const root = new XFAParser().parse(xml); + expect(searchNode(root, null, "$template..Description.id")[$text]()).toBe( + "a" + ); + expect(searchNode(root, null, "$template..Description.id")[$text]()).toBe( + "a" + ); + expect( + searchNode(root, null, "$template..Description[0].id")[$text]() + ).toBe("a"); + expect( + 
searchNode(root, null, "$template..Description[1].id")[$text]() + ).toBe("e"); + expect( + searchNode(root, null, "$template..Description[2].id")[$text]() + ).toBe("p"); + expect(searchNode(root, null, "$template.Receipt.id")[$text]()).toBe("l"); + expect( + searchNode(root, null, "$template.Receipt.Description[1].id")[$text]() + ).toBe("e"); + expect(searchNode(root, null, "$template.Receipt.Description[2]")).toBe( + null + ); + expect( + searchNode(root, null, "$template.Receipt.foo.Description.id")[$text]() + ).toBe("p"); + expect( + searchNode(root, null, "$template.#subform.Sub_Total.id")[$text]() + ).toBe("i"); + expect( + searchNode(root, null, "$template.#subform.Units.id")[$text]() + ).toBe("b"); + expect( + searchNode(root, null, "$template.#subform.Units.parent.id")[$text]() + ).toBe("m"); + }); + + it("should search some nodes in a datasets object", function () { + const xml = ` + + + + + + 1 + + Giant Slingshot + 1 + 250.00 + 250.00 + + 2 + + Road Runner Bait, large bag + 5 + 12.00 + 60.00 + + 310.00 + 24.80 + 334.80 + + + + + `; + const root = new XFAParser().parse(xml); + const data = root.datasets.data; + + let found = data[$getChildrenByName]("Description", true); + expect(found.map(x => x[$text]())).toEqual([ + "Giant Slingshot", + "Road Runner Bait, large bag", + ]); + + found = data[$getChildrenByName]("Total_Price", true); + expect(found.map(x => x[$text]())).toEqual(["250.00", "60.00", "334.80"]); + }); + + it("should search some nodes using SOM from a non-root node", function () { + const xml = ` + + + + + + 1 + + Giant Slingshot + 1 + 250.00 + 250.00 + + 2 + + Road Runner Bait, large bag + 5 + 12.00 + 60.00 + + 310.00 + 24.80 + 334.80 + + + + + `; + const root = new XFAParser().parse(xml); + const [receipt] = root.datasets.data[$getChildren]("Receipt"); + expect( + searchNode(root, receipt, "Detail[*].Total_Price").map(x => x[$text]()) + ).toEqual(["250.00", "60.00"]); + + const units = searchNode(root, receipt, "Detail[1].Units"); + 
expect(units[$text]()).toBe("5"); + + let found = searchNode(root, units, "Total_Price"); + expect(found[$text]()).toBe("60.00"); + + found = searchNode(root, units, "Total_Pric"); + expect(found).toEqual(null); + }); + + it("should search some nodes in a datasets object using SOM", function () { + const xml = ` + + + + + + foo + bar + + + + + `; + const root = new XFAParser().parse(xml); + expect(searchNode(root, null, "$data.Receipt.Detail")[$text]()).toBe( + "Acme" + ); + expect(searchNode(root, null, "$data.Receipt.Detail[0]")[$text]()).toBe( + "Acme" + ); + expect(searchNode(root, null, "$data.Receipt.Detail[1]")[$text]()).toBe( + "foo" + ); + expect(searchNode(root, null, "$data.Receipt.Detail[2]")[$text]()).toBe( + "bar" + ); + }); + }); });