From 0b597304c102bfc6422ced1d30345acd37eb40ec Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Thu, 31 Mar 2022 19:18:30 +0200 Subject: [PATCH] [Annotations] Some annotations can have their values stored in the xfa:datasets - it aims to fix #14685; - add a basic object to get values from the parsed datasets; - these annotations don't have an appearance so we must create one when printing or saving. --- src/core/annotation.js | 49 ++++++++++++++++++------ src/core/dataset_reader.js | 70 +++++++++++++++++++++++++++++++++++ src/core/document.js | 42 +++++++++++++++++++++ src/core/xml_parser.js | 7 +++- test/pdfs/issue14685.pdf.link | 1 + test/test_manifest.json | 22 +++++++++++ 6 files changed, 178 insertions(+), 13 deletions(-) create mode 100644 src/core/dataset_reader.js create mode 100644 test/pdfs/issue14685.pdf.link diff --git a/src/core/annotation.js b/src/core/annotation.js index 6d902be97..36f1c37fb 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -72,14 +72,16 @@ class AnnotationFactory { static create(xref, ref, pdfManager, idFactory, collectFields) { return Promise.all([ pdfManager.ensureCatalog("acroForm"), + pdfManager.ensureDoc("xfaDatasets"), collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1, - ]).then(([acroForm, pageIndex]) => + ]).then(([acroForm, xfaDatasets, pageIndex]) => pdfManager.ensure(this, "_create", [ xref, ref, pdfManager, idFactory, acroForm, + xfaDatasets, collectFields, pageIndex, ]) @@ -95,6 +97,7 @@ class AnnotationFactory { pdfManager, idFactory, acroForm, + xfaDatasets, collectFields, pageIndex = -1 ) { @@ -119,6 +122,7 @@ class AnnotationFactory { id, pdfManager, acroForm: acroForm instanceof Dict ? acroForm : Dict.empty, + xfaDatasets, collectFields, pageIndex, }; @@ -1237,7 +1241,7 @@ class WidgetAnnotation extends Annotation { ); } - const fieldValue = getInheritableProperty({ + let fieldValue = getInheritableProperty({ dict, key: "V", getArray: true, @@ -1251,6 +1255,15 @@ class WidgetAnnotation extends Annotation { }); data.defaultFieldValue = this._decodeFormValue(defaultFieldValue); + if (fieldValue === undefined && params.xfaDatasets) { + // Try to figure out if we have something in the xfa dataset. + const path = this._title.str; + if (path) { + this._hasValueFromXFA = true; + data.fieldValue = fieldValue = params.xfaDatasets.getValue(path); + } + } + // When no "V" entry exists, let the fieldValue fallback to the "DV" entry // (fixes issue13823.pdf). if (fieldValue === undefined && data.defaultFieldValue !== null) { @@ -1401,17 +1414,20 @@ class WidgetAnnotation extends Annotation { } async save(evaluator, task, annotationStorage) { - if (!annotationStorage) { - return null; - } - const storageEntry = annotationStorage.get(this.data.id); - const value = storageEntry && storageEntry.value; + const storageEntry = annotationStorage + ? annotationStorage.get(this.data.id) + : undefined; + let value = storageEntry && storageEntry.value; if (value === this.data.fieldValue || value === undefined) { - return null; + if (!this._hasValueFromXFA) { + return null; + } + value = value || this.data.fieldValue; } // Value can be an array (with choice list and multiple selections) if ( + !this._hasValueFromXFA && Array.isArray(value) && Array.isArray(this.data.fieldValue) && value.length === this.data.fieldValue.length && @@ -1493,14 +1509,23 @@ class WidgetAnnotation extends Annotation { async _getAppearance(evaluator, task, annotationStorage) { const isPassword = this.hasFieldFlag(AnnotationFieldFlag.PASSWORD); - if (!annotationStorage || isPassword) { + if (isPassword) { return null; } - const storageEntry = annotationStorage.get(this.data.id); + const storageEntry = annotationStorage + ? annotationStorage.get(this.data.id) + : undefined; let value = storageEntry && storageEntry.value; if (value === undefined) { - // The annotation hasn't been rendered so use the appearance - return null; + if (!this._hasValueFromXFA || this.appearance) { + // The annotation hasn't been rendered so use the appearance. + return null; + } + // The annotation has its value in XFA datasets but not in the V field. + value = this.data.fieldValue; + if (!value) { + return ""; + } } value = value.trim(); diff --git a/src/core/dataset_reader.js b/src/core/dataset_reader.js new file mode 100644 index 000000000..7c0f6583c --- /dev/null +++ b/src/core/dataset_reader.js @@ -0,0 +1,70 @@ +/* Copyright 2022 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { parseXFAPath } from "./core_utils.js"; +import { SimpleXMLParser } from "./xml_parser.js"; + +class DatasetXMLParser extends SimpleXMLParser { + constructor(options) { + super(options); + this.node = null; + } + + onEndElement(name) { + const node = super.onEndElement(name); + if (node && name === "xfa:datasets") { + this.node = node; + + // We don't need anything else, so just kill the parser. + throw new Error("Aborting DatasetXMLParser."); + } + } +} + +class DatasetReader { + constructor(data) { + if (data.datasets) { + this.node = new SimpleXMLParser({ hasAttributes: true }).parseFromString( + data.datasets + ).documentElement; + } else { + const parser = new DatasetXMLParser({ hasAttributes: true }); + try { + parser.parseFromString(data.xdp); + } catch (_) {} + this.node = parser.node; + } + } + + getValue(path) { + if (!this.node || !path) { + return ""; + } + const node = this.node.searchNode(parseXFAPath(path), 0); + + if (!node) { + return ""; + } + + const first = node.firstChild; + if (first && first.nodeName === "value") { + return node.children.map(child => child.textContent); + } + + return node.textContent; + } +} + +export { DatasetReader }; diff --git a/src/core/document.js b/src/core/document.js index b129409d1..06e7aa509 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -47,6 +47,7 @@ import { BaseStream } from "./base_stream.js"; import { calculateMD5 } from "./crypto.js"; import { Catalog } from "./catalog.js"; import { clearGlobalCaches } from "./cleanup_helper.js"; +import { DatasetReader } from "./dataset_reader.js"; import { Linearization } from "./parser.js"; import { NullStream } from "./stream.js"; import { ObjectLoader } from "./object_loader.js"; @@ -820,6 +821,47 @@ class PDFDocument { }); } + get xfaDatasets() { + const acroForm = this.catalog.acroForm; + if (!acroForm) { + return shadow(this, "xfaDatasets", null); + } + + const xfa = acroForm.get("XFA"); + if (xfa instanceof BaseStream && !xfa.isEmpty) { + try { + const xdp = stringToUTF8String(xfa.getString()); + return shadow(this, "xfaDatasets", new DatasetReader({ xdp })); + } catch (_) { + warn("XFA - Invalid utf-8 string."); + return shadow(this, "xfaDatasets", null); + } + } + + if (!Array.isArray(xfa) || xfa.length === 0) { + return null; + } + + for (let i = 0, ii = xfa.length; i < ii; i += 2) { + if (xfa[i] !== "datasets") { + continue; + } + const data = this.xref.fetchIfRef(xfa[i + 1]); + if (!(data instanceof BaseStream) || data.isEmpty) { + continue; + } + try { + const datasets = stringToUTF8String(data.getString()); + return shadow(this, "xfaDatasets", new DatasetReader({ datasets })); + } catch (_) { + warn("XFA - Invalid utf-8 string."); + return shadow(this, "xfaDatasets", null); + } + } + + return shadow(this, "xfaDatasets", null); + } + get xfaData() { const acroForm = this.catalog.acroForm; if (!acroForm) { diff --git a/src/core/xml_parser.js b/src/core/xml_parser.js index be0877b47..e322a364c 100644 --- a/src/core/xml_parser.js +++ b/src/core/xml_parser.js @@ -328,6 +328,10 @@ class SimpleDOMNode { .join(""); } + get children() { + return this.childNodes || []; + } + hasChildNodes() { return this.childNodes && this.childNodes.length > 0; } @@ -492,11 +496,12 @@ class SimpleXMLParser extends XMLParserBase { this._currentFragment = this._stack.pop() || []; const lastElement = this._currentFragment[this._currentFragment.length - 1]; if (!lastElement) { - return; + return null; } for (let i = 0, ii = lastElement.childNodes.length; i < ii; i++) { lastElement.childNodes[i].parentNode = lastElement; } + return lastElement; } onError(code) { diff --git a/test/pdfs/issue14685.pdf.link b/test/pdfs/issue14685.pdf.link new file mode 100644 index 000000000..c517d617e --- /dev/null +++ b/test/pdfs/issue14685.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/8283456/1647183160545.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index bb891b552..c067b7f4e 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -6332,5 +6332,27 @@ "md5": "5d1bfcc3b3130bfa7e33e43990e2213a", "rounds": 1, "type": "text" + }, + { "id": "issue14685", + "file": "pdfs/issue14685.pdf", + "md5": "2c608203b9b1d13455f0b1d9cebc9515", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "eq" + }, + { "id": "issue14685-print", + "file": "pdfs/issue14685.pdf", + "md5": "2c608203b9b1d13455f0b1d9cebc9515", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "eq", + "print": true, + "annotationStorage": { + "150R": { + "value": "Hello PDF.js World" + } + } } ]