[Annotations] Some annotations can have their values stored in the xfa:datasets

- it aims to fix #14685;
- add a basic object to get values from the parsed datasets;
- these annotations don't have an appearance so we must create one when printing or saving.
This commit is contained in:
Calixte Denizet 2022-03-31 19:18:30 +02:00
parent d6592b5e37
commit 0b597304c1
6 changed files with 178 additions and 13 deletions

View File

@ -72,14 +72,16 @@ class AnnotationFactory {
static create(xref, ref, pdfManager, idFactory, collectFields) {
return Promise.all([
pdfManager.ensureCatalog("acroForm"),
pdfManager.ensureDoc("xfaDatasets"),
collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1,
]).then(([acroForm, pageIndex]) =>
]).then(([acroForm, xfaDatasets, pageIndex]) =>
pdfManager.ensure(this, "_create", [
xref,
ref,
pdfManager,
idFactory,
acroForm,
xfaDatasets,
collectFields,
pageIndex,
])
@ -95,6 +97,7 @@ class AnnotationFactory {
pdfManager,
idFactory,
acroForm,
xfaDatasets,
collectFields,
pageIndex = -1
) {
@ -119,6 +122,7 @@ class AnnotationFactory {
id,
pdfManager,
acroForm: acroForm instanceof Dict ? acroForm : Dict.empty,
xfaDatasets,
collectFields,
pageIndex,
};
@ -1237,7 +1241,7 @@ class WidgetAnnotation extends Annotation {
);
}
const fieldValue = getInheritableProperty({
let fieldValue = getInheritableProperty({
dict,
key: "V",
getArray: true,
@ -1251,6 +1255,15 @@ class WidgetAnnotation extends Annotation {
});
data.defaultFieldValue = this._decodeFormValue(defaultFieldValue);
if (fieldValue === undefined && params.xfaDatasets) {
// Try to figure out if we have something in the xfa dataset.
const path = this._title.str;
if (path) {
this._hasValueFromXFA = true;
data.fieldValue = fieldValue = params.xfaDatasets.getValue(path);
}
}
// When no "V" entry exists, let the fieldValue fallback to the "DV" entry
// (fixes issue13823.pdf).
if (fieldValue === undefined && data.defaultFieldValue !== null) {
@ -1401,17 +1414,20 @@ class WidgetAnnotation extends Annotation {
}
async save(evaluator, task, annotationStorage) {
if (!annotationStorage) {
const storageEntry = annotationStorage
? annotationStorage.get(this.data.id)
: undefined;
let value = storageEntry && storageEntry.value;
if (value === this.data.fieldValue || value === undefined) {
if (!this._hasValueFromXFA) {
return null;
}
const storageEntry = annotationStorage.get(this.data.id);
const value = storageEntry && storageEntry.value;
if (value === this.data.fieldValue || value === undefined) {
return null;
value = value || this.data.fieldValue;
}
// Value can be an array (with choice list and multiple selections)
if (
!this._hasValueFromXFA &&
Array.isArray(value) &&
Array.isArray(this.data.fieldValue) &&
value.length === this.data.fieldValue.length &&
@ -1493,15 +1509,24 @@ class WidgetAnnotation extends Annotation {
async _getAppearance(evaluator, task, annotationStorage) {
const isPassword = this.hasFieldFlag(AnnotationFieldFlag.PASSWORD);
if (!annotationStorage || isPassword) {
if (isPassword) {
return null;
}
const storageEntry = annotationStorage.get(this.data.id);
const storageEntry = annotationStorage
? annotationStorage.get(this.data.id)
: undefined;
let value = storageEntry && storageEntry.value;
if (value === undefined) {
// The annotation hasn't been rendered so use the appearance
if (!this._hasValueFromXFA || this.appearance) {
// The annotation hasn't been rendered so use the appearance.
return null;
}
// The annotation has its value in XFA datasets but not in the V field.
value = this.data.fieldValue;
if (!value) {
return "";
}
}
value = value.trim();

View File

@ -0,0 +1,70 @@
/* Copyright 2022 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { parseXFAPath } from "./core_utils.js";
import { SimpleXMLParser } from "./xml_parser.js";
/**
 * XML parser that stops as soon as the `xfa:datasets` element has been closed
 * and keeps that element available in `this.node`.
 */
class DatasetXMLParser extends SimpleXMLParser {
  constructor(options) {
    super(options);
    // Stays `null` until an `xfa:datasets` element has been fully parsed.
    this.node = null;
  }

  /**
   * Delegates to the parent implementation, then records the element and
   * interrupts the parsing when the closed element is `xfa:datasets`.
   *
   * @param {string} name - Name of the element which has just been closed.
   * @throws {Error} Once `xfa:datasets` is closed, in order to stop the
   *   parser; callers are expected to catch it and read `this.node`.
   */
  onEndElement(name) {
    const closedElement = super.onEndElement(name);
    if (!closedElement || name !== "xfa:datasets") {
      return;
    }
    this.node = closedElement;
    // Nothing after the datasets is of interest: abort the parser.
    throw new Error("Aborting DatasetXMLParser.");
  }
}
/**
 * Basic reader used to look up values in the parsed XFA datasets.
 */
class DatasetReader {
  /**
   * @param {Object} data - Either `{ datasets }`, the XML of the datasets
   *   themselves, or `{ xdp }`, an XML string in which the `xfa:datasets`
   *   element has to be found.
   */
  constructor(data) {
    const parserOptions = { hasAttributes: true };

    if (!data.datasets) {
      const xdpParser = new DatasetXMLParser(parserOptions);
      try {
        xdpParser.parseFromString(data.xdp);
      } catch (_) {
        // DatasetXMLParser throws on purpose once it has found the datasets;
        // whatever it collected (possibly nothing) is read just below.
      }
      this.node = xdpParser.node;
      return;
    }

    const { documentElement } = new SimpleXMLParser(
      parserOptions
    ).parseFromString(data.datasets);
    this.node = documentElement;
  }

  /**
   * Get the value stored in the datasets for the given path.
   *
   * @param {string} path - Path of the node to read; it is split with
   *   `parseXFAPath` before being handed over to `searchNode`.
   * @returns {string|Array<string>} An empty string when there are no
   *   datasets, no path or no matching node; the text content of every child
   *   when the first child of the node is a `value` element; the text content
   *   of the node otherwise.
   */
  getValue(path) {
    const root = this.node;
    if (!root || !path) {
      return "";
    }

    const target = root.searchNode(parseXFAPath(path), 0);
    if (!target) {
      return "";
    }

    const { firstChild } = target;
    const hasValueChildren = firstChild && firstChild.nodeName === "value";
    if (hasValueChildren) {
      // One entry per child element.
      return target.children.map(child => child.textContent);
    }
    return target.textContent;
  }
}
export { DatasetReader };

View File

@ -47,6 +47,7 @@ import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js";
import { Catalog } from "./catalog.js";
import { clearGlobalCaches } from "./cleanup_helper.js";
import { DatasetReader } from "./dataset_reader.js";
import { Linearization } from "./parser.js";
import { NullStream } from "./stream.js";
import { ObjectLoader } from "./object_loader.js";
@ -820,6 +821,47 @@ class PDFDocument {
});
}
get xfaDatasets() {
const acroForm = this.catalog.acroForm;
if (!acroForm) {
return shadow(this, "xfaDatasets", null);
}
const xfa = acroForm.get("XFA");
if (xfa instanceof BaseStream && !xfa.isEmpty) {
try {
const xdp = stringToUTF8String(xfa.getString());
return shadow(this, "xfaDatasets", new DatasetReader({ xdp }));
} catch (_) {
warn("XFA - Invalid utf-8 string.");
return shadow(this, "xfaDatasets", null);
}
}
if (!Array.isArray(xfa) || xfa.length === 0) {
return null;
}
for (let i = 0, ii = xfa.length; i < ii; i += 2) {
if (xfa[i] !== "datasets") {
continue;
}
const data = this.xref.fetchIfRef(xfa[i + 1]);
if (!(data instanceof BaseStream) || data.isEmpty) {
continue;
}
try {
const datasets = stringToUTF8String(data.getString());
return shadow(this, "xfaDatasets", new DatasetReader({ datasets }));
} catch (_) {
warn("XFA - Invalid utf-8 string.");
return shadow(this, "xfaDatasets", null);
}
}
return shadow(this, "xfaDatasets", null);
}
get xfaData() {
const acroForm = this.catalog.acroForm;
if (!acroForm) {

View File

@ -328,6 +328,10 @@ class SimpleDOMNode {
.join("");
}
get children() {
return this.childNodes || [];
}
hasChildNodes() {
return this.childNodes && this.childNodes.length > 0;
}
@ -492,11 +496,12 @@ class SimpleXMLParser extends XMLParserBase {
this._currentFragment = this._stack.pop() || [];
const lastElement = this._currentFragment[this._currentFragment.length - 1];
if (!lastElement) {
return;
return null;
}
for (let i = 0, ii = lastElement.childNodes.length; i < ii; i++) {
lastElement.childNodes[i].parentNode = lastElement;
}
return lastElement;
}
onError(code) {

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/8283456/1647183160545.pdf

View File

@ -6332,5 +6332,27 @@
"md5": "5d1bfcc3b3130bfa7e33e43990e2213a",
"rounds": 1,
"type": "text"
},
{ "id": "issue14685",
"file": "pdfs/issue14685.pdf",
"md5": "2c608203b9b1d13455f0b1d9cebc9515",
"rounds": 1,
"link": true,
"lastPage": 1,
"type": "eq"
},
{ "id": "issue14685-print",
"file": "pdfs/issue14685.pdf",
"md5": "2c608203b9b1d13455f0b1d9cebc9515",
"rounds": 1,
"link": true,
"lastPage": 1,
"type": "eq",
"print": true,
"annotationStorage": {
"150R": {
"value": "Hello PDF.js World"
}
}
}
]