[Annotations] Some annotations can have their values stored in the xfa:datasets

- it aims to fix #14685;
- add a basic object to get values from the parsed datasets;
- these annotations don't have an appearance so we must create one when printing or saving.
This commit is contained in:
Calixte Denizet 2022-03-31 19:18:30 +02:00
parent d6592b5e37
commit 0b597304c1
6 changed files with 178 additions and 13 deletions

View File

@ -72,14 +72,16 @@ class AnnotationFactory {
static create(xref, ref, pdfManager, idFactory, collectFields) { static create(xref, ref, pdfManager, idFactory, collectFields) {
return Promise.all([ return Promise.all([
pdfManager.ensureCatalog("acroForm"), pdfManager.ensureCatalog("acroForm"),
pdfManager.ensureDoc("xfaDatasets"),
collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1, collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1,
]).then(([acroForm, pageIndex]) => ]).then(([acroForm, xfaDatasets, pageIndex]) =>
pdfManager.ensure(this, "_create", [ pdfManager.ensure(this, "_create", [
xref, xref,
ref, ref,
pdfManager, pdfManager,
idFactory, idFactory,
acroForm, acroForm,
xfaDatasets,
collectFields, collectFields,
pageIndex, pageIndex,
]) ])
@ -95,6 +97,7 @@ class AnnotationFactory {
pdfManager, pdfManager,
idFactory, idFactory,
acroForm, acroForm,
xfaDatasets,
collectFields, collectFields,
pageIndex = -1 pageIndex = -1
) { ) {
@ -119,6 +122,7 @@ class AnnotationFactory {
id, id,
pdfManager, pdfManager,
acroForm: acroForm instanceof Dict ? acroForm : Dict.empty, acroForm: acroForm instanceof Dict ? acroForm : Dict.empty,
xfaDatasets,
collectFields, collectFields,
pageIndex, pageIndex,
}; };
@ -1237,7 +1241,7 @@ class WidgetAnnotation extends Annotation {
); );
} }
const fieldValue = getInheritableProperty({ let fieldValue = getInheritableProperty({
dict, dict,
key: "V", key: "V",
getArray: true, getArray: true,
@ -1251,6 +1255,15 @@ class WidgetAnnotation extends Annotation {
}); });
data.defaultFieldValue = this._decodeFormValue(defaultFieldValue); data.defaultFieldValue = this._decodeFormValue(defaultFieldValue);
if (fieldValue === undefined && params.xfaDatasets) {
// Try to figure out if we have something in the xfa dataset.
const path = this._title.str;
if (path) {
this._hasValueFromXFA = true;
data.fieldValue = fieldValue = params.xfaDatasets.getValue(path);
}
}
// When no "V" entry exists, let the fieldValue fallback to the "DV" entry // When no "V" entry exists, let the fieldValue fallback to the "DV" entry
// (fixes issue13823.pdf). // (fixes issue13823.pdf).
if (fieldValue === undefined && data.defaultFieldValue !== null) { if (fieldValue === undefined && data.defaultFieldValue !== null) {
@ -1401,17 +1414,20 @@ class WidgetAnnotation extends Annotation {
} }
async save(evaluator, task, annotationStorage) { async save(evaluator, task, annotationStorage) {
if (!annotationStorage) { const storageEntry = annotationStorage
return null; ? annotationStorage.get(this.data.id)
} : undefined;
const storageEntry = annotationStorage.get(this.data.id); let value = storageEntry && storageEntry.value;
const value = storageEntry && storageEntry.value;
if (value === this.data.fieldValue || value === undefined) { if (value === this.data.fieldValue || value === undefined) {
return null; if (!this._hasValueFromXFA) {
return null;
}
value = value || this.data.fieldValue;
} }
// Value can be an array (with choice list and multiple selections) // Value can be an array (with choice list and multiple selections)
if ( if (
!this._hasValueFromXFA &&
Array.isArray(value) && Array.isArray(value) &&
Array.isArray(this.data.fieldValue) && Array.isArray(this.data.fieldValue) &&
value.length === this.data.fieldValue.length && value.length === this.data.fieldValue.length &&
@ -1493,14 +1509,23 @@ class WidgetAnnotation extends Annotation {
async _getAppearance(evaluator, task, annotationStorage) { async _getAppearance(evaluator, task, annotationStorage) {
const isPassword = this.hasFieldFlag(AnnotationFieldFlag.PASSWORD); const isPassword = this.hasFieldFlag(AnnotationFieldFlag.PASSWORD);
if (!annotationStorage || isPassword) { if (isPassword) {
return null; return null;
} }
const storageEntry = annotationStorage.get(this.data.id); const storageEntry = annotationStorage
? annotationStorage.get(this.data.id)
: undefined;
let value = storageEntry && storageEntry.value; let value = storageEntry && storageEntry.value;
if (value === undefined) { if (value === undefined) {
// The annotation hasn't been rendered so use the appearance if (!this._hasValueFromXFA || this.appearance) {
return null; // The annotation hasn't been rendered so use the appearance.
return null;
}
// The annotation has its value in XFA datasets but not in the V field.
value = this.data.fieldValue;
if (!value) {
return "";
}
} }
value = value.trim(); value = value.trim();

View File

@ -0,0 +1,70 @@
/* Copyright 2022 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { parseXFAPath } from "./core_utils.js";
import { SimpleXMLParser } from "./xml_parser.js";
/**
 * XML parser specialised for locating the `xfa:datasets` element.
 *
 * As soon as that element is closed, it is stored on `this.node` and the
 * parsing is interrupted by throwing; callers are expected to catch that
 * error and then read `node` (which stays `null` when nothing was found).
 */
class DatasetXMLParser extends SimpleXMLParser {
  constructor(options) {
    super(options);
    // Filled in by `onEndElement` once `xfa:datasets` has been closed.
    this.node = null;
  }

  onEndElement(name) {
    const closedElement = super.onEndElement(name);
    if (!closedElement || name !== "xfa:datasets") {
      return;
    }
    this.node = closedElement;
    // Nothing after this element is of interest, hence we deliberately
    // abort the parser by throwing.
    throw new Error("Aborting DatasetXMLParser.");
  }
}
/**
 * Gives access to the values stored in a parsed datasets node.
 */
class DatasetReader {
  /**
   * @param {Object} data - Either `{ datasets }`, a string parsed as a whole
   *   XML document whose `documentElement` is kept, or `{ xdp }`, a string
   *   scanned with `DatasetXMLParser` to extract the node it captures.
   */
  constructor(data) {
    const parserOptions = { hasAttributes: true };

    if (!data.datasets) {
      const xdpParser = new DatasetXMLParser(parserOptions);
      try {
        xdpParser.parseFromString(data.xdp);
      } catch (_) {
        // `DatasetXMLParser` throws on purpose once it has found its node;
        // any other error raised while parsing is ignored as well.
      }
      // Stays `null` when the parser didn't capture anything.
      this.node = xdpParser.node;
      return;
    }

    const parsed = new SimpleXMLParser(parserOptions).parseFromString(
      data.datasets
    );
    this.node = parsed.documentElement;
  }

  /**
   * Look up the value found at `path`.
   *
   * @param {string} path - The path, handed over to `parseXFAPath`.
   * @returns {string|Array<string>} An empty string when there's no node,
   *   no path or no match; the text content of every child when the first
   *   child of the matching node is named `value`; the text content of the
   *   matching node otherwise.
   */
  getValue(path) {
    if (!(this.node && path)) {
      return "";
    }

    const target = this.node.searchNode(parseXFAPath(path), 0);
    if (!target) {
      return "";
    }

    const { firstChild } = target;
    const startsWithValue = firstChild && firstChild.nodeName === "value";
    return startsWithValue
      ? target.children.map(({ textContent }) => textContent)
      : target.textContent;
  }
}
export { DatasetReader };

View File

@ -47,6 +47,7 @@ import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js"; import { calculateMD5 } from "./crypto.js";
import { Catalog } from "./catalog.js"; import { Catalog } from "./catalog.js";
import { clearGlobalCaches } from "./cleanup_helper.js"; import { clearGlobalCaches } from "./cleanup_helper.js";
import { DatasetReader } from "./dataset_reader.js";
import { Linearization } from "./parser.js"; import { Linearization } from "./parser.js";
import { NullStream } from "./stream.js"; import { NullStream } from "./stream.js";
import { ObjectLoader } from "./object_loader.js"; import { ObjectLoader } from "./object_loader.js";
@ -820,6 +821,47 @@ class PDFDocument {
}); });
} }
get xfaDatasets() {
const acroForm = this.catalog.acroForm;
if (!acroForm) {
return shadow(this, "xfaDatasets", null);
}
const xfa = acroForm.get("XFA");
if (xfa instanceof BaseStream && !xfa.isEmpty) {
try {
const xdp = stringToUTF8String(xfa.getString());
return shadow(this, "xfaDatasets", new DatasetReader({ xdp }));
} catch (_) {
warn("XFA - Invalid utf-8 string.");
return shadow(this, "xfaDatasets", null);
}
}
if (!Array.isArray(xfa) || xfa.length === 0) {
return null;
}
for (let i = 0, ii = xfa.length; i < ii; i += 2) {
if (xfa[i] !== "datasets") {
continue;
}
const data = this.xref.fetchIfRef(xfa[i + 1]);
if (!(data instanceof BaseStream) || data.isEmpty) {
continue;
}
try {
const datasets = stringToUTF8String(data.getString());
return shadow(this, "xfaDatasets", new DatasetReader({ datasets }));
} catch (_) {
warn("XFA - Invalid utf-8 string.");
return shadow(this, "xfaDatasets", null);
}
}
return shadow(this, "xfaDatasets", null);
}
get xfaData() { get xfaData() {
const acroForm = this.catalog.acroForm; const acroForm = this.catalog.acroForm;
if (!acroForm) { if (!acroForm) {

View File

@ -328,6 +328,10 @@ class SimpleDOMNode {
.join(""); .join("");
} }
get children() {
return this.childNodes || [];
}
hasChildNodes() { hasChildNodes() {
return this.childNodes && this.childNodes.length > 0; return this.childNodes && this.childNodes.length > 0;
} }
@ -492,11 +496,12 @@ class SimpleXMLParser extends XMLParserBase {
this._currentFragment = this._stack.pop() || []; this._currentFragment = this._stack.pop() || [];
const lastElement = this._currentFragment[this._currentFragment.length - 1]; const lastElement = this._currentFragment[this._currentFragment.length - 1];
if (!lastElement) { if (!lastElement) {
return; return null;
} }
for (let i = 0, ii = lastElement.childNodes.length; i < ii; i++) { for (let i = 0, ii = lastElement.childNodes.length; i < ii; i++) {
lastElement.childNodes[i].parentNode = lastElement; lastElement.childNodes[i].parentNode = lastElement;
} }
return lastElement;
} }
onError(code) { onError(code) {

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/8283456/1647183160545.pdf

View File

@ -6332,5 +6332,27 @@
"md5": "5d1bfcc3b3130bfa7e33e43990e2213a", "md5": "5d1bfcc3b3130bfa7e33e43990e2213a",
"rounds": 1, "rounds": 1,
"type": "text" "type": "text"
},
{ "id": "issue14685",
"file": "pdfs/issue14685.pdf",
"md5": "2c608203b9b1d13455f0b1d9cebc9515",
"rounds": 1,
"link": true,
"lastPage": 1,
"type": "eq"
},
{ "id": "issue14685-print",
"file": "pdfs/issue14685.pdf",
"md5": "2c608203b9b1d13455f0b1d9cebc9515",
"rounds": 1,
"link": true,
"lastPage": 1,
"type": "eq",
"print": true,
"annotationStorage": {
"150R": {
"value": "Hello PDF.js World"
}
}
} }
] ]