[api-minor] Change the "dc:creator" Metadata field to an Array

- Add scripting support for `doc.info.authors`.
- `doc.info.metadata` is now the raw metadata string, including its XML markup.
This commit is contained in:
Calixte Denizet 2021-01-08 18:40:09 +01:00
parent 35845d1bbb
commit 43d5512f5c
10 changed files with 97 additions and 33 deletions

View File

@ -130,9 +130,7 @@ function updateXFA(datasetsRef, newRefs, xref) {
} }
const datasets = xref.fetchIfRef(datasetsRef); const datasets = xref.fetchIfRef(datasetsRef);
const str = bytesToString(datasets.getBytes()); const str = bytesToString(datasets.getBytes());
const xml = new SimpleXMLParser(/* hasAttributes */ true).parseFromString( const xml = new SimpleXMLParser({ hasAttributes: true }).parseFromString(str);
str
);
for (const { xfa } of newRefs) { for (const { xfa } of newRefs) {
if (!xfa) { if (!xfa) {

View File

@ -24,7 +24,7 @@ class Metadata {
data = this._repair(data); data = this._repair(data);
// Convert the string to an XML document. // Convert the string to an XML document.
const parser = new SimpleXMLParser(); const parser = new SimpleXMLParser({ lowerCaseName: true });
const xmlDocument = parser.parseFromString(data); const xmlDocument = parser.parseFromString(data);
this._metadataMap = new Map(); this._metadataMap = new Map();
@ -32,6 +32,7 @@ class Metadata {
if (xmlDocument) { if (xmlDocument) {
this._parse(xmlDocument); this._parse(xmlDocument);
} }
this._data = data;
} }
_repair(data) { _repair(data) {
@ -79,38 +80,69 @@ class Metadata {
}); });
} }
_getSequence(entry) {
const name = entry.nodeName;
if (name !== "rdf:bag" && name !== "rdf:seq" && name !== "rdf:alt") {
return null;
}
return entry.childNodes.filter(node => node.nodeName === "rdf:li");
}
_getCreators(entry) {
if (entry.nodeName !== "dc:creator") {
return false;
}
if (!entry.hasChildNodes()) {
return true;
}
// Child must be a Bag (unordered array) or a Seq.
const seqNode = entry.childNodes[0];
const authors = this._getSequence(seqNode) || [];
this._metadataMap.set(
entry.nodeName,
authors.map(node => node.textContent.trim())
);
return true;
}
_parse(xmlDocument) { _parse(xmlDocument) {
let rdf = xmlDocument.documentElement; let rdf = xmlDocument.documentElement;
if (rdf.nodeName.toLowerCase() !== "rdf:rdf") { if (rdf.nodeName !== "rdf:rdf") {
// Wrapped in <xmpmeta> // Wrapped in <xmpmeta>
rdf = rdf.firstChild; rdf = rdf.firstChild;
while (rdf && rdf.nodeName.toLowerCase() !== "rdf:rdf") { while (rdf && rdf.nodeName !== "rdf:rdf") {
rdf = rdf.nextSibling; rdf = rdf.nextSibling;
} }
} }
const nodeName = rdf ? rdf.nodeName.toLowerCase() : null; if (!rdf || rdf.nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
if (!rdf || nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
return; return;
} }
const children = rdf.childNodes; for (const desc of rdf.childNodes) {
for (let i = 0, ii = children.length; i < ii; i++) { if (desc.nodeName !== "rdf:description") {
const desc = children[i];
if (desc.nodeName.toLowerCase() !== "rdf:description") {
continue; continue;
} }
for (let j = 0, jj = desc.childNodes.length; j < jj; j++) { for (const entry of desc.childNodes) {
if (desc.childNodes[j].nodeName.toLowerCase() !== "#text") { const name = entry.nodeName;
const entry = desc.childNodes[j]; if (name === "#text") {
const name = entry.nodeName.toLowerCase(); continue;
}
if (this._getCreators(entry)) {
continue;
}
this._metadataMap.set(name, entry.textContent.trim()); this._metadataMap.set(name, entry.textContent.trim());
} }
} }
} }
getRaw() {
return this._data;
} }
get(name) { get(name) {

View File

@ -42,7 +42,7 @@ class Doc extends PDFObject {
this._dirty = false; this._dirty = false;
this._disclosed = false; this._disclosed = false;
this._media = undefined; this._media = undefined;
this._metadata = data.metadata; this._metadata = data.metadata || "";
this._noautocomplete = undefined; this._noautocomplete = undefined;
this._nocache = undefined; this._nocache = undefined;
this._spellDictionaryOrder = []; this._spellDictionaryOrder = [];
@ -74,12 +74,13 @@ class Doc extends PDFObject {
// and they are read-only. // and they are read-only.
this._info = new Proxy( this._info = new Proxy(
{ {
title: this.title, title: this._title,
author: this.author, author: this._author,
subject: this.subject, authors: data.authors || [this._author],
keywords: this.keywords, subject: this._subject,
creator: this.creator, keywords: this._keywords,
producer: this.producer, creator: this._creator,
producer: this._producer,
creationdate: this._creationDate, creationdate: this._creationDate,
moddate: this._modDate, moddate: this._modDate,
trapped: data.Trapped || "Unknown", trapped: data.Trapped || "Unknown",

View File

@ -427,12 +427,13 @@ class SimpleDOMNode {
} }
class SimpleXMLParser extends XMLParserBase { class SimpleXMLParser extends XMLParserBase {
constructor(hasAttributes = false) { constructor({ hasAttributes = false, lowerCaseName = false }) {
super(); super();
this._currentFragment = null; this._currentFragment = null;
this._stack = null; this._stack = null;
this._errorCode = XMLParserErrorCode.NoError; this._errorCode = XMLParserErrorCode.NoError;
this._hasAttributes = hasAttributes; this._hasAttributes = hasAttributes;
this._lowerCaseName = lowerCaseName;
} }
parseFromString(data) { parseFromString(data) {
@ -476,6 +477,9 @@ class SimpleXMLParser extends XMLParserBase {
} }
onBeginElement(name, attributes, isEmpty) { onBeginElement(name, attributes, isEmpty) {
if (this._lowerCaseName) {
name = name.toLowerCase();
}
const node = new SimpleDOMNode(name); const node = new SimpleDOMNode(name);
node.childNodes = []; node.childNodes = [];
if (this._hasAttributes) { if (this._hasAttributes) {

View File

@ -451,4 +451,29 @@ describe("Interaction", () => {
); );
}); });
}); });
// Integration test: after the element with annotation id "26R" is clicked,
// the input matched by `fieldSelector` must contain the document's authors
// joined with "::".
describe("in js-authors.pdf", () => {
  // "\32 " is the CSS escape for the digit "2", so this selects id "25R".
  const fieldSelector = "#\\32 5R";
  const expectedAuthors = "author1::author2::author3::author4::author5";
  let pages;

  beforeAll(async () => {
    pages = await loadAndWait("js-authors.pdf", fieldSelector);
  });

  afterAll(async () => {
    await closePages(pages);
  });

  it("must print authors in a text field", async () => {
    // Runs the check in one browser; `pages` holds [browserName, page] pairs.
    const checkPage = async ([browserName, page]) => {
      const triggerAction = async () => {
        await page.click("[data-annotation-id='26R']");
      };
      const text = await actAndWaitForInput(page, fieldSelector, triggerAction);
      expect(text).withContext(`In ${browserName}`).toEqual(expectedAuthors);
    };

    await Promise.all(pages.map(checkPage));
  });
});
}); });

View File

@ -324,6 +324,7 @@
!tensor-allflags-withfunction.pdf !tensor-allflags-withfunction.pdf
!issue10084_reduced.pdf !issue10084_reduced.pdf
!issue4246.pdf !issue4246.pdf
!js-authors.pdf
!issue4461.pdf !issue4461.pdf
!issue4573.pdf !issue4573.pdf
!issue4722.pdf !issue4722.pdf

BIN
test/pdfs/js-authors.pdf Normal file

Binary file not shown.

View File

@ -96,7 +96,7 @@ describe("metadata", function () {
expect(metadata.get("dc:qux")).toEqual(null); expect(metadata.get("dc:qux")).toEqual(null);
expect(metadata.getAll()).toEqual({ expect(metadata.getAll()).toEqual({
"dc:creator": "ODIS", "dc:creator": ["ODIS"],
"dc:title": "L'Odissee thématique logo Odisséé - décembre 2008.pub", "dc:title": "L'Odissee thématique logo Odisséé - décembre 2008.pub",
"xap:creatortool": "PDFCreator Version 0.9.6", "xap:creatortool": "PDFCreator Version 0.9.6",
}); });
@ -168,7 +168,7 @@ describe("metadata", function () {
expect(metadata.get("dc:qux")).toEqual(null); expect(metadata.get("dc:qux")).toEqual(null);
expect(metadata.getAll()).toEqual({ expect(metadata.getAll()).toEqual({
"dc:creator": "", "dc:creator": [""],
"dc:description": "", "dc:description": "",
"dc:format": "application/pdf", "dc:format": "application/pdf",
"dc:subject": "", "dc:subject": "",

View File

@ -47,8 +47,9 @@ describe("XML", function () {
<g a="121110"/> <g a="121110"/>
</b> </b>
</a>`; </a>`;
const root = new SimpleXMLParser(true).parseFromString(xml) const root = new SimpleXMLParser({ hasAttributes: true }).parseFromString(
.documentElement; xml
).documentElement;
function getAttr(path) { function getAttr(path) {
return root.searchNode(parseXFAPath(path), 0).attributes[0].value; return root.searchNode(parseXFAPath(path), 0).attributes[0].value;
} }
@ -96,8 +97,9 @@ describe("XML", function () {
<g a="121110"/> <g a="121110"/>
</b> </b>
</a>`; </a>`;
const root = new SimpleXMLParser(true).parseFromString(xml) const root = new SimpleXMLParser({ hasAttributes: true }).parseFromString(
.documentElement; xml
).documentElement;
const buffer = []; const buffer = [];
root.dump(buffer); root.dump(buffer);

View File

@ -1655,7 +1655,8 @@ const PDFViewerApplication = {
baseURL: this.baseUrl, baseURL: this.baseUrl,
filesize: this._contentLength, filesize: this._contentLength,
filename: this._docFilename, filename: this._docFilename,
metadata: this.metadata, metadata: this.metadata?.getRaw(),
authors: this.metadata?.get("dc:creator"),
numPages: pdfDocument.numPages, numPages: pdfDocument.numPages,
URL: this.url, URL: this.url,
actions: docActions, actions: docActions,