[api-minor] Change the "dc:creator" Metadata field to an Array
- Add scripting support for `doc.info.authors`.
- `doc.info.metadata` is now the raw XML string.
This commit is contained in:
parent
35845d1bbb
commit
43d5512f5c
@ -130,9 +130,7 @@ function updateXFA(datasetsRef, newRefs, xref) {
|
|||||||
}
|
}
|
||||||
const datasets = xref.fetchIfRef(datasetsRef);
|
const datasets = xref.fetchIfRef(datasetsRef);
|
||||||
const str = bytesToString(datasets.getBytes());
|
const str = bytesToString(datasets.getBytes());
|
||||||
const xml = new SimpleXMLParser(/* hasAttributes */ true).parseFromString(
|
const xml = new SimpleXMLParser({ hasAttributes: true }).parseFromString(str);
|
||||||
str
|
|
||||||
);
|
|
||||||
|
|
||||||
for (const { xfa } of newRefs) {
|
for (const { xfa } of newRefs) {
|
||||||
if (!xfa) {
|
if (!xfa) {
|
||||||
|
@ -24,7 +24,7 @@ class Metadata {
|
|||||||
data = this._repair(data);
|
data = this._repair(data);
|
||||||
|
|
||||||
// Convert the string to an XML document.
|
// Convert the string to an XML document.
|
||||||
const parser = new SimpleXMLParser();
|
const parser = new SimpleXMLParser({ lowerCaseName: true });
|
||||||
const xmlDocument = parser.parseFromString(data);
|
const xmlDocument = parser.parseFromString(data);
|
||||||
|
|
||||||
this._metadataMap = new Map();
|
this._metadataMap = new Map();
|
||||||
@ -32,6 +32,7 @@ class Metadata {
|
|||||||
if (xmlDocument) {
|
if (xmlDocument) {
|
||||||
this._parse(xmlDocument);
|
this._parse(xmlDocument);
|
||||||
}
|
}
|
||||||
|
this._data = data;
|
||||||
}
|
}
|
||||||
|
|
||||||
_repair(data) {
|
_repair(data) {
|
||||||
@ -79,38 +80,69 @@ class Metadata {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_getSequence(entry) {
|
||||||
|
const name = entry.nodeName;
|
||||||
|
if (name !== "rdf:bag" && name !== "rdf:seq" && name !== "rdf:alt") {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return entry.childNodes.filter(node => node.nodeName === "rdf:li");
|
||||||
|
}
|
||||||
|
|
||||||
|
_getCreators(entry) {
|
||||||
|
if (entry.nodeName !== "dc:creator") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!entry.hasChildNodes()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Child must be a Bag (unordered array) or a Seq.
|
||||||
|
const seqNode = entry.childNodes[0];
|
||||||
|
const authors = this._getSequence(seqNode) || [];
|
||||||
|
this._metadataMap.set(
|
||||||
|
entry.nodeName,
|
||||||
|
authors.map(node => node.textContent.trim())
|
||||||
|
);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
_parse(xmlDocument) {
|
_parse(xmlDocument) {
|
||||||
let rdf = xmlDocument.documentElement;
|
let rdf = xmlDocument.documentElement;
|
||||||
|
|
||||||
if (rdf.nodeName.toLowerCase() !== "rdf:rdf") {
|
if (rdf.nodeName !== "rdf:rdf") {
|
||||||
// Wrapped in <xmpmeta>
|
// Wrapped in <xmpmeta>
|
||||||
rdf = rdf.firstChild;
|
rdf = rdf.firstChild;
|
||||||
while (rdf && rdf.nodeName.toLowerCase() !== "rdf:rdf") {
|
while (rdf && rdf.nodeName !== "rdf:rdf") {
|
||||||
rdf = rdf.nextSibling;
|
rdf = rdf.nextSibling;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const nodeName = rdf ? rdf.nodeName.toLowerCase() : null;
|
if (!rdf || rdf.nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
|
||||||
if (!rdf || nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const children = rdf.childNodes;
|
for (const desc of rdf.childNodes) {
|
||||||
for (let i = 0, ii = children.length; i < ii; i++) {
|
if (desc.nodeName !== "rdf:description") {
|
||||||
const desc = children[i];
|
|
||||||
if (desc.nodeName.toLowerCase() !== "rdf:description") {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (let j = 0, jj = desc.childNodes.length; j < jj; j++) {
|
for (const entry of desc.childNodes) {
|
||||||
if (desc.childNodes[j].nodeName.toLowerCase() !== "#text") {
|
const name = entry.nodeName;
|
||||||
const entry = desc.childNodes[j];
|
if (name === "#text") {
|
||||||
const name = entry.nodeName.toLowerCase();
|
continue;
|
||||||
|
}
|
||||||
|
if (this._getCreators(entry)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
this._metadataMap.set(name, entry.textContent.trim());
|
this._metadataMap.set(name, entry.textContent.trim());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
getRaw() {
|
||||||
|
return this._data;
|
||||||
}
|
}
|
||||||
|
|
||||||
get(name) {
|
get(name) {
|
||||||
|
@ -42,7 +42,7 @@ class Doc extends PDFObject {
|
|||||||
this._dirty = false;
|
this._dirty = false;
|
||||||
this._disclosed = false;
|
this._disclosed = false;
|
||||||
this._media = undefined;
|
this._media = undefined;
|
||||||
this._metadata = data.metadata;
|
this._metadata = data.metadata || "";
|
||||||
this._noautocomplete = undefined;
|
this._noautocomplete = undefined;
|
||||||
this._nocache = undefined;
|
this._nocache = undefined;
|
||||||
this._spellDictionaryOrder = [];
|
this._spellDictionaryOrder = [];
|
||||||
@ -74,12 +74,13 @@ class Doc extends PDFObject {
|
|||||||
// and they are read-only.
|
// and they are read-only.
|
||||||
this._info = new Proxy(
|
this._info = new Proxy(
|
||||||
{
|
{
|
||||||
title: this.title,
|
title: this._title,
|
||||||
author: this.author,
|
author: this._author,
|
||||||
subject: this.subject,
|
authors: data.authors || [this._author],
|
||||||
keywords: this.keywords,
|
subject: this._subject,
|
||||||
creator: this.creator,
|
keywords: this._keywords,
|
||||||
producer: this.producer,
|
creator: this._creator,
|
||||||
|
producer: this._producer,
|
||||||
creationdate: this._creationDate,
|
creationdate: this._creationDate,
|
||||||
moddate: this._modDate,
|
moddate: this._modDate,
|
||||||
trapped: data.Trapped || "Unknown",
|
trapped: data.Trapped || "Unknown",
|
||||||
|
@ -427,12 +427,13 @@ class SimpleDOMNode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class SimpleXMLParser extends XMLParserBase {
|
class SimpleXMLParser extends XMLParserBase {
|
||||||
constructor(hasAttributes = false) {
|
constructor({ hasAttributes = false, lowerCaseName = false }) {
|
||||||
super();
|
super();
|
||||||
this._currentFragment = null;
|
this._currentFragment = null;
|
||||||
this._stack = null;
|
this._stack = null;
|
||||||
this._errorCode = XMLParserErrorCode.NoError;
|
this._errorCode = XMLParserErrorCode.NoError;
|
||||||
this._hasAttributes = hasAttributes;
|
this._hasAttributes = hasAttributes;
|
||||||
|
this._lowerCaseName = lowerCaseName;
|
||||||
}
|
}
|
||||||
|
|
||||||
parseFromString(data) {
|
parseFromString(data) {
|
||||||
@ -476,6 +477,9 @@ class SimpleXMLParser extends XMLParserBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
onBeginElement(name, attributes, isEmpty) {
|
onBeginElement(name, attributes, isEmpty) {
|
||||||
|
if (this._lowerCaseName) {
|
||||||
|
name = name.toLowerCase();
|
||||||
|
}
|
||||||
const node = new SimpleDOMNode(name);
|
const node = new SimpleDOMNode(name);
|
||||||
node.childNodes = [];
|
node.childNodes = [];
|
||||||
if (this._hasAttributes) {
|
if (this._hasAttributes) {
|
||||||
|
@ -451,4 +451,29 @@ describe("Interaction", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Integration check for `js-authors.pdf`: clicks the element with
// data-annotation-id "26R" and expects the value reported by
// `actAndWaitForInput` for the output field to be the "::"-joined author list.
describe("in js-authors.pdf", () => {
  // `\32 ` is the CSS escape for the digit "2", so this selects id "25R".
  const outputSelector = "#\\32 5R";
  const triggerSelector = "[data-annotation-id='26R']";
  const expectedText = "author1::author2::author3::author4::author5";
  let pages;

  beforeAll(async () => {
    pages = await loadAndWait("js-authors.pdf", outputSelector);
  });

  afterAll(async () => {
    await closePages(pages);
  });

  it("must print authors in a text field", async () => {
    // Runs against every [browserName, page] pair concurrently.
    const checkPage = async ([browserName, page]) => {
      const clickTrigger = async () => {
        await page.click(triggerSelector);
      };
      const text = await actAndWaitForInput(page, outputSelector, clickTrigger);
      expect(text).withContext(`In ${browserName}`).toEqual(expectedText);
    };

    await Promise.all(pages.map(checkPage));
  });
});
|
||||||
});
|
});
|
||||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -324,6 +324,7 @@
|
|||||||
!tensor-allflags-withfunction.pdf
|
!tensor-allflags-withfunction.pdf
|
||||||
!issue10084_reduced.pdf
|
!issue10084_reduced.pdf
|
||||||
!issue4246.pdf
|
!issue4246.pdf
|
||||||
|
!js-authors.pdf
|
||||||
!issue4461.pdf
|
!issue4461.pdf
|
||||||
!issue4573.pdf
|
!issue4573.pdf
|
||||||
!issue4722.pdf
|
!issue4722.pdf
|
||||||
|
BIN
test/pdfs/js-authors.pdf
Normal file
BIN
test/pdfs/js-authors.pdf
Normal file
Binary file not shown.
@ -96,7 +96,7 @@ describe("metadata", function () {
|
|||||||
expect(metadata.get("dc:qux")).toEqual(null);
|
expect(metadata.get("dc:qux")).toEqual(null);
|
||||||
|
|
||||||
expect(metadata.getAll()).toEqual({
|
expect(metadata.getAll()).toEqual({
|
||||||
"dc:creator": "ODIS",
|
"dc:creator": ["ODIS"],
|
||||||
"dc:title": "L'Odissee thématique logo Odisséé - décembre 2008.pub",
|
"dc:title": "L'Odissee thématique logo Odisséé - décembre 2008.pub",
|
||||||
"xap:creatortool": "PDFCreator Version 0.9.6",
|
"xap:creatortool": "PDFCreator Version 0.9.6",
|
||||||
});
|
});
|
||||||
@ -168,7 +168,7 @@ describe("metadata", function () {
|
|||||||
expect(metadata.get("dc:qux")).toEqual(null);
|
expect(metadata.get("dc:qux")).toEqual(null);
|
||||||
|
|
||||||
expect(metadata.getAll()).toEqual({
|
expect(metadata.getAll()).toEqual({
|
||||||
"dc:creator": "",
|
"dc:creator": [""],
|
||||||
"dc:description": "",
|
"dc:description": "",
|
||||||
"dc:format": "application/pdf",
|
"dc:format": "application/pdf",
|
||||||
"dc:subject": "",
|
"dc:subject": "",
|
||||||
|
@ -47,8 +47,9 @@ describe("XML", function () {
|
|||||||
<g a="121110"/>
|
<g a="121110"/>
|
||||||
</b>
|
</b>
|
||||||
</a>`;
|
</a>`;
|
||||||
const root = new SimpleXMLParser(true).parseFromString(xml)
|
const root = new SimpleXMLParser({ hasAttributes: true }).parseFromString(
|
||||||
.documentElement;
|
xml
|
||||||
|
).documentElement;
|
||||||
function getAttr(path) {
|
function getAttr(path) {
|
||||||
return root.searchNode(parseXFAPath(path), 0).attributes[0].value;
|
return root.searchNode(parseXFAPath(path), 0).attributes[0].value;
|
||||||
}
|
}
|
||||||
@ -96,8 +97,9 @@ describe("XML", function () {
|
|||||||
<g a="121110"/>
|
<g a="121110"/>
|
||||||
</b>
|
</b>
|
||||||
</a>`;
|
</a>`;
|
||||||
const root = new SimpleXMLParser(true).parseFromString(xml)
|
const root = new SimpleXMLParser({ hasAttributes: true }).parseFromString(
|
||||||
.documentElement;
|
xml
|
||||||
|
).documentElement;
|
||||||
const buffer = [];
|
const buffer = [];
|
||||||
root.dump(buffer);
|
root.dump(buffer);
|
||||||
|
|
||||||
|
@ -1655,7 +1655,8 @@ const PDFViewerApplication = {
|
|||||||
baseURL: this.baseUrl,
|
baseURL: this.baseUrl,
|
||||||
filesize: this._contentLength,
|
filesize: this._contentLength,
|
||||||
filename: this._docFilename,
|
filename: this._docFilename,
|
||||||
metadata: this.metadata,
|
metadata: this.metadata?.getRaw(),
|
||||||
|
authors: this.metadata?.get("dc:creator"),
|
||||||
numPages: pdfDocument.numPages,
|
numPages: pdfDocument.numPages,
|
||||||
URL: this.url,
|
URL: this.url,
|
||||||
actions: docActions,
|
actions: docActions,
|
||||||
|
Loading…
Reference in New Issue
Block a user