Merge pull request #12997 from Snuffleupagus/metadata-worker
Move the Metadata parsing to the worker-thread
This commit is contained in:
commit
4619b1b568
@ -316,8 +316,54 @@ function collectActions(xref, dict, eventType) {
|
|||||||
return objectSize(actions) > 0 ? actions : null;
|
return objectSize(actions) > 0 ? actions : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const XMLEntities = {
|
||||||
|
/* < */ 0x3c: "&lt;",
|
||||||
|
/* > */ 0x3e: "&gt;",
|
||||||
|
/* & */ 0x26: "&amp;",
|
||||||
|
/* " */ 0x22: "&quot;",
|
||||||
|
/* ' */ 0x27: "&apos;",
|
||||||
|
};
|
||||||
|
|
||||||
|
function encodeToXmlString(str) {
|
||||||
|
const buffer = [];
|
||||||
|
let start = 0;
|
||||||
|
for (let i = 0, ii = str.length; i < ii; i++) {
|
||||||
|
const char = str.codePointAt(i);
|
||||||
|
if (0x20 <= char && char <= 0x7e) {
|
||||||
|
// ascii
|
||||||
|
const entity = XMLEntities[char];
|
||||||
|
if (entity) {
|
||||||
|
if (start < i) {
|
||||||
|
buffer.push(str.substring(start, i));
|
||||||
|
}
|
||||||
|
buffer.push(entity);
|
||||||
|
start = i + 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (start < i) {
|
||||||
|
buffer.push(str.substring(start, i));
|
||||||
|
}
|
||||||
|
buffer.push(`&#x${char.toString(16).toUpperCase()};`);
|
||||||
|
if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) {
|
||||||
|
// char is represented by two u16
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
start = i + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (buffer.length === 0) {
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
if (start < str.length) {
|
||||||
|
buffer.push(str.substring(start, str.length));
|
||||||
|
}
|
||||||
|
return buffer.join("");
|
||||||
|
}
|
||||||
|
|
||||||
export {
|
export {
|
||||||
collectActions,
|
collectActions,
|
||||||
|
encodeToXmlString,
|
||||||
escapePDFName,
|
escapePDFName,
|
||||||
getArrayLookupTableFactory,
|
getArrayLookupTableFactory,
|
||||||
getInheritableProperty,
|
getInheritableProperty,
|
||||||
|
146
src/core/metadata_parser.js
Normal file
146
src/core/metadata_parser.js
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
/* Copyright 2012 Mozilla Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { SimpleXMLParser } from "./xml_parser.js";
|
||||||
|
|
||||||
|
class MetadataParser {
|
||||||
|
constructor(data) {
|
||||||
|
// Ghostscript may produce invalid metadata, so try to repair that first.
|
||||||
|
data = this._repair(data);
|
||||||
|
|
||||||
|
// Convert the string to an XML document.
|
||||||
|
const parser = new SimpleXMLParser({ lowerCaseName: true });
|
||||||
|
const xmlDocument = parser.parseFromString(data);
|
||||||
|
|
||||||
|
this._metadataMap = new Map();
|
||||||
|
this._data = data;
|
||||||
|
|
||||||
|
if (xmlDocument) {
|
||||||
|
this._parse(xmlDocument);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_repair(data) {
|
||||||
|
// Start by removing any "junk" before the first tag (see issue 10395).
|
||||||
|
return data
|
||||||
|
.replace(/^[^<]+/, "")
|
||||||
|
.replace(/>\\376\\377([^<]+)/g, function (all, codes) {
|
||||||
|
const bytes = codes
|
||||||
|
.replace(/\\([0-3])([0-7])([0-7])/g, function (code, d1, d2, d3) {
|
||||||
|
return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1);
|
||||||
|
})
|
||||||
|
.replace(/&(amp|apos|gt|lt|quot);/g, function (str, name) {
|
||||||
|
switch (name) {
|
||||||
|
case "amp":
|
||||||
|
return "&";
|
||||||
|
case "apos":
|
||||||
|
return "'";
|
||||||
|
case "gt":
|
||||||
|
return ">";
|
||||||
|
case "lt":
|
||||||
|
return "<";
|
||||||
|
case "quot":
|
||||||
|
return '"';
|
||||||
|
}
|
||||||
|
throw new Error(`_repair: ${name} isn't defined.`);
|
||||||
|
});
|
||||||
|
|
||||||
|
const charBuf = [];
|
||||||
|
for (let i = 0, ii = bytes.length; i < ii; i += 2) {
|
||||||
|
const code = bytes.charCodeAt(i) * 256 + bytes.charCodeAt(i + 1);
|
||||||
|
if (
|
||||||
|
code >= /* Space = */ 32 &&
|
||||||
|
code < /* Delete = */ 127 &&
|
||||||
|
code !== /* '<' = */ 60 &&
|
||||||
|
code !== /* '>' = */ 62 &&
|
||||||
|
code !== /* '&' = */ 38
|
||||||
|
) {
|
||||||
|
charBuf.push(String.fromCharCode(code));
|
||||||
|
} else {
|
||||||
|
charBuf.push(
|
||||||
|
"&#x" + (0x10000 + code).toString(16).substring(1) + ";"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ">" + charBuf.join("");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
_getSequence(entry) {
|
||||||
|
const name = entry.nodeName;
|
||||||
|
if (name !== "rdf:bag" && name !== "rdf:seq" && name !== "rdf:alt") {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return entry.childNodes.filter(node => node.nodeName === "rdf:li");
|
||||||
|
}
|
||||||
|
|
||||||
|
_parseArray(entry) {
|
||||||
|
if (!entry.hasChildNodes()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Child must be a Bag (unordered array) or a Seq.
|
||||||
|
const [seqNode] = entry.childNodes;
|
||||||
|
const sequence = this._getSequence(seqNode) || [];
|
||||||
|
|
||||||
|
this._metadataMap.set(
|
||||||
|
entry.nodeName,
|
||||||
|
sequence.map(node => node.textContent.trim())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
_parse(xmlDocument) {
|
||||||
|
let rdf = xmlDocument.documentElement;
|
||||||
|
|
||||||
|
if (rdf.nodeName !== "rdf:rdf") {
|
||||||
|
// Wrapped in <xmpmeta>
|
||||||
|
rdf = rdf.firstChild;
|
||||||
|
while (rdf && rdf.nodeName !== "rdf:rdf") {
|
||||||
|
rdf = rdf.nextSibling;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!rdf || rdf.nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const desc of rdf.childNodes) {
|
||||||
|
if (desc.nodeName !== "rdf:description") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const entry of desc.childNodes) {
|
||||||
|
const name = entry.nodeName;
|
||||||
|
switch (name) {
|
||||||
|
case "#text":
|
||||||
|
continue;
|
||||||
|
case "dc:creator":
|
||||||
|
case "dc:subject":
|
||||||
|
this._parseArray(entry);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
this._metadataMap.set(name, entry.textContent.trim());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
get serializable() {
|
||||||
|
return {
|
||||||
|
parsedData: this._metadataMap,
|
||||||
|
rawData: this._data,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export { MetadataParser };
|
@ -59,6 +59,7 @@ import { Lexer, Parser } from "./parser.js";
|
|||||||
import { CipherTransformFactory } from "./crypto.js";
|
import { CipherTransformFactory } from "./crypto.js";
|
||||||
import { ColorSpace } from "./colorspace.js";
|
import { ColorSpace } from "./colorspace.js";
|
||||||
import { GlobalImageCache } from "./image_utils.js";
|
import { GlobalImageCache } from "./image_utils.js";
|
||||||
|
import { MetadataParser } from "./metadata_parser.js";
|
||||||
|
|
||||||
function fetchDestination(dest) {
|
function fetchDestination(dest) {
|
||||||
return isDict(dest) ? dest.get("D") : dest;
|
return isDict(dest) ? dest.get("D") : dest;
|
||||||
@ -131,20 +132,22 @@ class Catalog {
|
|||||||
this.xref.encrypt && this.xref.encrypt.encryptMetadata
|
this.xref.encrypt && this.xref.encrypt.encryptMetadata
|
||||||
);
|
);
|
||||||
const stream = this.xref.fetch(streamRef, suppressEncryption);
|
const stream = this.xref.fetch(streamRef, suppressEncryption);
|
||||||
let metadata;
|
let metadata = null;
|
||||||
|
|
||||||
if (stream && isDict(stream.dict)) {
|
if (isStream(stream) && isDict(stream.dict)) {
|
||||||
const type = stream.dict.get("Type");
|
const type = stream.dict.get("Type");
|
||||||
const subtype = stream.dict.get("Subtype");
|
const subtype = stream.dict.get("Subtype");
|
||||||
|
|
||||||
if (isName(type, "Metadata") && isName(subtype, "XML")) {
|
if (isName(type, "Metadata") && isName(subtype, "XML")) {
|
||||||
// XXX: This should examine the charset the XML document defines,
|
// XXX: This should examine the charset the XML document defines,
|
||||||
// however since there are currently no real means to decode
|
// however since there are currently no real means to decode arbitrary
|
||||||
// arbitrary charsets, let's just hope that the author of the PDF
|
// charsets, let's just hope that the author of the PDF was reasonable
|
||||||
// was reasonable enough to stick with the XML default charset,
|
// enough to stick with the XML default charset, which is UTF-8.
|
||||||
// which is UTF-8.
|
|
||||||
try {
|
try {
|
||||||
metadata = stringToUTF8String(bytesToString(stream.getBytes()));
|
const data = stringToUTF8String(bytesToString(stream.getBytes()));
|
||||||
|
if (data) {
|
||||||
|
metadata = new MetadataParser(data).serializable;
|
||||||
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
if (e instanceof MissingDataException) {
|
if (e instanceof MissingDataException) {
|
||||||
throw e;
|
throw e;
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
import { bytesToString, escapeString, warn } from "../shared/util.js";
|
import { bytesToString, escapeString, warn } from "../shared/util.js";
|
||||||
import { Dict, isDict, isName, isRef, isStream, Name } from "./primitives.js";
|
import { Dict, isDict, isName, isRef, isStream, Name } from "./primitives.js";
|
||||||
import { escapePDFName, parseXFAPath } from "./core_utils.js";
|
import { escapePDFName, parseXFAPath } from "./core_utils.js";
|
||||||
import { SimpleDOMNode, SimpleXMLParser } from "../shared/xml_parser.js";
|
import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
|
||||||
import { calculateMD5 } from "./crypto.js";
|
import { calculateMD5 } from "./crypto.js";
|
||||||
|
|
||||||
function writeDict(dict, buffer, transform) {
|
function writeDict(dict, buffer, transform) {
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { $clean, $finalize, $onChild, $onText } from "./xfa_object.js";
|
import { $clean, $finalize, $onChild, $onText } from "./xfa_object.js";
|
||||||
import { XMLParserBase, XMLParserErrorCode } from "../../shared/xml_parser.js";
|
import { XMLParserBase, XMLParserErrorCode } from "../xml_parser.js";
|
||||||
import { Builder } from "./builder.js";
|
import { Builder } from "./builder.js";
|
||||||
import { warn } from "../../shared/util.js";
|
import { warn } from "../../shared/util.js";
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
// The code for XMLParserBase copied from
|
// The code for XMLParserBase copied from
|
||||||
// https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts
|
// https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts
|
||||||
|
|
||||||
import { encodeToXmlString } from "./util.js";
|
import { encodeToXmlString } from "./core_utils.js";
|
||||||
|
|
||||||
const XMLParserErrorCode = {
|
const XMLParserErrorCode = {
|
||||||
NoError: 0,
|
NoError: 0,
|
@ -1655,8 +1655,24 @@ class LoopbackPort {
|
|||||||
cloned.set(value, result);
|
cloned.set(value, result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
if (value instanceof Map) {
|
||||||
|
result = new Map();
|
||||||
|
cloned.set(value, result); // Adding to cache now for cyclic references.
|
||||||
|
for (const [key, val] of value) {
|
||||||
|
result.set(key, cloneValue(val));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
if (value instanceof Set) {
|
||||||
|
result = new Set();
|
||||||
|
cloned.set(value, result); // Adding to cache now for cyclic references.
|
||||||
|
for (const val of value) {
|
||||||
|
result.add(cloneValue(val));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
result = Array.isArray(value) ? [] : {};
|
result = Array.isArray(value) ? [] : {};
|
||||||
cloned.set(value, result); // adding to cache now for cyclic references
|
cloned.set(value, result); // Adding to cache now for cyclic references.
|
||||||
// Cloning all value and object properties, however ignoring properties
|
// Cloning all value and object properties, however ignoring properties
|
||||||
// defined via getter.
|
// defined via getter.
|
||||||
for (const i in value) {
|
for (const i in value) {
|
||||||
|
@ -13,129 +13,12 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { assert, objectFromEntries } from "../shared/util.js";
|
import { objectFromEntries } from "../shared/util.js";
|
||||||
import { SimpleXMLParser } from "../shared/xml_parser.js";
|
|
||||||
|
|
||||||
class Metadata {
|
class Metadata {
|
||||||
constructor(data) {
|
constructor({ parsedData, rawData }) {
|
||||||
assert(typeof data === "string", "Metadata: input is not a string");
|
this._metadataMap = parsedData;
|
||||||
|
this._data = rawData;
|
||||||
// Ghostscript may produce invalid metadata, so try to repair that first.
|
|
||||||
data = this._repair(data);
|
|
||||||
|
|
||||||
// Convert the string to an XML document.
|
|
||||||
const parser = new SimpleXMLParser({ lowerCaseName: true });
|
|
||||||
const xmlDocument = parser.parseFromString(data);
|
|
||||||
|
|
||||||
this._metadataMap = new Map();
|
|
||||||
|
|
||||||
if (xmlDocument) {
|
|
||||||
this._parse(xmlDocument);
|
|
||||||
}
|
|
||||||
this._data = data;
|
|
||||||
}
|
|
||||||
|
|
||||||
_repair(data) {
|
|
||||||
// Start by removing any "junk" before the first tag (see issue 10395).
|
|
||||||
return data
|
|
||||||
.replace(/^[^<]+/, "")
|
|
||||||
.replace(/>\\376\\377([^<]+)/g, function (all, codes) {
|
|
||||||
const bytes = codes
|
|
||||||
.replace(/\\([0-3])([0-7])([0-7])/g, function (code, d1, d2, d3) {
|
|
||||||
return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1);
|
|
||||||
})
|
|
||||||
.replace(/&(amp|apos|gt|lt|quot);/g, function (str, name) {
|
|
||||||
switch (name) {
|
|
||||||
case "amp":
|
|
||||||
return "&";
|
|
||||||
case "apos":
|
|
||||||
return "'";
|
|
||||||
case "gt":
|
|
||||||
return ">";
|
|
||||||
case "lt":
|
|
||||||
return "<";
|
|
||||||
case "quot":
|
|
||||||
return '"';
|
|
||||||
}
|
|
||||||
throw new Error(`_repair: ${name} isn't defined.`);
|
|
||||||
});
|
|
||||||
|
|
||||||
let chars = "";
|
|
||||||
for (let i = 0, ii = bytes.length; i < ii; i += 2) {
|
|
||||||
const code = bytes.charCodeAt(i) * 256 + bytes.charCodeAt(i + 1);
|
|
||||||
if (
|
|
||||||
code >= /* Space = */ 32 &&
|
|
||||||
code < /* Delete = */ 127 &&
|
|
||||||
code !== /* '<' = */ 60 &&
|
|
||||||
code !== /* '>' = */ 62 &&
|
|
||||||
code !== /* '&' = */ 38
|
|
||||||
) {
|
|
||||||
chars += String.fromCharCode(code);
|
|
||||||
} else {
|
|
||||||
chars += "&#x" + (0x10000 + code).toString(16).substring(1) + ";";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return ">" + chars;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
_getSequence(entry) {
|
|
||||||
const name = entry.nodeName;
|
|
||||||
if (name !== "rdf:bag" && name !== "rdf:seq" && name !== "rdf:alt") {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return entry.childNodes.filter(node => node.nodeName === "rdf:li");
|
|
||||||
}
|
|
||||||
|
|
||||||
_parseArray(entry) {
|
|
||||||
if (!entry.hasChildNodes()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Child must be a Bag (unordered array) or a Seq.
|
|
||||||
const [seqNode] = entry.childNodes;
|
|
||||||
const sequence = this._getSequence(seqNode) || [];
|
|
||||||
|
|
||||||
this._metadataMap.set(
|
|
||||||
entry.nodeName,
|
|
||||||
sequence.map(node => node.textContent.trim())
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
_parse(xmlDocument) {
|
|
||||||
let rdf = xmlDocument.documentElement;
|
|
||||||
|
|
||||||
if (rdf.nodeName !== "rdf:rdf") {
|
|
||||||
// Wrapped in <xmpmeta>
|
|
||||||
rdf = rdf.firstChild;
|
|
||||||
while (rdf && rdf.nodeName !== "rdf:rdf") {
|
|
||||||
rdf = rdf.nextSibling;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!rdf || rdf.nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const desc of rdf.childNodes) {
|
|
||||||
if (desc.nodeName !== "rdf:description") {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const entry of desc.childNodes) {
|
|
||||||
const name = entry.nodeName;
|
|
||||||
switch (name) {
|
|
||||||
case "#text":
|
|
||||||
continue;
|
|
||||||
case "dc:creator":
|
|
||||||
case "dc:subject":
|
|
||||||
this._parseArray(entry);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
this._metadataMap.set(name, entry.textContent.trim());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
getRaw() {
|
getRaw() {
|
||||||
|
@ -967,53 +967,6 @@ const createObjectURL = (function createObjectURLClosure() {
|
|||||||
};
|
};
|
||||||
})();
|
})();
|
||||||
|
|
||||||
const XMLEntities = {
|
|
||||||
/* < */ 0x3c: "&lt;",
|
|
||||||
/* > */ 0x3e: "&gt;",
|
|
||||||
/* & */ 0x26: "&amp;",
|
|
||||||
/* " */ 0x22: "&quot;",
|
|
||||||
/* ' */ 0x27: "&apos;",
|
|
||||||
};
|
|
||||||
|
|
||||||
function encodeToXmlString(str) {
|
|
||||||
const buffer = [];
|
|
||||||
let start = 0;
|
|
||||||
for (let i = 0, ii = str.length; i < ii; i++) {
|
|
||||||
const char = str.codePointAt(i);
|
|
||||||
if (0x20 <= char && char <= 0x7e) {
|
|
||||||
// ascii
|
|
||||||
const entity = XMLEntities[char];
|
|
||||||
if (entity) {
|
|
||||||
if (start < i) {
|
|
||||||
buffer.push(str.substring(start, i));
|
|
||||||
}
|
|
||||||
buffer.push(entity);
|
|
||||||
start = i + 1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (start < i) {
|
|
||||||
buffer.push(str.substring(start, i));
|
|
||||||
}
|
|
||||||
buffer.push(`&#x${char.toString(16).toUpperCase()};`);
|
|
||||||
if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) {
|
|
||||||
// char is represented by two u16
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
start = i + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (buffer.length === 0) {
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (start < str.length) {
|
|
||||||
buffer.push(str.substring(start, str.length));
|
|
||||||
}
|
|
||||||
|
|
||||||
return buffer.join("");
|
|
||||||
}
|
|
||||||
|
|
||||||
export {
|
export {
|
||||||
AbortException,
|
AbortException,
|
||||||
AnnotationActionEventType,
|
AnnotationActionEventType,
|
||||||
@ -1035,7 +988,6 @@ export {
|
|||||||
createPromiseCapability,
|
createPromiseCapability,
|
||||||
createValidAbsoluteUrl,
|
createValidAbsoluteUrl,
|
||||||
DocumentActionEventType,
|
DocumentActionEventType,
|
||||||
encodeToXmlString,
|
|
||||||
escapeString,
|
escapeString,
|
||||||
FONT_IDENTITY_MATRIX,
|
FONT_IDENTITY_MATRIX,
|
||||||
FontType,
|
FontType,
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
import { Dict, Ref } from "../../src/core/primitives.js";
|
import { Dict, Ref } from "../../src/core/primitives.js";
|
||||||
import {
|
import {
|
||||||
|
encodeToXmlString,
|
||||||
escapePDFName,
|
escapePDFName,
|
||||||
getInheritableProperty,
|
getInheritableProperty,
|
||||||
isWhiteSpace,
|
isWhiteSpace,
|
||||||
@ -218,4 +219,18 @@ describe("core_utils", function () {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("encodeToXmlString", function () {
|
||||||
|
it("should get a correctly encoded string with some entities", function () {
|
||||||
|
const str = "\"\u0397ell😂' & <W😂rld>";
|
||||||
|
expect(encodeToXmlString(str)).toEqual(
|
||||||
|
"&quot;&#x397;ell&#x1F602;&apos; &amp; &lt;W&#x1F602;rld&gt;"
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should get a correctly encoded basic ascii string", function () {
|
||||||
|
const str = "hello world";
|
||||||
|
expect(encodeToXmlString(str)).toEqual(str);
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
@ -15,6 +15,12 @@
|
|||||||
|
|
||||||
import { isEmptyObj } from "./test_utils.js";
|
import { isEmptyObj } from "./test_utils.js";
|
||||||
import { Metadata } from "../../src/display/metadata.js";
|
import { Metadata } from "../../src/display/metadata.js";
|
||||||
|
import { MetadataParser } from "../../src/core/metadata_parser.js";
|
||||||
|
|
||||||
|
function createMetadata(data) {
|
||||||
|
const metadataParser = new MetadataParser(data);
|
||||||
|
return new Metadata(metadataParser.serializable);
|
||||||
|
}
|
||||||
|
|
||||||
describe("metadata", function () {
|
describe("metadata", function () {
|
||||||
it("should handle valid metadata", function () {
|
it("should handle valid metadata", function () {
|
||||||
@ -24,7 +30,7 @@ describe("metadata", function () {
|
|||||||
"<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
|
"<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
|
||||||
'<dc:title><rdf:Alt><rdf:li xml:lang="x-default">Foo bar baz</rdf:li>' +
|
'<dc:title><rdf:Alt><rdf:li xml:lang="x-default">Foo bar baz</rdf:li>' +
|
||||||
"</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>";
|
"</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>";
|
||||||
const metadata = new Metadata(data);
|
const metadata = createMetadata(data);
|
||||||
|
|
||||||
expect(metadata.has("dc:title")).toBeTruthy();
|
expect(metadata.has("dc:title")).toBeTruthy();
|
||||||
expect(metadata.has("dc:qux")).toBeFalsy();
|
expect(metadata.has("dc:qux")).toBeFalsy();
|
||||||
@ -42,7 +48,7 @@ describe("metadata", function () {
|
|||||||
"<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
|
"<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
|
||||||
"<dc:title>\\376\\377\\000P\\000D\\000F\\000&</dc:title>" +
|
"<dc:title>\\376\\377\\000P\\000D\\000F\\000&</dc:title>" +
|
||||||
"</rdf:Description></rdf:RDF></x:xmpmeta>";
|
"</rdf:Description></rdf:RDF></x:xmpmeta>";
|
||||||
const metadata = new Metadata(data);
|
const metadata = createMetadata(data);
|
||||||
|
|
||||||
expect(metadata.has("dc:title")).toBeTruthy();
|
expect(metadata.has("dc:title")).toBeTruthy();
|
||||||
expect(metadata.has("dc:qux")).toBeFalsy();
|
expect(metadata.has("dc:qux")).toBeFalsy();
|
||||||
@ -85,7 +91,7 @@ describe("metadata", function () {
|
|||||||
"<dc:creator><rdf:Seq><rdf:li>\\376\\377\\000O\\000D\\000I\\000S" +
|
"<dc:creator><rdf:Seq><rdf:li>\\376\\377\\000O\\000D\\000I\\000S" +
|
||||||
"</rdf:li></rdf:Seq></dc:creator></rdf:Description></rdf:RDF>" +
|
"</rdf:li></rdf:Seq></dc:creator></rdf:Description></rdf:RDF>" +
|
||||||
"</x:xmpmeta>";
|
"</x:xmpmeta>";
|
||||||
const metadata = new Metadata(data);
|
const metadata = createMetadata(data);
|
||||||
|
|
||||||
expect(metadata.has("dc:title")).toBeTruthy();
|
expect(metadata.has("dc:title")).toBeTruthy();
|
||||||
expect(metadata.has("dc:qux")).toBeFalsy();
|
expect(metadata.has("dc:qux")).toBeFalsy();
|
||||||
@ -128,7 +134,7 @@ describe("metadata", function () {
|
|||||||
"</rdf:RDF>" +
|
"</rdf:RDF>" +
|
||||||
"</x:xmpmeta>" +
|
"</x:xmpmeta>" +
|
||||||
'<?xpacket end="w"?>';
|
'<?xpacket end="w"?>';
|
||||||
const metadata = new Metadata(data);
|
const metadata = createMetadata(data);
|
||||||
|
|
||||||
expect(isEmptyObj(metadata.getAll())).toEqual(true);
|
expect(isEmptyObj(metadata.getAll())).toEqual(true);
|
||||||
});
|
});
|
||||||
@ -159,7 +165,7 @@ describe("metadata", function () {
|
|||||||
'<dc:title><rdf:Alt><rdf:li xml:lang="x-default"></rdf:li>' +
|
'<dc:title><rdf:Alt><rdf:li xml:lang="x-default"></rdf:li>' +
|
||||||
"</rdf:Alt></dc:title><dc:format>application/pdf</dc:format>" +
|
"</rdf:Alt></dc:title><dc:format>application/pdf</dc:format>" +
|
||||||
'</rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
|
'</rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
|
||||||
const metadata = new Metadata(data);
|
const metadata = createMetadata(data);
|
||||||
|
|
||||||
expect(metadata.has("dc:title")).toBeTruthy();
|
expect(metadata.has("dc:title")).toBeTruthy();
|
||||||
expect(metadata.has("dc:qux")).toBeFalsy();
|
expect(metadata.has("dc:qux")).toBeFalsy();
|
||||||
@ -191,7 +197,7 @@ describe("metadata", function () {
|
|||||||
"<dc:title><rdf:Alt>" +
|
"<dc:title><rdf:Alt>" +
|
||||||
'<rdf:li xml:lang="x-default">&apos;Foo bar baz&apos;</rdf:li>' +
|
'<rdf:li xml:lang="x-default">&apos;Foo bar baz&apos;</rdf:li>' +
|
||||||
"</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>";
|
"</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>";
|
||||||
const metadata = new Metadata(data);
|
const metadata = createMetadata(data);
|
||||||
|
|
||||||
expect(metadata.has("dc:title")).toBeTruthy();
|
expect(metadata.has("dc:title")).toBeTruthy();
|
||||||
expect(metadata.has("dc:qux")).toBeFalsy();
|
expect(metadata.has("dc:qux")).toBeFalsy();
|
||||||
@ -220,7 +226,7 @@ describe("metadata", function () {
|
|||||||
"<xmpMM:DocumentID>uuid:00000000-1c84-3cf9-89ba-bef0e729c831" +
|
"<xmpMM:DocumentID>uuid:00000000-1c84-3cf9-89ba-bef0e729c831" +
|
||||||
"</xmpMM:DocumentID></rdf:Description>" +
|
"</xmpMM:DocumentID></rdf:Description>" +
|
||||||
'</rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
|
'</rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
|
||||||
const metadata = new Metadata(data);
|
const metadata = createMetadata(data);
|
||||||
|
|
||||||
expect(isEmptyObj(metadata.getAll())).toEqual(true);
|
expect(isEmptyObj(metadata.getAll())).toEqual(true);
|
||||||
});
|
});
|
||||||
@ -249,7 +255,7 @@ describe("metadata", function () {
|
|||||||
" </dc:title>" +
|
" </dc:title>" +
|
||||||
" </rdf:Description>" +
|
" </rdf:Description>" +
|
||||||
"</rdf:RDF>";
|
"</rdf:RDF>";
|
||||||
const metadata = new Metadata(data);
|
const metadata = createMetadata(data);
|
||||||
|
|
||||||
expect(metadata.has("dc:title")).toBeTruthy();
|
expect(metadata.has("dc:title")).toBeTruthy();
|
||||||
expect(metadata.has("dc:qux")).toBeFalsy();
|
expect(metadata.has("dc:qux")).toBeFalsy();
|
||||||
|
@ -17,7 +17,6 @@ import {
|
|||||||
bytesToString,
|
bytesToString,
|
||||||
createPromiseCapability,
|
createPromiseCapability,
|
||||||
createValidAbsoluteUrl,
|
createValidAbsoluteUrl,
|
||||||
encodeToXmlString,
|
|
||||||
escapeString,
|
escapeString,
|
||||||
getModificationDate,
|
getModificationDate,
|
||||||
isArrayBuffer,
|
isArrayBuffer,
|
||||||
@ -335,20 +334,6 @@ describe("util", function () {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("encodeToXmlString", function () {
|
|
||||||
it("should get a correctly encoded string with some entities", function () {
|
|
||||||
const str = "\"\u0397ell😂' & <W😂rld>";
|
|
||||||
expect(encodeToXmlString(str)).toEqual(
|
|
||||||
"&quot;&#x397;ell&#x1F602;&apos; &amp; &lt;W&#x1F602;rld&gt;"
|
|
||||||
);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("should get a correctly encoded basic ascii string", function () {
|
|
||||||
const str = "hello world";
|
|
||||||
expect(encodeToXmlString(str)).toEqual(str);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("isAscii", function () {
|
describe("isAscii", function () {
|
||||||
it("handles ascii/non-ascii strings", function () {
|
it("handles ascii/non-ascii strings", function () {
|
||||||
expect(isAscii("hello world")).toEqual(true);
|
expect(isAscii("hello world")).toEqual(true);
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { parseXFAPath } from "../../src/core/core_utils.js";
|
import { parseXFAPath } from "../../src/core/core_utils.js";
|
||||||
import { SimpleXMLParser } from "../../src/shared/xml_parser.js";
|
import { SimpleXMLParser } from "../../src/core/xml_parser.js";
|
||||||
|
|
||||||
describe("XML", function () {
|
describe("XML", function () {
|
||||||
describe("searchNode", function () {
|
describe("searchNode", function () {
|
||||||
|
@ -1755,11 +1755,8 @@ const PDFViewerApplication = {
|
|||||||
`${this.pdfViewer.enableWebGL ? " [WebGL]" : ""})`
|
`${this.pdfViewer.enableWebGL ? " [WebGL]" : ""})`
|
||||||
);
|
);
|
||||||
|
|
||||||
let pdfTitle;
|
let pdfTitle = info?.Title;
|
||||||
const infoTitle = info?.Title;
|
|
||||||
if (infoTitle) {
|
|
||||||
pdfTitle = infoTitle;
|
|
||||||
}
|
|
||||||
const metadataTitle = metadata?.get("dc:title");
|
const metadataTitle = metadata?.get("dc:title");
|
||||||
if (metadataTitle) {
|
if (metadataTitle) {
|
||||||
// Ghostscript can produce invalid 'dc:title' Metadata entries:
|
// Ghostscript can produce invalid 'dc:title' Metadata entries:
|
||||||
|
Loading…
Reference in New Issue
Block a user