New XML parser
This commit is contained in:
parent
6662985a20
commit
655c8d34d0
@ -135,132 +135,6 @@ class DOMSVGFactory {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Minimal DOM-like node used by the simple XML parser.
 *
 * Element nodes are expected to get a `childNodes` array assigned by the
 * parser; text nodes carry their content in `nodeValue` and have no
 * `childNodes` property.
 */
class SimpleDOMNode {
  /**
   * @param {string} nodeName - Tag name, or '#text' for text nodes.
   * @param {string} [nodeValue] - Text content for text nodes.
   */
  constructor(nodeName, nodeValue) {
    this.nodeName = nodeName;
    this.nodeValue = nodeValue;

    // `parentNode` is writable but deliberately non-enumerable.
    Object.defineProperty(this, 'parentNode', { writable: true, value: null, });
  }

  /** First entry of `childNodes`. */
  get firstChild() {
    const [first] = [this.childNodes[0]];
    return first;
  }

  /** The entry following this node in the parent's `childNodes`. */
  get nextSibling() {
    const siblings = this.parentNode.childNodes;
    const position = siblings.indexOf(this);
    return siblings[position + 1];
  }

  /** Concatenated text of all descendants, or own `nodeValue` for leaves. */
  get textContent() {
    const { childNodes, } = this;
    if (childNodes) {
      return childNodes.map((child) => child.textContent).join('');
    }
    return this.nodeValue || '';
  }

  /** Truthy when the node owns at least one child. */
  hasChildNodes() {
    const { childNodes, } = this;
    return childNodes && childNodes.length > 0;
  }
}
|
||||
|
||||
/**
 * Small regular-expression based XML parser that produces a tree of
 * `SimpleDOMNode` objects.
 */
class SimpleXMLParser {
  /**
   * Parses an XML string.
   *
   * @param {string} data - The XML source.
   * @returns {Object} An object whose `documentElement` is the last node
   *   that was created while folding the markup (the root, for well-formed
   *   input).
   */
  parseFromString(data) {
    const nodes = [];

    // Remove all comments and processing instructions.
    data = data.replace(/<\?[\s\S]*?\?>|<!--[\s\S]*?-->/g, '').trim();
    data = data.replace(/<!DOCTYPE[^>\[]+(\[[^\]]+)?[^>]+>/g, '').trim();

    // Extract all text nodes and replace them with a numeric index in
    // the nodes.
    data = data.replace(/>([^<][\s\S]*?)</g, (all, text) => {
      const length = nodes.length;
      const node = new SimpleDOMNode('#text', this._decodeXML(text));
      nodes.push(node);
      if (node.textContent.trim().length === 0) {
        return '><'; // Ignore whitespace.
      }
      return '>' + length + ',<';
    });

    // Extract all CDATA nodes.
    data = data.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g,
        function(all, text) {
      const length = nodes.length;
      const node = new SimpleDOMNode('#text', text);
      nodes.push(node);
      return length + ',';
    });

    // Until nodes without '<' and '>' content are present, replace them
    // with a numeric index in the nodes.
    const regex =
      /<([\w\:]+)((?:[\s\w:=]|'[^']*'|"[^"]*")*)(?:\/>|>([\d,]*)<\/[^>]+>)/g;
    let lastLength;
    do {
      lastLength = nodes.length;
      // `childIndices` is the comma-terminated list of node indices that
      // earlier passes substituted for this element's content.
      data = data.replace(regex, function(all, name, attrs, childIndices) {
        const length = nodes.length;
        const node = new SimpleDOMNode(name);
        const children = [];
        if (childIndices) {
          const indices = childIndices.split(',');
          indices.pop(); // Drop the empty entry after the trailing comma.
          indices.forEach(function(child) {
            const childNode = nodes[+child];
            childNode.parentNode = node;
            children.push(childNode);
          });
        }

        node.childNodes = children;
        nodes.push(node);
        return length + ',';
      });
    } while (lastLength < nodes.length);

    // We should only have one root index left, which will be last in the nodes.
    return {
      documentElement: nodes.pop(),
    };
  }

  /**
   * Replaces numeric character references and the five predefined XML
   * entities by the characters they denote.
   *
   * Unknown named entities and numeric references outside the Unicode range
   * are left untouched.
   *
   * @param {string} text
   * @returns {string}
   */
  _decodeXML(text) {
    if (!text.includes('&')) {
      return text;
    }

    return text.replace(/&(#(x[0-9a-f]+|\d+)|\w+);/gi,
        function(all, entityName, number) {
      if (number) {
        // The pattern is case-insensitive, so accept both 'x' and 'X' as
        // the hexadecimal marker.
        const isHex = number[0] === 'x' || number[0] === 'X';
        const codePoint = isHex ? parseInt(number.substring(1), 16) :
                                  parseInt(number, 10);
        if (!(codePoint >= 0 && codePoint <= 0x10FFFF)) {
          return all; // Not a valid Unicode code point.
        }
        // `fromCodePoint` (unlike `fromCharCode`) handles characters
        // outside of the Basic Multilingual Plane.
        return String.fromCodePoint(codePoint);
      }

      switch (entityName) {
        case 'amp':
          return '&';
        case 'lt':
          return '<';
        case 'gt':
          return '>';
        case 'quot':
          return '\"';
        case 'apos':
          return '\'';
      }
      return '&' + entityName + ';';
    });
  }
}
|
||||
|
||||
var RenderingCancelledException = (function RenderingCancelledException() {
|
||||
function RenderingCancelledException(msg, type) {
|
||||
this.message = msg;
|
||||
@ -411,7 +285,6 @@ export {
|
||||
DOMCanvasFactory,
|
||||
DOMCMapReaderFactory,
|
||||
DOMSVGFactory,
|
||||
SimpleXMLParser,
|
||||
StatTimer,
|
||||
DummyStatTimer,
|
||||
};
|
||||
|
@ -14,7 +14,7 @@
|
||||
*/
|
||||
|
||||
import { assert } from '../shared/util';
|
||||
import { SimpleXMLParser } from './dom_utils';
|
||||
import { SimpleXMLParser } from './xml_parser';
|
||||
|
||||
class Metadata {
|
||||
constructor(data) {
|
||||
@ -23,13 +23,15 @@ class Metadata {
|
||||
// Ghostscript may produce invalid metadata, so try to repair that first.
|
||||
data = this._repair(data);
|
||||
|
||||
// Convert the string to a DOM `Document`.
|
||||
// Convert the string to an XML document.
|
||||
let parser = new SimpleXMLParser();
|
||||
data = parser.parseFromString(data);
|
||||
const xmlDocument = parser.parseFromString(data);
|
||||
|
||||
this._metadata = Object.create(null);
|
||||
|
||||
this._parse(data);
|
||||
if (xmlDocument) {
|
||||
this._parse(xmlDocument);
|
||||
}
|
||||
}
|
||||
|
||||
_repair(data) {
|
||||
@ -68,8 +70,8 @@ class Metadata {
|
||||
});
|
||||
}
|
||||
|
||||
_parse(domDocument) {
|
||||
let rdf = domDocument.documentElement;
|
||||
_parse(xmlDocument) {
|
||||
let rdf = xmlDocument.documentElement;
|
||||
|
||||
if (rdf.nodeName.toLowerCase() !== 'rdf:rdf') { // Wrapped in <xmpmeta>
|
||||
rdf = rdf.firstChild;
|
||||
|
374
src/display/xml_parser.js
Normal file
374
src/display/xml_parser.js
Normal file
@ -0,0 +1,374 @@
|
||||
/* Copyright 2018 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// The code for XMLParserBase was copied from
|
||||
// https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts
|
||||
|
||||
/**
 * Error codes passed to `onError` by the XML parser. `NoError` is zero and
 * every failure code is negative. The table is frozen so that the shared
 * codes cannot be altered at runtime.
 */
const XMLParserErrorCode = Object.freeze({
  NoError: 0,
  EndOfDocument: -1,
  UnterminatedCdat: -2,
  UnterminatedXmlDeclaration: -3,
  UnterminatedDoctypeDeclaration: -4,
  UnterminatedComment: -5,
  MalformedElement: -6,
  OutOfMemory: -7,
  UnterminatedAttributeValue: -8,
  UnterminatedElement: -9,
  ElementNeverBegun: -10,
});
|
||||
|
||||
/**
 * Tells whether the character at `index` in `s` is a space, line feed,
 * carriage return or tab.
 *
 * @param {string} s
 * @param {number} index
 * @returns {boolean}
 */
function isWhitespace(s, index) {
  switch (s[index]) {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Tells whether every character of `s` is whitespace according to
 * `isWhitespace`. An empty string yields `true`.
 *
 * @param {string} s
 * @returns {boolean}
 */
function isWhitespaceString(s) {
  let remaining = s.length;
  while (remaining > 0) {
    remaining--;
    if (!isWhitespace(s, remaining)) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
/**
 * Event-driven XML tokenizer. `parseXml` walks the input once and reports
 * what it finds through the `on*` hooks, which are no-ops here and are meant
 * to be overridden by subclasses.
 */
class XMLParserBase {
  /**
   * Replaces `&...;` references in `s`.
   *
   * Numeric references and lt/gt/amp/quot are resolved directly; every other
   * name is delegated to `onResolveEntity`. Numeric references that do not
   * denote a valid Unicode code point are left untouched.
   *
   * @param {string} s
   * @returns {string}
   */
  _resolveEntities(s) {
    const fromCharRef = (digits, radix, fallback) => {
      const codePoint = parseInt(digits, radix);
      if (!(codePoint >= 0 && codePoint <= 0x10FFFF)) {
        return fallback; // NaN or outside of the Unicode range.
      }
      // `fromCodePoint` also covers characters above U+FFFF.
      return String.fromCodePoint(codePoint);
    };

    // An arrow function is required here: the callback needs the parser as
    // `this` in order to reach `onResolveEntity`.
    return s.replace(/&([^;]+);/g, (all, entity) => {
      if (entity.substring(0, 2) === '#x') {
        return fromCharRef(entity.substring(2), 16, all);
      } else if (entity.substring(0, 1) === '#') {
        return fromCharRef(entity.substring(1), 10, all);
      }
      switch (entity) {
        case 'lt':
          return '<';
        case 'gt':
          return '>';
        case 'amp':
          return '&';
        case 'quot':
          return '\"';
      }
      return this.onResolveEntity(entity);
    });
  }

  /**
   * Parses an element name and its attributes, starting right after '<'.
   *
   * @param {string} s - The complete XML source.
   * @param {number} start - Index of the first character of the name.
   * @returns {Object|null} `{ name, attributes, parsed }`, where `parsed` is
   *   the number of characters consumed, or `null` when an attribute is
   *   malformed.
   */
  _parseContent(s, start) {
    let pos = start, name, attributes = [];

    function skipWs() {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    }

    while (pos < s.length && !isWhitespace(s, pos) &&
           s[pos] !== '>' && s[pos] !== '/') {
      ++pos;
    }
    name = s.substring(start, pos);
    skipWs();
    while (pos < s.length && s[pos] !== '>' &&
           s[pos] !== '/' && s[pos] !== '?') {
      skipWs();
      let attrName = '', attrValue = '';
      while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== '=') {
        attrName += s[pos];
        ++pos;
      }
      skipWs();
      if (s[pos] !== '=') {
        return null;
      }
      ++pos;
      skipWs();
      // The value must be quoted; the opening quote decides the closing one.
      const attrEndChar = s[pos];
      if (attrEndChar !== '\"' && attrEndChar !== '\'') {
        return null;
      }
      const attrEndIndex = s.indexOf(attrEndChar, ++pos);
      if (attrEndIndex < 0) {
        return null;
      }
      attrValue = s.substring(pos, attrEndIndex);
      attributes.push({
        name: attrName,
        value: this._resolveEntities(attrValue),
      });
      pos = attrEndIndex + 1;
      skipWs();
    }
    return {
      name,
      attributes,
      parsed: pos - start,
    };
  }

  /**
   * Parses a processing instruction, starting right after '<?'.
   *
   * @param {string} s - The complete XML source.
   * @param {number} start - Index of the first character of the target name.
   * @returns {Object} `{ name, value, parsed }`, where `value` is the raw
   *   text up to (not including) '?>' and `parsed` is the number of
   *   characters consumed.
   */
  _parseProcessingInstruction(s, start) {
    let pos = start, name, value;

    function skipWs() {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    }

    while (pos < s.length && !isWhitespace(s, pos) &&
           s[pos] !== '>' && s[pos] !== '/') {
      ++pos;
    }
    name = s.substring(start, pos);
    skipWs();
    const attrStart = pos;
    while (pos < s.length && (s[pos] !== '?' || s[pos + 1] !== '>')) {
      ++pos;
    }
    value = s.substring(attrStart, pos);
    return {
      name,
      value,
      parsed: pos - start,
    };
  }

  /**
   * Tokenizes `s` and invokes the `on*` hooks for everything encountered.
   * Parsing stops at the first problem, right after `onError` was called.
   *
   * @param {string} s - The XML source.
   */
  parseXml(s) {
    let i = 0;
    while (i < s.length) {
      const ch = s[i];
      let j = i;
      if (ch === '<') {
        ++j;
        const ch2 = s[j];
        let q;
        switch (ch2) {
          case '/': {
            // End tag.
            ++j;
            q = s.indexOf('>', j);
            if (q < 0) {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }
            this.onEndElement(s.substring(j, q));
            j = q + 1;
            break;
          }
          case '?': {
            // Processing instruction / XML declaration.
            ++j;
            const pi = this._parseProcessingInstruction(s, j);
            if (s.substring(j + pi.parsed, j + pi.parsed + 2) !== '?>') {
              this.onError(XMLParserErrorCode.UnterminatedXmlDeclaration);
              return;
            }
            this.onPi(pi.name, pi.value);
            j += pi.parsed + 2;
            break;
          }
          case '!': {
            if (s.substring(j + 1, j + 3) === '--') {
              q = s.indexOf('-->', j + 3);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedComment);
                return;
              }
              this.onComment(s.substring(j + 3, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === '[CDATA[') {
              q = s.indexOf(']]>', j + 8);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedCdat);
                return;
              }
              this.onCdata(s.substring(j + 8, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === 'DOCTYPE') {
              const q2 = s.indexOf('[', j + 8);
              let complexDoctype = false;
              q = s.indexOf('>', j + 8);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedDoctypeDeclaration);
                return;
              }
              // A '[' before the first '>' means there is an internal
              // subset, which is terminated by ']>' instead.
              if (q2 > 0 && q > q2) {
                q = s.indexOf(']>', j + 8);
                if (q < 0) {
                  this.onError(
                    XMLParserErrorCode.UnterminatedDoctypeDeclaration);
                  return;
                }
                complexDoctype = true;
              }
              const doctypeContent =
                s.substring(j + 8, q + (complexDoctype ? 1 : 0));
              this.onDoctype(doctypeContent);
              j = q + (complexDoctype ? 2 : 1);
            } else {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }
            break;
          }
          default: {
            // Start tag or self-closing element.
            const content = this._parseContent(s, j);
            if (content === null) {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }
            let isClosed = false;
            if (s.substring(j + content.parsed,
                            j + content.parsed + 2) === '/>') {
              isClosed = true;
            } else if (s.substring(j + content.parsed,
                                   j + content.parsed + 1) !== '>') {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }
            this.onBeginElement(content.name, content.attributes, isClosed);
            j += content.parsed + (isClosed ? 2 : 1);
            break;
          }
        }
      } else {
        // Character data up to the next tag (or the end of the input).
        while (j < s.length && s[j] !== '<') {
          j++;
        }
        const text = s.substring(i, j);
        this.onText(this._resolveEntities(text));
      }
      i = j;
    }
  }

  /**
   * Hook for entity names that `_resolveEntities` does not handle itself.
   * The default keeps the reference as it was written.
   *
   * @param {string} name - Entity name without '&' and ';'.
   * @returns {string} Replacement text.
   */
  onResolveEntity(name) {
    return `&${name};`;
  }

  /** Hook: processing instruction with target `name` and raw `value`. */
  onPi(name, value) { }

  /** Hook: comment; `text` excludes the delimiters. */
  onComment(text) { }

  /** Hook: CDATA section; `text` excludes the delimiters. */
  onCdata(text) { }

  /** Hook: DOCTYPE declaration; receives the text after '<!DOCTYPE'. */
  onDoctype(doctypeContent) { }

  /** Hook: character data with entities already resolved. */
  onText(text) { }

  /** Hook: start tag; `isEmpty` is true for self-closing elements. */
  onBeginElement(name, attributes, isEmpty) { }

  /** Hook: end tag. */
  onEndElement(name) { }

  /** Hook: parse failure; `code` is one of `XMLParserErrorCode`. */
  onError(code) { }
}
|
||||
|
||||
/**
 * Minimal DOM-like node produced by `SimpleXMLParser`.
 *
 * Element nodes get a `childNodes` array assigned by the parser; text nodes
 * only carry `nodeValue` and have no `childNodes` property.
 */
class SimpleDOMNode {
  /**
   * @param {string} nodeName - Tag name, or '#text' for text nodes.
   * @param {string} [nodeValue] - Text content for text nodes.
   */
  constructor(nodeName, nodeValue) {
    this.nodeName = nodeName;
    this.nodeValue = nodeValue;

    // Kept non-enumerable so the back-reference to the parent does not show
    // up when the node's properties are enumerated.
    Object.defineProperty(this, 'parentNode', { value: null, writable: true, });
  }

  /**
   * The first child, or `undefined` when the node has no children (which
   * includes text nodes, that have no `childNodes` at all).
   */
  get firstChild() {
    return this.childNodes ? this.childNodes[0] : undefined;
  }

  /**
   * The node following this one in its parent's `childNodes`, or `undefined`
   * when there is none, when the node has no parent, or when the node is not
   * listed among its parent's children.
   */
  get nextSibling() {
    const parent = this.parentNode;
    if (!parent || !parent.childNodes) {
      return undefined;
    }
    const index = parent.childNodes.indexOf(this);
    if (index === -1) {
      // Without this check `index + 1` would select the parent's first child.
      return undefined;
    }
    return parent.childNodes[index + 1];
  }

  /** Concatenated text of all descendants, or own `nodeValue` for leaves. */
  get textContent() {
    if (!this.childNodes) {
      return this.nodeValue || '';
    }
    return this.childNodes.map(function(child) {
      return child.textContent;
    }).join('');
  }

  /**
   * @returns {boolean} Whether the node has at least one child.
   */
  hasChildNodes() {
    return !!this.childNodes && this.childNodes.length > 0;
  }
}
|
||||
|
||||
/**
 * Builds a tree of `SimpleDOMNode` objects from the events reported by
 * `XMLParserBase`.
 */
class SimpleXMLParser extends XMLParserBase {
  constructor() {
    super();
    // Child list that newly created nodes are currently appended to.
    this._currentFragment = null;
    // Child lists of the enclosing, still open, elements.
    this._stack = null;
    this._errorCode = XMLParserErrorCode.NoError;
  }

  /**
   * Parses an XML string.
   *
   * @param {string} data - The XML source.
   * @returns {Object|undefined} `{ documentElement }`, or `undefined` when
   *   the input could not be parsed, contains elements that are never
   *   closed, or has no root node.
   */
  parseFromString(data) {
    this._currentFragment = [];
    this._stack = [];
    this._errorCode = XMLParserErrorCode.NoError;

    this.parseXml(data);

    if (this._errorCode === XMLParserErrorCode.NoError &&
        this._stack.length > 0) {
      // Some element was opened but never closed, hence `_currentFragment`
      // is not the top-level fragment and holds no usable root.
      this.onError(XMLParserErrorCode.UnterminatedElement);
    }
    if (this._errorCode !== XMLParserErrorCode.NoError) {
      return undefined; // return undefined on error
    }

    // We should only have one root.
    const [documentElement] = this._currentFragment;
    if (!documentElement) {
      return undefined; // Return undefined if no root was found.
    }
    return { documentElement, };
  }

  /**
   * Adds `&apos;` to the entities resolved by the base class.
   */
  onResolveEntity(name) {
    switch (name) {
      case 'apos':
        return '\'';
    }
    return super.onResolveEntity(name);
  }

  /** Appends a text node, unless the text consists of whitespace only. */
  onText(text) {
    if (isWhitespaceString(text)) {
      return;
    }
    const node = new SimpleDOMNode('#text', text);
    this._currentFragment.push(node);
  }

  /** Appends the CDATA content as a text node, whitespace included. */
  onCdata(text) {
    const node = new SimpleDOMNode('#text', text);
    this._currentFragment.push(node);
  }

  /**
   * Appends a new element and, unless it is self-closing, makes its child
   * list the current fragment.
   */
  onBeginElement(name, attributes, isEmpty) {
    const node = new SimpleDOMNode(name);
    node.childNodes = [];
    this._currentFragment.push(node);
    if (isEmpty) {
      return;
    }
    this._stack.push(this._currentFragment);
    this._currentFragment = node.childNodes;
  }

  /**
   * Closes the innermost open element and links its children back to it.
   * An end tag without any open element is reported as `ElementNeverBegun`.
   */
  onEndElement(name) {
    if (this._stack.length === 0) {
      this.onError(XMLParserErrorCode.ElementNeverBegun);
      return;
    }
    this._currentFragment = this._stack.pop();
    // The element being closed is the last node that was added to the
    // fragment that has just been restored.
    const lastElement = this._currentFragment[this._currentFragment.length - 1];
    for (let i = 0, ii = lastElement.childNodes.length; i < ii; i++) {
      lastElement.childNodes[i].parentNode = lastElement;
    }
  }

  /** Remembers the failure so that `parseFromString` can report it. */
  onError(code) {
    this._errorCode = code;
  }
}
|
||||
|
||||
export {
|
||||
SimpleXMLParser,
|
||||
};
|
Loading…
Reference in New Issue
Block a user