Merge pull request #8557 from Rob--W/svg-oom-streaming
pdf2svg.js: provides a ReadableSVGStream class to serialize an SVG as a stream
This commit is contained in:
commit
7e4c69eccf
@ -91,30 +91,6 @@ DOMElement.prototype = {
|
||||
}
|
||||
},
|
||||
|
||||
toString: function DOMElement_toString() {
|
||||
var buf = [];
|
||||
buf.push('<' + this.nodeName);
|
||||
if (this.nodeName === 'svg:svg') {
|
||||
buf.push(' xmlns:xlink="http://www.w3.org/1999/xlink"' +
|
||||
' xmlns:svg="http://www.w3.org/2000/svg"');
|
||||
}
|
||||
for (var i in this.attributes) {
|
||||
buf.push(' ' + i + '="' + xmlEncode(this.attributes[i]) + '"');
|
||||
}
|
||||
|
||||
buf.push('>');
|
||||
|
||||
if (this.nodeName === 'svg:tspan' || this.nodeName === 'svg:style') {
|
||||
buf.push(xmlEncode(this.textContent));
|
||||
} else {
|
||||
this.childNodes.forEach(function(childNode) {
|
||||
buf.push(childNode.toString());
|
||||
});
|
||||
}
|
||||
buf.push('</' + this.nodeName + '>');
|
||||
return buf.join('');
|
||||
},
|
||||
|
||||
cloneNode: function DOMElement_cloneNode() {
|
||||
var newNode = new DOMElement(this.nodeName);
|
||||
newNode.childNodes = this.childNodes;
|
||||
@ -122,8 +98,95 @@ DOMElement.prototype = {
|
||||
newNode.textContent = this.textContent;
|
||||
return newNode;
|
||||
},
|
||||
|
||||
// This method is offered for convenience. It is recommended to directly use
|
||||
// getSerializer because that allows you to process the chunks as they come
|
||||
// instead of requiring the whole image to fit in memory.
|
||||
toString: function DOMElement_toString() {
|
||||
var buf = [];
|
||||
var serializer = this.getSerializer();
|
||||
var chunk;
|
||||
while ((chunk = serializer.getNext()) !== null) {
|
||||
buf.push(chunk);
|
||||
}
|
||||
return buf.join('');
|
||||
},
|
||||
|
||||
getSerializer: function DOMElement_getSerializer() {
|
||||
return new DOMElementSerializer(this);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Incremental serializer for a single element and its subtree. The markup is
 * produced piece by piece through getNext().
 *
 * @constructor
 * @param {DOMElement} node The element to serialize.
 */
function DOMElementSerializer(node) {
  // Element being serialized.
  this._node = node;
  // Current step of the state machine driven by getNext().
  this._state = 0;
  // Position inside the attribute list or the child list, depending on state.
  this._loopIndex = 0;
  // Attribute names of the element; filled in by getNext().
  this._attributeKeys = null;
  // Serializer of the child that is currently being emitted, if any.
  this._childSerializer = null;
}
|
||||
DOMElementSerializer.prototype = {
  /**
   * Yields the next chunk in the serialization of the element.
   *
   * The method is a state machine: every call resumes at this._state, and
   * several cases deliberately fall through to the next one when they have
   * nothing to emit themselves.
   *
   * @returns {string|null} null if the element has fully been serialized.
   */
  getNext: function DOMElementSerializer_getNext() {
    var node = this._node;
    switch (this._state) {
      case 0: // Start opening tag.
        ++this._state;
        return '<' + node.nodeName;
      case 1: // Add SVG namespace if this is the root element.
        ++this._state;
        if (node.nodeName === 'svg:svg') {
          return ' xmlns:xlink="http://www.w3.org/1999/xlink"' +
                 ' xmlns:svg="http://www.w3.org/2000/svg"';
        }
        // falls through
      case 2: // Initialize variables for looping over attributes.
        ++this._state;
        this._loopIndex = 0;
        this._attributeKeys = Object.keys(node.attributes);
        // falls through
      case 3: // Serialize any attributes and end opening tag.
        if (this._loopIndex < this._attributeKeys.length) {
          // One attribute per call; the state stays at 3 until all are done.
          var name = this._attributeKeys[this._loopIndex++];
          return ' ' + name + '="' + xmlEncode(node.attributes[name]) + '"';
        }
        ++this._state;
        return '>';
      case 4: // Serialize textContent for tspan/style elements.
        if (node.nodeName === 'svg:tspan' || node.nodeName === 'svg:style') {
          // Text-only elements skip the child loop and go to the end tag.
          this._state = 6;
          return xmlEncode(node.textContent);
        }
        ++this._state;
        this._loopIndex = 0;
        // falls through
      case 5: // Serialize child nodes (only for non-tspan/style elements).
        var value;
        while (true) {
          // With no active child serializer the && expression yields null,
          // which makes the loop advance to the next child.
          value = this._childSerializer && this._childSerializer.getNext();
          if (value !== null) {
            return value;
          }
          var nextChild = node.childNodes[this._loopIndex++];
          if (nextChild) {
            this._childSerializer = new DOMElementSerializer(nextChild);
          } else {
            // All children are done: leave the loop and emit the end tag.
            this._childSerializer = null;
            ++this._state;
            break;
          }
        }
        // falls through
      case 6: // Ending tag.
        ++this._state;
        return '</' + node.nodeName + '>';
      case 7: // Done.
        return null;
      default:
        throw new Error('Unexpected serialization state: ' + this._state);
    }
  },
};
|
||||
|
||||
const document = {
|
||||
childNodes : [],
|
||||
|
||||
|
@ -6,6 +6,9 @@
|
||||
//
|
||||
|
||||
var fs = require('fs');
|
||||
var util = require('util');
|
||||
var path = require('path');
|
||||
var stream = require('stream');
|
||||
|
||||
// HACK few hacks to let PDF.js be loaded not as a module in global space.
|
||||
require('./domstubs.js').setStubs(global);
|
||||
@ -17,32 +20,66 @@ var pdfjsLib = require('pdfjs-dist');
|
||||
var pdfPath = process.argv[2] || '../../web/compressed.tracemonkey-pldi-09.pdf';
|
||||
var data = new Uint8Array(fs.readFileSync(pdfPath));
|
||||
|
||||
/**
 * Dumps svg outputs to a folder called svgdump.
 *
 * The target file is named after the input PDF (pdfPath) and the page number.
 * Failures are only logged; the callback is invoked in every case.
 *
 * @param {string} svgdump The serialized SVG to write.
 * @param {number} pageNum Page number, used in the file name and the log.
 * @param {function} callback Called without arguments once the attempt to
 *   write the file has finished, whether it succeeded or not.
 */
function writeToFile(svgdump, pageNum, callback) {
  var name = getFileNameFromPath(pdfPath);
  fs.mkdir('./svgdump/', function(err) {
    // An already existing directory is not a failure.
    if (err && err.code !== 'EEXIST') {
      // Report the problem instead of skipping the page silently.
      console.log('Error: ' + err);
      callback();
      return;
    }
    fs.writeFile('./svgdump/' + name + '-' + pageNum + '.svg', svgdump,
      function(err) {
        if (err) {
          console.log('Error: ' + err);
        } else {
          console.log('Page: ' + pageNum);
        }
        callback();
      });
  });
}
|
||||
// Directory that receives the generated SVG files.
var outputDirectory = './svgdump';

try {
  // Note: This creates a directory only one level deep. If you want to create
  // multiple subdirectories on the fly, use the mkdirp module from npm.
  fs.mkdirSync(outputDirectory);
} catch (e) {
  // A directory that already exists is fine; any other failure is fatal.
  if (e.code !== 'EEXIST') {
    throw e;
  }
}
|
||||
|
||||
/**
 * Builds the path of the SVG file for one page: the output directory joined
 * with "<PDF base name without extension>-<pageNum>.svg".
 *
 * @param {number} pageNum Page number to embed in the file name.
 * @returns {string} Path of the SVG file for that page.
 */
function getFilePathForPage(pageNum) {
  var name = path.basename(pdfPath, path.extname(pdfPath));
  return path.join(outputDirectory, name + '-' + pageNum + '.svg');
}
|
||||
|
||||
/**
 * Extracts the file name, without directory and without extension, from a
 * path that uses '/' as separator.
 *
 * @param {string} path Path of a file.
 * @returns {string} The last path segment with its final extension removed.
 *   A segment without extension (or one that only starts with a dot) is
 *   returned unchanged.
 */
function getFileNameFromPath(path) {
  // Start right after the last separator (0 when there is none), so that the
  // separator itself is never part of the result.
  var start = path.lastIndexOf('/') + 1;
  var extIndex = path.lastIndexOf('.');
  // A dot inside a directory name, or at the very start of the file name,
  // does not mark an extension.
  if (extIndex <= start) {
    extIndex = path.length;
  }
  return path.substring(start, extIndex);
}
|
||||
/**
 * A readable stream which offers a stream representing the serialization of a
 * given DOM element (as defined by domstubs.js).
 *
 * May be called with or without `new`.
 *
 * @param {object} options Also forwarded unchanged to stream.Readable.
 * @param {DOMElement} options.svgElement The element to serialize
 */
function ReadableSVGStream(options) {
  if (!(this instanceof ReadableSVGStream)) {
    return new ReadableSVGStream(options);
  }
  stream.Readable.call(this, options);
  // Chunk producer; getNext() returns null once the element is exhausted.
  this.serializer = options.svgElement.getSerializer();
}
util.inherits(ReadableSVGStream, stream.Readable);
// Implements https://nodejs.org/api/stream.html#stream_readable_read_size_1
ReadableSVGStream.prototype._read = function() {
  var chunk;
  while ((chunk = this.serializer.getNext()) !== null) {
    if (!this.push(chunk)) {
      // push() returned false: stop producing for now. The serializer keeps
      // its position, so the next _read call resumes where this one stopped.
      return;
    }
  }
  // No chunks left: signal the end of the stream.
  this.push(null);
};
|
||||
|
||||
/**
 * Streams the SVG element to the given file path.
 *
 * @param {DOMElement} svgElement The element to serialize.
 * @param {string} filePath Destination file.
 * @returns {Promise} Resolved when the file stream emits 'finish'; rejected
 *   with the first error emitted by either stream.
 */
function writeSvgToFile(svgElement, filePath) {
  var readableSvgStream = new ReadableSVGStream({
    svgElement: svgElement,
  });
  var writableStream = fs.createWriteStream(filePath);
  return new Promise(function(resolve, reject) {
    readableSvgStream.once('error', reject);
    writableStream.once('error', reject);
    writableStream.once('finish', resolve);
    readableSvgStream.pipe(writableStream);
  }).catch(function(err) {
    readableSvgStream = null; // Explicitly null because of v8 bug 6512.
    // End the file stream before handing the error on to the caller.
    writableStream.end();
    throw err;
  });
}
|
||||
|
||||
// Will be using promises to load document, pages and misc data instead of
|
||||
@ -69,13 +106,14 @@ pdfjsLib.getDocument({
|
||||
var svgGfx = new pdfjsLib.SVGGraphics(page.commonObjs, page.objs);
|
||||
svgGfx.embedFonts = true;
|
||||
return svgGfx.getSVG(opList, viewport).then(function (svg) {
|
||||
var svgDump = svg.toString();
|
||||
return new Promise(function(resolve) {
|
||||
writeToFile(svgDump, pageNum, resolve);
|
||||
return writeSvgToFile(svg, getFilePathForPage(pageNum)).then(function () {
|
||||
console.log('Page: ' + pageNum);
|
||||
}, function(err) {
|
||||
console.log('Error: ' + err);
|
||||
});
|
||||
});
|
||||
});
|
||||
})
|
||||
});
|
||||
};
|
||||
|
||||
for (var i = 1; i <= numPages; i++) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user