Merge pull request #8557 from Rob--W/svg-oom-streaming

pdf2svg.js: provides ReadableSVGStream class to serialize an SVG as a stream
This commit is contained in:
Tim van der Meij 2017-08-20 23:29:58 +02:00 committed by GitHub
commit 7e4c69eccf
2 changed files with 152 additions and 51 deletions

View File

@ -91,30 +91,6 @@ DOMElement.prototype = {
}
},
toString: function DOMElement_toString() {
var buf = [];
buf.push('<' + this.nodeName);
if (this.nodeName === 'svg:svg') {
buf.push(' xmlns:xlink="http://www.w3.org/1999/xlink"' +
' xmlns:svg="http://www.w3.org/2000/svg"');
}
for (var i in this.attributes) {
buf.push(' ' + i + '="' + xmlEncode(this.attributes[i]) + '"');
}
buf.push('>');
if (this.nodeName === 'svg:tspan' || this.nodeName === 'svg:style') {
buf.push(xmlEncode(this.textContent));
} else {
this.childNodes.forEach(function(childNode) {
buf.push(childNode.toString());
});
}
buf.push('</' + this.nodeName + '>');
return buf.join('');
},
cloneNode: function DOMElement_cloneNode() {
var newNode = new DOMElement(this.nodeName);
newNode.childNodes = this.childNodes;
@ -122,8 +98,95 @@ DOMElement.prototype = {
newNode.textContent = this.textContent;
return newNode;
},
// This method is offered for convenience. It is recommended to directly use
// getSerializer because that allows you to process the chunks as they come
// instead of requiring the whole image to fit in memory.
toString: function DOMElement_toString() {
var buf = [];
var serializer = this.getSerializer();
var chunk;
while ((chunk = serializer.getNext()) !== null) {
buf.push(chunk);
}
return buf.join('');
},
getSerializer: function DOMElement_getSerializer() {
return new DOMElementSerializer(this);
}
}
/**
 * Incremental serializer for an element: every getNext() call hands out the
 * next piece of markup, so the full string never has to be held at once.
 *
 * Internal states:
 *   0 - start of the opening tag
 *   1 - namespace declarations (root svg:svg element only)
 *   2 - prepare the attribute loop
 *   3 - attributes, one per call, then the closing '>' of the opening tag
 *   4 - text content (svg:tspan / svg:style only)
 *   5 - child nodes, each through its own nested serializer
 *   6 - end tag
 *   7 - finished
 *
 * @param {Object} node The element to serialize.
 */
function DOMElementSerializer(node) {
  this._node = node;
  this._state = 0;
  this._loopIndex = 0;
  this._attributeKeys = null;
  this._childSerializer = null;
}
DOMElementSerializer.prototype = {
  /**
   * Produces the next chunk of the element's serialization.
   *
   * @returns {string|null} The next chunk, or null once everything has been
   *   emitted.
   */
  getNext: function DOMElementSerializer_getNext() {
    var element = this._node;

    // The checks below are deliberately sequential rather than exclusive:
    // a state that has nothing to emit advances and continues with the
    // next check during the same call.
    if (this._state === 0) {
      this._state = 1;
      return '<' + element.nodeName;
    }

    if (this._state === 1) {
      this._state = 2;
      if (element.nodeName === 'svg:svg') {
        return ' xmlns:xlink="http://www.w3.org/1999/xlink"' +
               ' xmlns:svg="http://www.w3.org/2000/svg"';
      }
    }

    if (this._state === 2) {
      this._state = 3;
      this._loopIndex = 0;
      this._attributeKeys = Object.keys(element.attributes);
    }

    if (this._state === 3) {
      if (this._loopIndex < this._attributeKeys.length) {
        var attributeName = this._attributeKeys[this._loopIndex++];
        return ' ' + attributeName + '="' +
               xmlEncode(element.attributes[attributeName]) + '"';
      }
      // No attributes left: close the opening tag.
      this._state = 4;
      return '>';
    }

    if (this._state === 4) {
      if (element.nodeName === 'svg:tspan' ||
          element.nodeName === 'svg:style') {
        // Text-carrying elements skip the child loop entirely.
        this._state = 6;
        return xmlEncode(element.textContent);
      }
      this._state = 5;
      this._loopIndex = 0;
    }

    if (this._state === 5) {
      for (;;) {
        var chunk = this._childSerializer ?
                    this._childSerializer.getNext() : null;
        if (chunk !== null) {
          return chunk;
        }
        // Current child (if any) is exhausted; move on to the next one.
        var upcomingChild = element.childNodes[this._loopIndex++];
        if (!upcomingChild) {
          this._childSerializer = null;
          this._state = 6;
          break;
        }
        this._childSerializer = new DOMElementSerializer(upcomingChild);
      }
    }

    if (this._state === 6) {
      this._state = 7;
      return '</' + element.nodeName + '>';
    }

    if (this._state === 7) {
      return null;
    }

    throw new Error('Unexpected serialization state: ' + this._state);
  },
};
const document = {
childNodes : [],

View File

@ -6,6 +6,9 @@
//
var fs = require('fs');
var util = require('util');
var path = require('path');
var stream = require('stream');
// HACK few hacks to let PDF.js be loaded not as a module in global space.
require('./domstubs.js').setStubs(global);
@ -17,32 +20,66 @@ var pdfjsLib = require('pdfjs-dist');
// Path of the PDF to convert: taken from the first command-line argument,
// falling back to the default relative path below when none is given.
var pdfPath = process.argv[2] || '../../web/compressed.tracemonkey-pldi-09.pdf';
// The whole PDF is read synchronously and wrapped in a Uint8Array.
var data = new Uint8Array(fs.readFileSync(pdfPath));
// Dumps svg outputs to a folder called svgdump
function writeToFile(svgdump, pageNum, callback) {
var name = getFileNameFromPath(pdfPath);
fs.mkdir('./svgdump/', function(err) {
if (!err || err.code === 'EEXIST') {
fs.writeFile('./svgdump/' + name + "-" + pageNum + '.svg', svgdump,
function(err) {
if (err) {
console.log('Error: ' + err);
} else {
console.log('Page: ' + pageNum);
}
callback();
});
} else {
callback();
}
});
// Directory that receives the generated SVG files. It is created up front;
// a directory that already exists is not treated as an error.
var outputDirectory = './svgdump';
try {
  // Note: This creates a directory only one level deep. If you want to create
  // multiple subdirectories on the fly, use the mkdirp module from npm.
  fs.mkdirSync(outputDirectory);
} catch (mkdirError) {
  if (mkdirError.code === 'EEXIST') {
    // The directory is already there; nothing to do.
  } else {
    throw mkdirError;
  }
}
// Get filename from the path
// Dumps svg outputs to a folder called svgdump
/**
 * Builds the output path for one page: the PDF's base name (extension
 * removed), a dash, the page number and ".svg", inside the output directory.
 *
 * @param {number} pageNum Page number used in the file name.
 * @returns {string} Path of the SVG file for that page.
 */
function getFilePathForPage(pageNum) {
  var pdfExtension = path.extname(pdfPath);
  var baseName = path.basename(pdfPath, pdfExtension);
  var svgFileName = baseName + '-' + pageNum + '.svg';
  return path.join(outputDirectory, svgFileName);
}
function getFileNameFromPath(path) {
var index = path.lastIndexOf('/');
var extIndex = path.lastIndexOf('.');
return path.substring(index, extIndex);
/**
 * Readable stream whose data is the chunk-by-chunk serialization of a DOM
 * element (as defined by domstubs.js). May be invoked with or without `new`.
 *
 * @param {object} options Forwarded unchanged to stream.Readable.
 * @param {DOMElement} options.svgElement The element to serialize
 */
function ReadableSVGStream(options) {
  var invokedAsConstructor = this instanceof ReadableSVGStream;
  if (!invokedAsConstructor) {
    return new ReadableSVGStream(options);
  }
  stream.Readable.call(this, options);
  this.serializer = options.svgElement.getSerializer();
}
util.inherits(ReadableSVGStream, stream.Readable);
// Implements https://nodejs.org/api/stream.html#stream_readable_read_size_1
// Chunks are pushed until the serializer is exhausted (then the stream is
// ended with push(null)) or until push() reports that it wants no more data.
ReadableSVGStream.prototype._read = function() {
  for (;;) {
    var nextChunk = this.serializer.getNext();
    if (nextChunk === null) {
      this.push(null);
      return;
    }
    if (!this.push(nextChunk)) {
      return;
    }
  }
};
// Streams the serialization of the SVG element into the file at filePath.
// The returned promise resolves when the write stream emits 'finish' and
// rejects when either stream emits 'error'; on rejection the write stream is
// ended and the error is re-thrown to the caller.
function writeSvgToFile(svgElement, filePath) {
  var svgSource = new ReadableSVGStream({
    svgElement: svgElement,
  });
  var fileSink = fs.createWriteStream(filePath);
  var completion = new Promise(function(resolve, reject) {
    svgSource.once('error', reject);
    fileSink.once('error', reject);
    fileSink.once('finish', resolve);
    svgSource.pipe(fileSink);
  });
  return completion.catch(function(err) {
    svgSource = null; // Explicitly null because of v8 bug 6512.
    fileSink.end();
    throw err;
  });
}
// Will be using promises to load document, pages and misc data instead of
@ -69,13 +106,14 @@ pdfjsLib.getDocument({
var svgGfx = new pdfjsLib.SVGGraphics(page.commonObjs, page.objs);
svgGfx.embedFonts = true;
return svgGfx.getSVG(opList, viewport).then(function (svg) {
var svgDump = svg.toString();
return new Promise(function(resolve) {
writeToFile(svgDump, pageNum, resolve);
return writeSvgToFile(svg, getFilePathForPage(pageNum)).then(function () {
console.log('Page: ' + pageNum);
}, function(err) {
console.log('Error: ' + err);
});
});
});
})
});
};
for (var i = 1; i <= numPages; i++) {