diff --git a/examples/node/domstubs.js b/examples/node/domstubs.js index 23c90ef5a..ce3d19f22 100644 --- a/examples/node/domstubs.js +++ b/examples/node/domstubs.js @@ -91,30 +91,6 @@ DOMElement.prototype = { } }, - toString: function DOMElement_toString() { - var buf = []; - buf.push('<' + this.nodeName); - if (this.nodeName === 'svg:svg') { - buf.push(' xmlns:xlink="http://www.w3.org/1999/xlink"' + - ' xmlns:svg="http://www.w3.org/2000/svg"'); - } - for (var i in this.attributes) { - buf.push(' ' + i + '="' + xmlEncode(this.attributes[i]) + '"'); - } - - buf.push('>'); - - if (this.nodeName === 'svg:tspan' || this.nodeName === 'svg:style') { - buf.push(xmlEncode(this.textContent)); - } else { - this.childNodes.forEach(function(childNode) { - buf.push(childNode.toString()); - }); - } - buf.push('</' + this.nodeName + '>'); - return buf.join(''); - }, - cloneNode: function DOMElement_cloneNode() { var newNode = new DOMElement(this.nodeName); newNode.childNodes = this.childNodes; @@ -122,8 +98,95 @@ DOMElement.prototype = { newNode.textContent = this.textContent; return newNode; }, + + // This method is offered for convenience. It is recommended to directly use + // getSerializer because that allows you to process the chunks as they come + // instead of requiring the whole image to fit in memory. + toString: function DOMElement_toString() { + var buf = []; + var serializer = this.getSerializer(); + var chunk; + while ((chunk = serializer.getNext()) !== null) { + buf.push(chunk); + } + return buf.join(''); + }, + + getSerializer: function DOMElement_getSerializer() { + return new DOMElementSerializer(this); + } } +function DOMElementSerializer(node) { + this._node = node; + this._state = 0; + this._loopIndex = 0; + this._attributeKeys = null; + this._childSerializer = null; +} +DOMElementSerializer.prototype = { + /** + * Yields the next chunk in the serialization of the element. + * + * @returns {string|null} null if the element has fully been serialized. 
+ */ + getNext: function DOMElementSerializer_getNext() { + var node = this._node; + switch (this._state) { + case 0: // Start opening tag. + ++this._state; + return '<' + node.nodeName; + case 1: // Add SVG namespace if this is the root element. + ++this._state; + if (node.nodeName === 'svg:svg') { + return ' xmlns:xlink="http://www.w3.org/1999/xlink"' + + ' xmlns:svg="http://www.w3.org/2000/svg"'; + } + case 2: // Initialize variables for looping over attributes. + ++this._state; + this._loopIndex = 0; + this._attributeKeys = Object.keys(node.attributes); + case 3: // Serialize any attributes and end opening tag. + if (this._loopIndex < this._attributeKeys.length) { + var name = this._attributeKeys[this._loopIndex++]; + return ' ' + name + '="' + xmlEncode(node.attributes[name]) + '"'; + } + ++this._state; + return '>'; + case 4: // Serialize textContent for tspan/style elements. + if (node.nodeName === 'svg:tspan' || node.nodeName === 'svg:style') { + this._state = 6; + return xmlEncode(node.textContent); + } + ++this._state; + this._loopIndex = 0; + case 5: // Serialize child nodes (only for non-tspan/style elements). + var value; + while (true) { + value = this._childSerializer && this._childSerializer.getNext(); + if (value !== null) { + return value; + } + var nextChild = node.childNodes[this._loopIndex++]; + if (nextChild) { + this._childSerializer = new DOMElementSerializer(nextChild); + } else { + this._childSerializer = null; + ++this._state; + break; + } + } + case 6: // Ending tag. + ++this._state; + return '</' + node.nodeName + '>'; + case 7: // Done. 
+ return null; + default: + throw new Error('Unexpected serialization state: ' + this._state); + } + }, +}; + const document = { childNodes : [], diff --git a/examples/node/pdf2svg.js b/examples/node/pdf2svg.js index f891c9fb6..4c59cb78c 100644 --- a/examples/node/pdf2svg.js +++ b/examples/node/pdf2svg.js @@ -6,6 +6,9 @@ // var fs = require('fs'); +var util = require('util'); +var path = require('path'); +var stream = require('stream'); // HACK few hacks to let PDF.js be loaded not as a module in global space. require('./domstubs.js').setStubs(global); @@ -17,32 +20,66 @@ var pdfjsLib = require('pdfjs-dist'); var pdfPath = process.argv[2] || '../../web/compressed.tracemonkey-pldi-09.pdf'; var data = new Uint8Array(fs.readFileSync(pdfPath)); -// Dumps svg outputs to a folder called svgdump -function writeToFile(svgdump, pageNum, callback) { - var name = getFileNameFromPath(pdfPath); - fs.mkdir('./svgdump/', function(err) { - if (!err || err.code === 'EEXIST') { - fs.writeFile('./svgdump/' + name + "-" + pageNum + '.svg', svgdump, - function(err) { - if (err) { - console.log('Error: ' + err); - } else { - console.log('Page: ' + pageNum); - } - callback(); - }); - } else { - callback(); - } - }); +var outputDirectory = './svgdump'; + +try { + // Note: This creates a directory only one level deep. If you want to create + // multiple subdirectories on the fly, use the mkdirp module from npm. 
+ fs.mkdirSync(outputDirectory); +} catch (e) { + if (e.code !== 'EEXIST') { + throw e; + } } -// Get filename from the path +// Dumps svg outputs to a folder called svgdump +function getFilePathForPage(pageNum) { + var name = path.basename(pdfPath, path.extname(pdfPath)); + return path.join(outputDirectory, name + '-' + pageNum + '.svg'); +} -function getFileNameFromPath(path) { - var index = path.lastIndexOf('/'); - var extIndex = path.lastIndexOf('.'); - return path.substring(index, extIndex); +/** + * A readable stream which offers a stream representing the serialization of a + * given DOM element (as defined by domstubs.js). + * + * @param {object} options + * @param {DOMElement} options.svgElement The element to serialize + */ +function ReadableSVGStream(options) { + if (!(this instanceof ReadableSVGStream)) { + return new ReadableSVGStream(options); + } + stream.Readable.call(this, options); + this.serializer = options.svgElement.getSerializer(); +} +util.inherits(ReadableSVGStream, stream.Readable); +// Implements https://nodejs.org/api/stream.html#stream_readable_read_size_1 +ReadableSVGStream.prototype._read = function() { + var chunk; + while ((chunk = this.serializer.getNext()) !== null) { + if (!this.push(chunk)) { + return; + } + } + this.push(null); +}; + +// Streams the SVG element to the given file path. +function writeSvgToFile(svgElement, filePath) { + var readableSvgStream = new ReadableSVGStream({ + svgElement: svgElement, + }); + var writableStream = fs.createWriteStream(filePath); + return new Promise(function(resolve, reject) { + readableSvgStream.once('error', reject); + writableStream.once('error', reject); + writableStream.once('finish', resolve); + readableSvgStream.pipe(writableStream); + }).catch(function(err) { + readableSvgStream = null; // Explicitly null because of v8 bug 6512. 
+ writableStream.end(); + throw err; + }); } // Will be using promises to load document, pages and misc data instead of @@ -69,13 +106,14 @@ pdfjsLib.getDocument({ var svgGfx = new pdfjsLib.SVGGraphics(page.commonObjs, page.objs); svgGfx.embedFonts = true; return svgGfx.getSVG(opList, viewport).then(function (svg) { - var svgDump = svg.toString(); - return new Promise(function(resolve) { - writeToFile(svgDump, pageNum, resolve); + return writeSvgToFile(svg, getFilePathForPage(pageNum)).then(function () { + console.log('Page: ' + pageNum); + }, function(err) { + console.log('Error: ' + err); }); }); }); - }) + }); }; for (var i = 1; i <= numPages; i++) {