From 849d8cfa2440812e8deff61a93c72727e7d9e0d5 Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Sun, 18 Jun 2017 12:57:17 +0200 Subject: [PATCH 1/2] Improve memory-efficiency of DOMElement_toString in domstubs Test case: Using the PDF file from https://github.com/mozilla/pdf.js/issues/8534 node --max_old_space_size=200 examples/node/pdf2svg.js /tmp/FatalProcessOutOfMemory.pdf Before this patch: Node.js crashes due to OOM after processing 10 pages. After this patch: Node.js crashes due to OOM after processing 19 pages. --- examples/node/domstubs.js | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/examples/node/domstubs.js b/examples/node/domstubs.js index ce4dc7890..d746b0051 100644 --- a/examples/node/domstubs.js +++ b/examples/node/domstubs.js @@ -91,24 +91,27 @@ DOMElement.prototype = { }, toString: function DOMElement_toString() { - var attrList = []; - for (i in this.attributes) { - attrList.push(i + '="' + xmlEncode(this.attributes[i]) + '"'); + var buf = []; + buf.push('<' + this.nodeName); + if (this.nodeName === 'svg:svg') { + buf.push(' xmlns:xlink="http://www.w3.org/1999/xlink"' + + ' xmlns:svg="http://www.w3.org/2000/svg"'); + } + for (var i in this.attributes) { + buf.push(' ' + i + '="' + xmlEncode(this.attributes[i]) + '"'); } + buf.push('>'); + if (this.nodeName === 'svg:tspan' || this.nodeName === 'svg:style') { - var encText = xmlEncode(this.textContent); - return '<' + this.nodeName + ' ' + attrList.join(' ') + '>' + - encText + '</' + this.nodeName + '>'; - } else if (this.nodeName === 'svg:svg') { - var ns = 'xmlns:xlink="http://www.w3.org/1999/xlink" ' + - 'xmlns:svg="http://www.w3.org/2000/svg"' - return '<' + this.nodeName + ' ' + ns + ' ' + attrList.join(' ') + '>' + - this.childNodes.join('') + '</' + this.nodeName + '>'; + buf.push(xmlEncode(this.textContent)); } else { - return '<' + this.nodeName + ' ' + attrList.join(' ') + '>' + - this.childNodes.join('') + '</' + this.nodeName + '>'; + this.childNodes.forEach(function(childNode) { + 
buf.push(childNode.toString()); + }); } + buf.push('</' + this.nodeName + '>'); + return buf.join(''); }, cloneNode: function DOMElement_cloneNode() { From 0cc173580934c3420ab3f1bf5d2dbe67e8e4599c Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Sun, 18 Jun 2017 12:58:53 +0200 Subject: [PATCH 2/2] Reduce concurrent memory footprint of pdf2svg.js Wait for the completion of writing the generated SVG file before processing the next page. This is to enable the garbage collector to garbage-collect the (potentially large) SVG string before trying to allocate memory again for the next page. Note that since the PDF-to-SVG conversion is now sequential instead of parallel, the time to generate all pages increases. Test case: node --max_old_space_size=200 examples/node/pdf2svg.js /tmp/FatalProcessOutOfMemory.pdf Before this patch: - Node.js crashes due to OOM after processing 20 pages. After this patch: - Node.js is able to convert all 203 pages of the PDF to SVG without crashing. --- examples/node/pdf2svg.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/node/pdf2svg.js b/examples/node/pdf2svg.js index 7edf5df32..de2b6c56b 100644 --- a/examples/node/pdf2svg.js +++ b/examples/node/pdf2svg.js @@ -18,7 +18,7 @@ var pdfPath = process.argv[2] || '../../web/compressed.tracemonkey-pldi-09.pdf'; var data = new Uint8Array(fs.readFileSync(pdfPath)); // Dumps svg outputs to a folder called svgdump -function writeToFile(svgdump, pageNum) { +function writeToFile(svgdump, pageNum, callback) { var name = getFileNameFromPath(pdfPath); fs.mkdir('./svgdump/', function(err) { if (!err || err.code === 'EEXIST') { @@ -29,7 +29,10 @@ function writeToFile(svgdump, pageNum) { } else { console.log('Page: ' + pageNum); } + callback(); }); + } else { + callback(); } }); } @@ -67,7 +70,9 @@ pdfjsLib.getDocument({ svgGfx.embedFonts = true; return svgGfx.getSVG(opList, viewport).then(function (svg) { var svgDump = svg.toString(); - writeToFile(svgDump, pageNum); + return new Promise(function(resolve) { + 
writeToFile(svgDump, pageNum, resolve); + }); }); }); })