Reduce concurrent memory footprint of pdf2svg.js

Wait for the generated SVG file to be fully written before processing the
next page. This gives the garbage collector a chance to reclaim the
(potentially large) SVG string of the current page before memory is
allocated for the next one.

Note that since the PDF-to-SVG conversion is now sequential instead of
parallel, the time to generate all pages increases.
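
To illustrate the trade-off, here is a minimal runnable sketch (not code
from this patch); convertAndWrite is a hypothetical stand-in for "render
one page to SVG and write it to disk":

// Hypothetical helper: render page `pageNum` to SVG and write it out.
function convertAndWrite(pageNum) {
  return new Promise(function(resolve) {
    var svg = 'large SVG string for page ' + pageNum; // stands in for a big allocation
    setTimeout(function() {                           // stands in for the async write
      console.log('wrote page ' + pageNum + ' (' + svg.length + ' chars)');
      resolve();
    }, 10);
  });
}

// Parallel (old behavior): all pages start at once, so every page's SVG
// string can be alive at the same time.
// [1, 2, 3].map(convertAndWrite);

// Sequential (new behavior): each page waits for the previous write, so at
// most one large SVG string is reachable at any moment.
var chain = Promise.resolve();
[1, 2, 3].forEach(function(pageNum) {
  chain = chain.then(function() { return convertAndWrite(pageNum); });
});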

Test case:
node --max_old_space_size=200 examples/node/pdf2svg.js /tmp/FatalProcessOutOfMemory.pdf
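
(--max_old_space_size=200 caps V8's old-generation heap at roughly 200 MB,
which makes the out-of-memory condition easy to reproduce with a large
multi-page PDF.)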

Before this patch:
- Node.js crashes due to OOM after processing 20 pages.

After this patch:
- Node.js is able to convert all 203 pages to SVG without crashing.
Author: Rob Wu
Date:   2017-06-18 12:58:53 +02:00
Parent: 849d8cfa24
Commit: 0cc1735809
examples/node/pdf2svg.js

@@ -18,7 +18,7 @@ var pdfPath = process.argv[2] || '../../web/compressed.tracemonkey-pldi-09.pdf';
 var data = new Uint8Array(fs.readFileSync(pdfPath));
 
 // Dumps svg outputs to a folder called svgdump
-function writeToFile(svgdump, pageNum) {
+function writeToFile(svgdump, pageNum, callback) {
   var name = getFileNameFromPath(pdfPath);
   fs.mkdir('./svgdump/', function(err) {
     if (!err || err.code === 'EEXIST') {
@@ -29,7 +29,10 @@ function writeToFile(svgdump, pageNum) {
         } else {
           console.log('Page: ' + pageNum);
         }
+        callback();
       });
+    } else {
+      callback();
     }
   });
 }
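
Pieced together from the two hunks above, the patched writeToFile reads
roughly as follows. The middle of the function (the fs.writeFile call and
its error logging) falls outside the hunks and is inferred from the visible
context, so treat those lines as an assumption rather than a verbatim quote:

function writeToFile(svgdump, pageNum, callback) {
  var name = getFileNameFromPath(pdfPath);
  fs.mkdir('./svgdump/', function(err) {
    if (!err || err.code === 'EEXIST') {
      // Inferred from context: write the SVG dump for this page to disk.
      fs.writeFile('./svgdump/' + name + '-' + pageNum + '.svg', svgdump,
                   function(err) {
        if (err) {
          console.log('Error: ' + err);
        } else {
          console.log('Page: ' + pageNum);
        }
        callback(); // invoked only after the write has finished
      });
    } else {
      callback(); // mkdir failed: call back anyway so the caller never hangs
    }
  });
}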
@@ -67,7 +70,9 @@ pdfjsLib.getDocument({
       svgGfx.embedFonts = true;
       return svgGfx.getSVG(opList, viewport).then(function (svg) {
         var svgDump = svg.toString();
-        writeToFile(svgDump, pageNum);
+        return new Promise(function(resolve) {
+          writeToFile(svgDump, pageNum, resolve);
+        });
       });
     });
 })
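
As a design note, the same serialization can be written more directly with
async/await on later Node.js versions. This is a comparison sketch only,
not part of the 2017 patch; renderPageToSvg is a hypothetical stand-in for
the getPage/getOperatorList/getSVG steps:

// Comparison sketch (Node.js 8+); not part of this patch.
async function dumpAllPages(doc, numPages) {
  for (var pageNum = 1; pageNum <= numPages; pageNum++) {
    var svgDump = await renderPageToSvg(doc, pageNum); // hypothetical helper
    // Awaiting the write keeps at most one page's SVG string alive.
    await new Promise(function(resolve) {
      writeToFile(svgDump, pageNum, resolve);
    });
  }
}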