0cc1735809
Wait for the completion of writing the generated SVG file before processing the next page. This enables the garbage collector to reclaim the (potentially large) SVG string before memory is allocated again for the next page. Note that since the PDF-to-SVG conversion is now sequential instead of parallel, the time to generate all pages increases. Test case: `node --max_old_space_size=200 examples/node/pdf2svg.js /tmp/FatalProcessOutOfMemory.pdf`. Before this patch: Node.js crashes due to OOM after processing 20 pages. After this patch: Node.js is able to convert all 203 pages to SVG without crashing.
90 lines
2.7 KiB
JavaScript
90 lines
2.7 KiB
JavaScript
/* Any copyright is dedicated to the Public Domain.
|
|
* http://creativecommons.org/publicdomain/zero/1.0/ */
|
|
|
|
//
|
|
// Node tool to dump SVG output into a file.
|
|
//
|
|
|
|
var fs = require('fs');
|
|
|
|
// HACK: a few hacks to let PDF.js be loaded in the global space rather than as a module.
|
|
require('./domstubs.js');
|
|
|
|
// Run `gulp dist-install` to generate 'pdfjs-dist' npm package files.
|
|
var pdfjsLib = require('pdfjs-dist');
|
|
|
|
// Loading file from file system into typed array
|
|
var pdfPath = process.argv[2] || '../../web/compressed.tracemonkey-pldi-09.pdf';
|
|
var data = new Uint8Array(fs.readFileSync(pdfPath));
|
|
|
|
/**
 * Dumps the SVG output of one page into the `./svgdump/` folder, creating the
 * folder first when it does not exist yet. The file is named after the input
 * PDF: `<name>-<pageNum>.svg`.
 *
 * Errors are logged and never passed on: the callback is invoked in every
 * case, so the caller can continue with the next page.
 *
 * @param {string} svgdump - Serialized SVG content to write.
 * @param {number} pageNum - Page number, used in the file name and in the log.
 * @param {function} callback - Invoked without arguments once the write has
 *   completed, or has been given up because of an error.
 */
function writeToFile(svgdump, pageNum, callback) {
  var name = getFileNameFromPath(pdfPath);
  fs.mkdir('./svgdump/', function(mkdirErr) {
    // An already existing folder is fine. Any other failure means the page
    // cannot be written, so report it instead of silently dropping the page.
    if (mkdirErr && mkdirErr.code !== 'EEXIST') {
      console.log('Error: ' + mkdirErr);
      callback();
      return;
    }
    fs.writeFile('./svgdump/' + name + '-' + pageNum + '.svg', svgdump,
      function(writeErr) {
        if (writeErr) {
          console.log('Error: ' + writeErr);
        } else {
          console.log('Page: ' + pageNum);
        }
        callback();
      });
  });
}
|
|
|
|
/**
 * Gets the file name from a '/'-separated path, without the directory part
 * and without the extension.
 *
 * A '.' only counts as the start of an extension when it appears inside the
 * last path component and is not its first character. Paths such as
 * '/tmp/my.dir/file' or '.hidden' are therefore returned with their last
 * component intact.
 *
 * @param {string} path - Path to a file, using '/' as the separator.
 * @returns {string} The last path component with its extension removed.
 */
function getFileNameFromPath(path) {
  // lastIndexOf yields -1 when there is no '/', so adding 1 gives 0 and the
  // name starts at the beginning of the string. The separator itself is
  // never part of the result.
  var start = path.lastIndexOf('/') + 1;
  var extIndex = path.lastIndexOf('.');
  // Without this check a missing dot (-1), or a dot in a directory name,
  // would make substring() swap its arguments and return part of the
  // directory instead of the file name.
  var end = extIndex > start ? extIndex : path.length;
  return path.substring(start, end);
}
|
|
|
|
// Promises (rather than callbacks) are used to load the document and its
// pages, and to convert the pages to SVG strictly one after another.
pdfjsLib.getDocument({
  data: data,
  // Try to export JPEG images directly if they don't need any further processing.
  nativeImageDecoderSupport: pdfjsLib.NativeImageDecoding.DISPLAY
}).then(function (pdfDocument) {
  var pageCount = pdfDocument.numPages;
  console.log('# Document Loaded');
  console.log('Number of Pages: ' + pageCount);
  console.log();

  // Converts a single page to SVG. The returned promise settles only after
  // writeToFile has invoked its callback for that page.
  function convertPage(pageNumber) {
    return pdfDocument.getPage(pageNumber).then(function (pdfPage) {
      console.log('# Page ' + pageNumber);
      var pageViewport = pdfPage.getViewport(1.0 /* scale */);
      console.log('Size: ' + pageViewport.width + 'x' + pageViewport.height);
      console.log();

      return pdfPage.getOperatorList().then(function (operatorList) {
        var svgGraphics =
          new pdfjsLib.SVGGraphics(pdfPage.commonObjs, pdfPage.objs);
        svgGraphics.embedFonts = true;
        return svgGraphics.getSVG(operatorList, pageViewport);
      }).then(function (svgElement) {
        var svgMarkup = svgElement.toString();
        return new Promise(function (done) {
          writeToFile(svgMarkup, pageNumber, done);
        });
      });
    });
  }

  // Chain the pages so that each conversion starts only after the previous
  // page has been handled by writeToFile.
  var pageChain = Promise.resolve();
  var nextPage = 1;
  while (nextPage <= pageCount) {
    pageChain = pageChain.then(convertPage.bind(null, nextPage));
    nextPage++;
  }
  return pageChain;
}).then(function () {
  console.log('# End of Document');
}, function (reason) {
  console.error('Error: ' + reason);
});
|