pdf.js/examples/node/pdf2svg.js
Jonas Jenwald 487a7ddc7d Update (primarily) the Node.js examples to release page resources
Given that Node.js doesn't support Workers, general PDF.js performance will be worse when compared to browsers. In an attempt to improve at least memory usage a little bit, update the Node.js examples to release page resources once parsing is done for that page.
2021-11-30 13:11:50 +01:00

129 lines
3.8 KiB
JavaScript

/* Any copyright is dedicated to the Public Domain.
* http://creativecommons.org/publicdomain/zero/1.0/ */
//
// Node tool to dump SVG output into a file.
//
const fs = require("fs");
const util = require("util");
const path = require("path");
const stream = require("stream");
// HACK: a few hacks to let PDF.js be loaded in the global scope rather than as a module.
require("./domstubs.js").setStubs(global);
// Run `gulp dist-install` to generate 'pdfjs-dist' npm package files.
const pdfjsLib = require("pdfjs-dist/legacy/build/pdf.js");
// External cmaps, which some PDFs need.
const CMAP_URL = "../../node_modules/pdfjs-dist/cmaps/";
const CMAP_PACKED = true;

// Read the input PDF (first command-line argument, or the bundled sample)
// from the file system into a typed array.
const pdfPath =
  process.argv[2] || "../../web/compressed.tracemonkey-pldi-09.pdf";
const data = new Uint8Array(fs.readFileSync(pdfPath));

// Folder that receives the generated SVG files.
const outputDirectory = "./svgdump";

try {
  // Note: only a single directory level is created here. To create several
  // nested subdirectories on the fly, use the mkdirp module from npm.
  fs.mkdirSync(outputDirectory);
} catch (mkdirError) {
  // An already existing directory is fine; anything else is fatal.
  const alreadyExists = mkdirError.code === "EEXIST";
  if (!alreadyExists) {
    throw mkdirError;
  }
}
/**
 * Builds the path of the SVG file for one page: the file lives in
 * `outputDirectory` and is named after the input PDF (extension removed)
 * followed by `-<pageNum>.svg`.
 *
 * @param {number} pageNum The page number used as the file name suffix.
 * @returns {string} The output file path for that page.
 */
function getFilePathForPage(pageNum) {
  const extension = path.extname(pdfPath);
  const baseName = path.basename(pdfPath, extension);
  const fileName = `${baseName}-${pageNum}.svg`;
  return path.join(outputDirectory, fileName);
}
/**
 * Readable stream that emits the serialization of a DOM element (as defined
 * by domstubs.js), one serializer chunk at a time. Works both with and
 * without the `new` keyword.
 *
 * @param {object} options Passed through to the `stream.Readable` constructor.
 * @param {DOMElement} options.svgElement The element to serialize
 */
function ReadableSVGStream(options) {
  const calledAsConstructor = this instanceof ReadableSVGStream;
  if (!calledAsConstructor) {
    // Plain function call: delegate to a proper construction.
    return new ReadableSVGStream(options);
  }
  stream.Readable.call(this, options);
  this.serializer = options.svgElement.getSerializer();
}
util.inherits(ReadableSVGStream, stream.Readable);

// Implements https://nodejs.org/api/stream.html#stream_readable_read_size_1
ReadableSVGStream.prototype._read = function () {
  for (;;) {
    const piece = this.serializer.getNext();
    if (piece === null) {
      // Serializer exhausted: signal end-of-stream.
      this.push(null);
      return;
    }
    if (!this.push(piece)) {
      // The consumer asked us to pause; the next _read call resumes here.
      return;
    }
  }
};
/**
 * Streams the serialized SVG element into the file at the given path.
 *
 * @param {DOMElement} svgElement The element to serialize.
 * @param {string} filePath Destination file path.
 * @returns {Promise} Resolves once the file stream has finished; rejects
 *   with the first error emitted by either stream.
 */
function writeSvgToFile(svgElement, filePath) {
  let source = new ReadableSVGStream({ svgElement });
  const sink = fs.createWriteStream(filePath);

  const transfer = new Promise((resolve, reject) => {
    sink.once("finish", resolve);
    sink.once("error", reject);
    source.once("error", reject);
    source.pipe(sink);
  });

  return transfer.catch(err => {
    source = null; // Explicitly null because of v8 bug 6512.
    sink.end();
    throw err;
  });
}
// Will be using async/await to load document, pages and misc data.
const loadingTask = pdfjsLib.getDocument({
  data,
  cMapUrl: CMAP_URL,
  cMapPacked: CMAP_PACKED,
  fontExtraProperties: true,
});

// Converts every page of the document to an SVG file, one page at a time.
// A failure on one page is logged and does not stop the remaining pages.
(async function () {
  const doc = await loadingTask.promise;
  const numPages = doc.numPages;
  console.log("# Document Loaded");
  console.log(`Number of Pages: ${numPages}`);
  console.log();

  for (let pageNum = 1; pageNum <= numPages; pageNum++) {
    // Declared outside the `try` so that `finally` can release the page
    // even when a later step of the conversion fails.
    let page = null;
    try {
      page = await doc.getPage(pageNum);
      console.log(`# Page ${pageNum}`);
      const viewport = page.getViewport({ scale: 1.0 });
      console.log(`Size: ${viewport.width}x${viewport.height}`);
      console.log();

      const opList = await page.getOperatorList();
      const svgGfx = new pdfjsLib.SVGGraphics(
        page.commonObjs,
        page.objs,
        /* forceDataSchema = */ true
      );
      svgGfx.embedFonts = true;
      const svg = await svgGfx.getSVG(opList, viewport);
      await writeSvgToFile(svg, getFilePathForPage(pageNum));
    } catch (err) {
      console.log(`Error: ${err}`);
    } finally {
      // Release page resources, on success and on failure alike.
      if (page) {
        page.cleanup();
      }
    }
  }
  console.log("# End of Document");
})().catch(function (reason) {
  // Reached when the document itself fails to load (or on any other error
  // outside the per-page handling); report it instead of leaving the
  // promise rejection unhandled.
  console.error(`Error: ${reason}`);
  process.exitCode = 1;
});