487a7ddc7d
Given that Node.js doesn't support Workers, general PDF.js performance will be worse when compared to browsers. In an attempt to improve at least memory usage a little bit, update the Node.js examples to release page resources once parsing is done for that page.
129 lines
3.8 KiB
JavaScript
129 lines
3.8 KiB
JavaScript
/* Any copyright is dedicated to the Public Domain.
|
|
* http://creativecommons.org/publicdomain/zero/1.0/ */
|
|
|
|
//
|
|
// Node tool to dump SVG output into a file.
|
|
//
|
|
|
|
const fs = require("fs");
|
|
const util = require("util");
|
|
const path = require("path");
|
|
const stream = require("stream");
|
|
|
|
// HACK: a few stubs so that PDF.js can be loaded as a plain script in the global scope instead of as a module.
|
|
require("./domstubs.js").setStubs(global);
|
|
|
|
// Run `gulp dist-install` to generate 'pdfjs-dist' npm package files.
|
|
const pdfjsLib = require("pdfjs-dist/legacy/build/pdf.js");
|
|
|
|
// External character maps (cmaps), which some PDFs need.
const CMAP_URL = "../../node_modules/pdfjs-dist/cmaps/";
const CMAP_PACKED = true;

// The PDF to convert: the first command-line argument when one is given,
// otherwise the default sample document. Its contents are read from the file
// system into a typed array.
const pdfPath = process.argv[2]
  ? process.argv[2]
  : "../../web/compressed.tracemonkey-pldi-09.pdf";
const data = new Uint8Array(fs.readFileSync(pdfPath));

// Directory that receives the generated SVG files.
const outputDirectory = "./svgdump";
|
|
|
|
// Create the output directory, including any missing parent directories.
// With `recursive: true` an already existing directory is not an error, so
// neither manual EEXIST handling nor a third-party mkdirp module is needed.
fs.mkdirSync(outputDirectory, { recursive: true });
|
|
|
|
// Builds the output path for one page: the file lives in `outputDirectory` and
// is named "<PDF file name without extension>-<pageNum>.svg".
function getFilePathForPage(pageNum) {
  const extension = path.extname(pdfPath);
  const baseName = path.basename(pdfPath, extension);
  const svgFileName = `${baseName}-${pageNum}.svg`;
  return path.join(outputDirectory, svgFileName);
}
|
|
|
|
/**
 * A readable stream which offers a stream representing the serialization of a
 * given DOM element (as defined by domstubs.js).
 *
 * Can be invoked with or without `new`.
 *
 * @param {object} options Also passed unchanged to `stream.Readable`.
 * @param {DOMElement} options.svgElement The element to serialize
 */
function ReadableSVGStream(options) {
  if (!(this instanceof ReadableSVGStream)) {
    return new ReadableSVGStream(options);
  }
  stream.Readable.call(this, options);
  this.serializer = options.svgElement.getSerializer();
}
util.inherits(ReadableSVGStream, stream.Readable);

// Implements https://nodejs.org/api/stream.html#stream_readable_read_size_1
ReadableSVGStream.prototype._read = function () {
  try {
    let chunk;
    // Keep pushing serialized chunks until the serializer is exhausted or the
    // stream asks us to pause (push() returned false); in the latter case
    // _read is called again later and serialization resumes where it stopped.
    while ((chunk = this.serializer.getNext()) !== null) {
      if (!this.push(chunk)) {
        return;
      }
    }
    // The serializer has nothing left: signal end-of-stream.
    this.push(null);
  } catch (err) {
    // Errors raised while reading must be reported through destroy(err), so
    // that they reach "error" listeners instead of escaping as a raw throw.
    this.destroy(err);
  }
};
|
|
|
|
// Serializes the SVG element and pipes the result into a file at `filePath`.
// The returned promise resolves once the file stream has finished, and
// rejects with the first error emitted by either stream.
function writeSvgToFile(svgElement, filePath) {
  let svgSource = new ReadableSVGStream({ svgElement });
  const fileSink = fs.createWriteStream(filePath);

  const transfer = new Promise((resolve, reject) => {
    svgSource.once("error", reject);
    fileSink.once("error", reject);
    fileSink.once("finish", resolve);
    svgSource.pipe(fileSink);
  });

  return transfer.catch(err => {
    // Drop the reference to the source stream explicitly (v8 bug 6512).
    svgSource = null;
    fileSink.end();
    throw err;
  });
}
|
|
|
|
// Load the document, then convert it page by page using async/await.
const loadingTask = pdfjsLib.getDocument({
  data,
  cMapUrl: CMAP_URL,
  cMapPacked: CMAP_PACKED,
  fontExtraProperties: true,
});
(async function () {
  const doc = await loadingTask.promise;
  const numPages = doc.numPages;
  console.log("# Document Loaded");
  console.log(`Number of Pages: ${numPages}`);
  console.log();

  // Pages are processed one at a time; a failure on one page is logged and
  // does not stop the remaining pages from being converted.
  for (let pageNum = 1; pageNum <= numPages; pageNum++) {
    try {
      const page = await doc.getPage(pageNum);
      try {
        console.log(`# Page ${pageNum}`);
        const viewport = page.getViewport({ scale: 1.0 });
        console.log(`Size: ${viewport.width}x${viewport.height}`);
        console.log();

        const opList = await page.getOperatorList();
        const svgGfx = new pdfjsLib.SVGGraphics(
          page.commonObjs,
          page.objs,
          /* forceDataSchema = */ true
        );
        svgGfx.embedFonts = true;
        const svg = await svgGfx.getSVG(opList, viewport);
        await writeSvgToFile(svg, getFilePathForPage(pageNum));
      } finally {
        // Release page resources, also when rendering or writing failed.
        page.cleanup();
      }
    } catch (err) {
      console.log(`Error: ${err}`);
    }
  }
  console.log("# End of Document");
})().catch(function (err) {
  // Reached when the document itself cannot be loaded: report the failure
  // instead of leaving an unhandled promise rejection.
  console.error(`Error: ${err}`);
  process.exitCode = 1;
});
|