// ---------------------------------------------------------------------------
// diff --git a/examples/node/domparsermock.js b/examples/node/domparsermock.js
// new file mode 100644
// index 000000000..cf797c7a8
// --- /dev/null
// +++ b/examples/node/domparsermock.js
// @@ -0,0 +1,107 @@
// ---------------------------------------------------------------------------
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Any copyright is dedicated to the Public Domain.
 * http://creativecommons.org/publicdomain/zero/1.0/ */

// Dummy XML Parser
//
// NOTE(review): this file was recovered from a diff whose line breaks were
// collapsed and whose regex literals containing markup-like text were
// stripped. The comment-stripping, text-node and CDATA patterns in
// parseFromString are reconstructions -- confirm against the upstream file.

/**
 * Minimal stand-in for a DOM node.
 *
 * Element nodes get a `childNodes` array assigned by the parser; text nodes
 * ('#text') have no `childNodes` and carry their text in `nodeValue`.
 *
 * @param {string} nodeName - Tag name, or '#text' for text nodes.
 * @param {string} [nodeValue] - Text of a text node.
 */
function DOMNodeMock(nodeName, nodeValue) {
  this.nodeName = nodeName;
  this.nodeValue = nodeValue;
  // defineProperty without `enumerable` keeps the parent back-reference
  // non-enumerable, so enumerating or serializing a node does not follow it.
  Object.defineProperty(this, 'parentNode', {value: null, writable: true});
}
DOMNodeMock.prototype = {
  /** First child node, or undefined when there are no children. */
  get firstChild() {
    // Text nodes have no childNodes array at all.
    return this.childNodes ? this.childNodes[0] : undefined;
  },
  /** Node following this one in the parent's childNodes, or undefined. */
  get nextSibling() {
    var parent = this.parentNode;
    if (!parent || !parent.childNodes) {
      return undefined; // root or detached node
    }
    var index = parent.childNodes.indexOf(this);
    if (index < 0) {
      return undefined;
    }
    return parent.childNodes[index + 1];
  },
  /** Concatenated text of this node and all of its descendants. */
  get textContent() {
    if (!this.childNodes) {
      return this.nodeValue || '';
    }
    return this.childNodes.map(function (child) {
      return child.textContent;
    }).join('');
  },
  /** @returns {boolean} true when the node has at least one child. */
  hasChildNodes: function () {
    return !!this.childNodes && this.childNodes.length > 0;
  }
};

/**
 * Converts a Unicode code point to a string, emitting a UTF-16 surrogate pair
 * for code points above 0xFFFF (String.fromCharCode alone would truncate).
 *
 * @param {number} codePoint - Code point to convert.
 * @param {string} fallback - Returned when codePoint is not a valid code point.
 * @returns {string}
 */
function codePointToString(codePoint, fallback) {
  if (!(codePoint >= 0 && codePoint <= 0x10FFFF)) {
    return fallback;
  }
  if (codePoint <= 0xFFFF) {
    return String.fromCharCode(codePoint);
  }
  var offset = codePoint - 0x10000;
  return String.fromCharCode(0xD800 + (offset >> 10),
                             0xDC00 + (offset & 0x3FF));
}

/**
 * Decodes the five predefined XML entities and numeric character references
 * (decimal `&#65;` and hexadecimal `&#x41;`). Unknown named entities are left
 * untouched.
 *
 * @param {string} text - Raw character data.
 * @returns {string} Decoded text.
 */
function decodeXML(text) {
  if (text.indexOf('&') < 0) {
    return text;
  }
  return text.replace(/&(#(x[0-9a-f]+|\d+)|\w+);/gi, function (all, entityName, number) {
    if (number) {
      // The regex is case-insensitive, so the hex marker may be 'x' or 'X'.
      var isHex = number.charAt(0).toLowerCase() === 'x';
      var codePoint = isHex ? parseInt(number.substring(1), 16) :
                              parseInt(number, 10);
      return codePointToString(codePoint, all);
    }
    switch (entityName) {
      case 'amp':
        return '&';
      case 'lt':
        return '<';
      case 'gt':
        return '>';
      case 'quot':
        return '\"';
      case 'apos':
        return '\'';
    }
    return '&' + entityName + ';';
  });
}

/**
 * Tiny regex-based replacement for the browser DOMParser. It is not a
 * conforming XML parser; attributes are matched but not stored.
 */
function DOMParserMock() {}
DOMParserMock.prototype = {
  /**
   * Parses an XML string into a tree of DOMNodeMock objects.
   *
   * Works by repeatedly replacing recognised pieces of the input with
   * "<index>," placeholders that point into a flat `nodes` array, innermost
   * elements first, until nothing more can be collapsed.
   *
   * @param {string} content - XML source.
   * @returns {{documentElement: DOMNodeMock}} The last node created, which is
   *   the root element for well-formed input (undefined for empty input).
   */
  parseFromString: function (content) {
    // Drop processing instructions and comments.
    content = content.replace(/<\?[\s\S]*?\?>|<!--[\s\S]*?-->/g, '').trim();
    var nodes = [];
    // Character data between tags -> text nodes. The run must not start with
    // '<', otherwise directly adjacent tags ("<a><b>") would be swallowed as
    // text.
    content = content.replace(/>([^<]+)</g, function (all, text) {
      var i = nodes.length;
      var node = new DOMNodeMock('#text', decodeXML(text));
      nodes.push(node);
      if (node.textContent.trim().length === 0) {
        return '><'; // ignoring whitespaces
      }
      return '>' + i + ',<';
    });
    // CDATA sections -> text nodes (content is taken verbatim, not decoded).
    content = content.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, function (all, text) {
      var i = nodes.length;
      var node = new DOMNodeMock('#text', text);
      nodes.push(node);
      return i + ',';
    });
    var lastLength;
    do {
      lastLength = nodes.length;
      // Collapse every element whose body is empty, self-closed, or already
      // reduced to a list of placeholders.
      content = content.replace(/<([\w\:]+)((?:[\s\w:=]|'[^']*'|"[^"]*")*)(?:\/>|>([\d,]*)<\/[^>]+>)/g,
        function (all, name, attrs, childIndexes) {
          var i = nodes.length;
          var node = new DOMNodeMock(name);
          var children = [];
          if (childIndexes) {
            var indexes = childIndexes.split(',');
            indexes.pop(); // trailing empty entry after the last comma
            indexes.forEach(function (child) {
              var childNode = nodes[+child];
              childNode.parentNode = node;
              children.push(childNode);
            });
          }
          node.childNodes = children;
          nodes.push(node);
          return i + ',';
        });
    } while (lastLength < nodes.length);
    return {
      documentElement: nodes.pop()
    };
  }
};

// CommonJS export; guarded so the file can also be evaluated in environments
// that do not define `exports`.
if (typeof exports !== 'undefined') {
  exports.DOMParserMock = DOMParserMock;
}

// ---------------------------------------------------------------------------
// diff --git a/examples/node/getinfo.js b/examples/node/getinfo.js
// new file mode 100644
// index 000000000..ae45db94a
// --- /dev/null
// +++ b/examples/node/getinfo.js
// @@ -0,0 +1,76 @@
// ---------------------------------------------------------------------------
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Any copyright is dedicated to the Public Domain.
 * http://creativecommons.org/publicdomain/zero/1.0/ */

//
// Basic node example that prints document metadata and text content.
// Requires single file built version of PDF.js -- please run
// `node make singlefile` before running the example.
//
// Usage: node getinfo.js [path/to/file.pdf]
//

var fs = require('fs');
var path = require('path');

// HACK: PDF.js is loaded into the global space rather than as a module, so
// the browser globals it expects are faked here before requiring it.
global.window = global;
global.navigator = { userAgent: "node" };
global.PDFJS = {};
global.DOMParser = require('./domparsermock.js').DOMParserMock;

require('../../build/singlefile/build/pdf.combined.js');

// Loading file from file system into typed array. The default sample is
// resolved against this script's directory so the example works regardless
// of the current working directory.
var pdfPath = process.argv[2] ||
  path.join(__dirname, '../../web/compressed.tracemonkey-pldi-09.pdf');
var data = new Uint8Array(fs.readFileSync(pdfPath));

// Will be using promises to load document, pages and misc data instead of
// callback.
PDFJS.getDocument(data).then(function (doc) {
  var numPages = doc.numPages;
  console.log('# Document Loaded');
  console.log('Number of Pages: ' + numPages);
  console.log();

  var lastPromise; // will be used to chain promises
  lastPromise = doc.getMetadata().then(function (meta) {
    console.log('# Metadata Is Loaded');
    console.log('## Info');
    console.log(JSON.stringify(meta.info, null, 2));
    console.log();
    if (meta.metadata) {
      console.log('## Metadata');
      console.log(JSON.stringify(meta.metadata.metadata, null, 2));
      console.log();
    }
  });

  // Prints the size and the text content of one page; resolves when done.
  var loadPage = function (pageNum) {
    return doc.getPage(pageNum).then(function (page) {
      console.log('# Page ' + pageNum);
      var viewport = page.getViewport(1.0 /* scale */);
      console.log('Size: ' + viewport.width + 'x' + viewport.height);
      console.log();
      return page.getTextContent().then(function (content) {
        // Content contains lots of information about the text layout and
        // styles, but we need only strings at the moment
        var strings = content.items.map(function (item) {
          return item.str;
        });
        console.log('## Text Content');
        console.log(strings.join(' '));
      }).then(function () {
        console.log();
      });
    });
  };
  // Loading of the first page will wait on metadata and subsequent loadings
  // will wait on the previous pages.
  for (var i = 1; i <= numPages; i++) {
    lastPromise = lastPromise.then(loadPage.bind(null, i));
  }
  return lastPromise;
}).then(function () {
  console.log('# End of Document');
}, function (err) {
  console.error('Error: ' + err);
  // Signal failure to the calling shell without cutting off pending output.
  process.exitCode = 1;
});