diff --git a/Makefile b/Makefile index 62565670a..3cc423350 100644 --- a/Makefile +++ b/Makefile @@ -39,6 +39,7 @@ PDF_JS_FILES = \ ../external/jpgjs/jpg.js \ jpx.js \ bidi.js \ + metadata.js \ $(NULL) # make server diff --git a/make.js b/make.js index 33771aeb7..19dea7b82 100755 --- a/make.js +++ b/make.js @@ -97,7 +97,8 @@ target.bundle = function() { 'worker.js', '../external/jpgjs/jpg.js', 'jpx.js', - 'bidi.js']; + 'bidi.js', + 'metadata.js']; if (!exists(BUILD_DIR)) mkdir(BUILD_DIR); diff --git a/src/core.js b/src/core.js index 9c671960e..3010e9f6b 100644 --- a/src/core.js +++ b/src/core.js @@ -587,14 +587,6 @@ var PDFDocModel = (function PDFDocModelClosure() { this.mainXRefEntriesOffset); this.xref = xref; this.catalog = new Catalog(xref); - if (xref.trailer && xref.trailer.has('ID')) { - var fileID = ''; - var id = xref.fetchIfRef(xref.trailer.get('ID'))[0]; - id.split('').forEach(function(el) { - fileID += Number(el.charCodeAt(0)).toString(16); - }); - this.fileID = fileID; - } }, get numPages() { var linearization = this.linearization; @@ -602,21 +594,33 @@ var PDFDocModel = (function PDFDocModelClosure() { // shadow the prototype getter return shadow(this, 'numPages', num); }, + getDocumentInfo: function pdfDocGetDocumentInfo() { + var info; + if (this.xref.trailer.has('Info')) + info = this.xref.fetch(this.xref.trailer.get('Info')); + + return shadow(this, 'getDocumentInfo', info); + }, getFingerprint: function pdfDocGetFingerprint() { - if (this.fileID) { - return this.fileID; + var xref = this.xref, fileID; + if (xref.trailer.has('ID')) { + fileID = ''; + var id = xref.fetchIfRef(xref.trailer.get('ID'))[0]; + id.split('').forEach(function(el) { + fileID += Number(el.charCodeAt(0)).toString(16); + }); } else { // If we got no fileID, then we generate one, // from the first 100 bytes of PDF var data = this.stream.bytes.subarray(0, 100); var hash = calculateMD5(data, 0, data.length); - var strHash = ''; + fileID = ''; for (var i = 0, length = hash.length; i < length; i++) { - strHash += Number(hash[i]).toString(16); + fileID += Number(hash[i]).toString(16); } - - return strHash; } + + return shadow(this, 'getFingerprint', fileID); }, getPage: function pdfDocGetPage(n) { return this.catalog.getPage(n); @@ -645,6 +649,7 @@ var PDFDoc = (function PDFDocClosure() { this.stream = stream; this.pdfModel = new PDFDocModel(stream); this.fingerprint = this.pdfModel.getFingerprint(); + this.info = this.pdfModel.getDocumentInfo(); this.catalog = this.pdfModel.catalog; this.objs = new PDFObjects(); diff --git a/src/metadata.js b/src/metadata.js new file mode 100644 index 000000000..7f3f24a86 --- /dev/null +++ b/src/metadata.js @@ -0,0 +1,66 @@ +/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ + +'use strict'; + +var Metadata = PDFJS.Metadata = (function MetadataClosure() { + function Metadata(meta) { + if (typeof meta === 'string') { + var parser = new DOMParser(); + meta = parser.parseFromString(meta, 'application/xml'); + } else if (!(meta instanceof Document)) { + error('Metadata: Invalid metadata object'); + } + + this.metaDocument = meta; + this.metadata = {}; + this.parse(); + } + + Metadata.prototype = { + parse: function() { + var doc = this.metaDocument; + var rdf = doc.documentElement; + + if (rdf.nodeName.toLowerCase() !== 'rdf:rdf') { // Wrapped in + rdf = rdf.firstChild; + while (rdf && rdf.nodeName.toLowerCase() !== 'rdf:rdf') + rdf = rdf.nextSibling; + } + + var nodeName = (rdf) ? rdf.nodeName.toLowerCase() : null; + if (!rdf || nodeName !== 'rdf:rdf' || !rdf.hasChildNodes()) + return; + + var childNodes = rdf.childNodes, desc, namespace, entries, entry; + + for (var i = 0, length = childNodes.length; i < length; i++) { + desc = childNodes[i]; + if (desc.nodeName.toLowerCase() !== 'rdf:description') + continue; + + entries = []; + for (var ii = 0, iLength = desc.childNodes.length; ii < iLength; ii++) { + if (desc.childNodes[ii].nodeName.toLowerCase() !== '#text') + entries.push(desc.childNodes[ii]); + } + + for (ii = 0, iLength = entries.length; ii < iLength; ii++) { + var entry = entries[ii]; + var name = entry.nodeName.toLowerCase(); + this.metadata[name] = entry.textContent.trim(); + } + } + }, + + get: function(name) { + return this.metadata[name] || null; + }, + + has: function(name) { + return typeof this.metadata[name] !== 'undefined'; + } + }; + + return Metadata; +})(); diff --git a/src/obj.js b/src/obj.js index 3c649fb06..2eb9c6f1d 100644 --- a/src/obj.js +++ b/src/obj.js @@ -111,6 +111,22 @@ var Catalog = (function CatalogClosure() { } Catalog.prototype = { + get metadata() { + var ref = this.catDict.get('Metadata'); + var stream = this.xref.fetchIfRef(ref); + var metadata; + if (stream && isDict(stream.dict)) { + var type = stream.dict.get('Type'); + var subtype = stream.dict.get('Subtype'); + + if (isName(type) && isName(subtype) && + type.name === 'Metadata' && subtype.name === 'XML') { + metadata = stringToPDFString(bytesToString(stream.getBytes())); + } + } + + return shadow(this, 'metadata', metadata); + }, get toplevelPagesDict() { var pagesObj = this.catDict.get('Pages'); assertWellFormed(isRef(pagesObj), 'invalid top-level pages reference'); diff --git a/web/viewer.html b/web/viewer.html index 34b2e77cb..2806d3a7e 100644 --- a/web/viewer.html +++ b/web/viewer.html @@ -11,6 +11,7 @@ + diff --git a/web/viewer.js b/web/viewer.js index 0e6fdb7fe..e10f6c29f 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -499,6 +499,24 @@ var PDFView = { // Setting the default one. this.parseScale(kDefaultScale, true); } + + this.metadata = null; + var metadata = pdf.catalog.metadata; + var info = this.documentInfo = pdf.info; + var pdfTitle; + + if (metadata) { + this.metadata = metadata = new PDFJS.Metadata(metadata); + + if (metadata.has('dc:title')) + pdfTitle = metadata.get('dc:title'); + } + + if (!pdfTitle && info && info.has('Title')) + pdfTitle = info.get('Title'); + + if (pdfTitle) + document.title = pdfTitle; }, setHash: function pdfViewSetHash(hash) {