Merge pull request #1413 from saebekassebil/metadata

Metadata Parsing - Setting proper document title
This commit is contained in:
Brendan Dahl 2012-03-28 12:02:16 -07:00
commit afebc33142
7 changed files with 123 additions and 15 deletions

View File

@ -39,6 +39,7 @@ PDF_JS_FILES = \
../external/jpgjs/jpg.js \
jpx.js \
bidi.js \
metadata.js \
$(NULL)
# make server

View File

@ -97,7 +97,8 @@ target.bundle = function() {
'worker.js',
'../external/jpgjs/jpg.js',
'jpx.js',
'bidi.js'];
'bidi.js',
'metadata.js'];
if (!exists(BUILD_DIR))
mkdir(BUILD_DIR);

View File

@ -587,14 +587,6 @@ var PDFDocModel = (function PDFDocModelClosure() {
this.mainXRefEntriesOffset);
this.xref = xref;
this.catalog = new Catalog(xref);
if (xref.trailer && xref.trailer.has('ID')) {
var fileID = '';
var id = xref.fetchIfRef(xref.trailer.get('ID'))[0];
id.split('').forEach(function(el) {
fileID += Number(el.charCodeAt(0)).toString(16);
});
this.fileID = fileID;
}
},
get numPages() {
var linearization = this.linearization;
@ -602,21 +594,33 @@ var PDFDocModel = (function PDFDocModelClosure() {
// shadow the prototype getter
return shadow(this, 'numPages', num);
},
getDocumentInfo: function pdfDocGetDocumentInfo() {
var info;
if (this.xref.trailer.has('Info'))
info = this.xref.fetch(this.xref.trailer.get('Info'));
return shadow(this, 'getDocumentInfo', info);
},
getFingerprint: function pdfDocGetFingerprint() {
if (this.fileID) {
return this.fileID;
var xref = this.xref, fileID;
if (xref.trailer.has('ID')) {
fileID = '';
var id = xref.fetchIfRef(xref.trailer.get('ID'))[0];
id.split('').forEach(function(el) {
fileID += Number(el.charCodeAt(0)).toString(16);
});
} else {
// If we got no fileID, then we generate one,
// from the first 100 bytes of PDF
var data = this.stream.bytes.subarray(0, 100);
var hash = calculateMD5(data, 0, data.length);
var strHash = '';
fileID = '';
for (var i = 0, length = hash.length; i < length; i++) {
strHash += Number(hash[i]).toString(16);
fileID += Number(hash[i]).toString(16);
}
return strHash;
}
return shadow(this, 'getFingerprint', fileID);
},
getPage: function pdfDocGetPage(n) {
return this.catalog.getPage(n);
@ -645,6 +649,7 @@ var PDFDoc = (function PDFDocClosure() {
this.stream = stream;
this.pdfModel = new PDFDocModel(stream);
this.fingerprint = this.pdfModel.getFingerprint();
this.info = this.pdfModel.getDocumentInfo();
this.catalog = this.pdfModel.catalog;
this.objs = new PDFObjects();

66
src/metadata.js Normal file
View File

@ -0,0 +1,66 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
'use strict';
var Metadata = PDFJS.Metadata = (function MetadataClosure() {
  // Numeric value of Node.ELEMENT_NODE; used to skip text, comment and
  // processing-instruction nodes while walking the RDF tree.
  var ELEMENT_NODE = 1;

  /**
   * Parses an XMP metadata packet and exposes its entries by name.
   *
   * @param {string|Document} meta - Either the raw XML text of the metadata
   *   packet or an already parsed DOM Document. Any other value is reported
   *   through `error` (defined elsewhere in the project).
   *
   * After construction:
   *   - `metaDocument` holds the DOM Document that was parsed,
   *   - `metadata` maps lower-cased element names (e.g. 'dc:title') to the
   *     trimmed text content of that element.
   */
  function Metadata(meta) {
    if (typeof meta === 'string') {
      var parser = new DOMParser();
      meta = parser.parseFromString(meta, 'application/xml');
    } else if (!(meta instanceof Document)) {
      error('Metadata: Invalid metadata object');
    }

    this.metaDocument = meta;
    this.metadata = {};

    this.parse();
  }

  Metadata.prototype = {
    /**
     * Walks the document and fills `this.metadata`.
     *
     * The <rdf:RDF> element is expected either as the document element or
     * as a direct child of it (the usual <x:xmpmeta> wrapper). Every element
     * child of every <rdf:Description> becomes one entry; if the same name
     * occurs more than once the last occurrence wins. Documents without an
     * <rdf:RDF> element leave `this.metadata` empty.
     */
    parse: function metadataParse() {
      var rdf = this.metaDocument.documentElement;
      if (!rdf)
        return; // Empty document: nothing to parse.

      if (rdf.nodeName.toLowerCase() !== 'rdf:rdf') { // Wrapped in <xmpmeta>
        rdf = rdf.firstChild;
        while (rdf && rdf.nodeName.toLowerCase() !== 'rdf:rdf')
          rdf = rdf.nextSibling;
      }

      // The loop above only stops on <rdf:RDF> or on null, so a non-null
      // `rdf` is guaranteed to be the element we are looking for.
      if (!rdf || !rdf.hasChildNodes())
        return;

      var descriptions = rdf.childNodes;
      for (var i = 0, length = descriptions.length; i < length; i++) {
        var desc = descriptions[i];
        if (desc.nodeName.toLowerCase() !== 'rdf:description')
          continue;

        var children = desc.childNodes;
        for (var ii = 0, iLength = children.length; ii < iLength; ii++) {
          var entry = children[ii];
          // Only elements carry metadata; skipping by node type (instead of
          // only by the '#text' name) keeps comments and processing
          // instructions from being stored as bogus '#comment'-like entries.
          if (entry.nodeType !== ELEMENT_NODE)
            continue;

          var name = entry.nodeName.toLowerCase();
          this.metadata[name] = entry.textContent.trim();
        }
      }
    },

    /**
     * @param {string} name - Lower-cased entry name, e.g. 'dc:title'.
     * @return {string|null} The entry's text, or null when the entry is
     *   missing. An entry whose text is empty also yields null, which is the
     *   historical behaviour of this method.
     */
    get: function metadataGet(name) {
      return this.has(name) ? (this.metadata[name] || null) : null;
    },

    /**
     * @param {string} name - Lower-cased entry name, e.g. 'dc:title'.
     * @return {boolean} True when the document defined this entry. Only own
     *   properties count, so names such as 'constructor' that exist on
     *   Object.prototype are not reported as metadata.
     */
    has: function metadataHas(name) {
      return Object.prototype.hasOwnProperty.call(this.metadata, name) &&
             typeof this.metadata[name] !== 'undefined';
    }
  };

  return Metadata;
})();

View File

@ -111,6 +111,22 @@ var Catalog = (function CatalogClosure() {
}
Catalog.prototype = {
get metadata() {
var ref = this.catDict.get('Metadata');
var stream = this.xref.fetchIfRef(ref);
var metadata;
if (stream && isDict(stream.dict)) {
var type = stream.dict.get('Type');
var subtype = stream.dict.get('Subtype');
if (isName(type) && isName(subtype) &&
type.name === 'Metadata' && subtype.name === 'XML') {
metadata = stringToPDFString(bytesToString(stream.getBytes()));
}
}
return shadow(this, 'metadata', metadata);
},
get toplevelPagesDict() {
var pagesObj = this.catDict.get('Pages');
assertWellFormed(isRef(pagesObj), 'invalid top-level pages reference');

View File

@ -11,6 +11,7 @@
<!-- PDFJSSCRIPT_INCLUDE_BUILD -->
<script type="text/javascript" src="../src/core.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/util.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/metadata.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/canvas.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/obj.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/function.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->

View File

@ -499,6 +499,24 @@ var PDFView = {
// Setting the default one.
this.parseScale(kDefaultScale, true);
}
this.metadata = null;
var metadata = pdf.catalog.metadata;
var info = this.documentInfo = pdf.info;
var pdfTitle;
if (metadata) {
this.metadata = metadata = new PDFJS.Metadata(metadata);
if (metadata.has('dc:title'))
pdfTitle = metadata.get('dc:title');
}
if (!pdfTitle && info && info.has('Title'))
pdfTitle = info.get('Title');
if (pdfTitle)
document.title = pdfTitle;
},
setHash: function pdfViewSetHash(hash) {