Metadata parsing/serialization

This commit is contained in:
Saebekassebil 2012-03-24 19:59:51 +01:00
parent 32eb64389e
commit 20dd225e40
6 changed files with 113 additions and 1 deletions

View File

@ -39,6 +39,7 @@ PDF_JS_FILES = \
../external/jpgjs/jpg.js \
jpx.js \
bidi.js \
metadata.js \
$(NULL)
# make server

View File

@ -97,7 +97,8 @@ target.bundle = function() {
'worker.js',
'../external/jpgjs/jpg.js',
'jpx.js',
'bidi.js'];
'bidi.js',
'metadata-js'];
if (!exists(BUILD_DIR))
mkdir(BUILD_DIR);

80
src/metadata.js Normal file
View File

@ -0,0 +1,80 @@
/**
 * Metadata wraps an XMP metadata packet (an RDF/XML document) and exposes
 * the entries found in its rdf:Description elements as a flat dictionary.
 *
 * Entry names are lower-cased. An entry is normally stored under its local
 * name (`dc:title` -> `title`); when that key already holds a truthy value
 * the fully prefixed name (`pdf:title`) is used instead so nothing is lost.
 */
var Metadata = (function MetadataClosure() {
  // Value of Node.ELEMENT_NODE, written as a literal so the parser does not
  // depend on a global `Node` object being available.
  var ELEMENT_NODE = 1;

  var hasOwn = Object.prototype.hasOwnProperty;

  // True when `node` exists and its nodeName equals `lowerCaseName`,
  // compared case-insensitively.
  function isNamed(node, lowerCaseName) {
    return !!node && typeof node.nodeName === 'string' &&
           node.nodeName.toLowerCase() === lowerCaseName;
  }

  // Locates the rdf:RDF element: either the document element itself or one
  // of its direct children (the packet may be wrapped in <xmpmeta>).
  // Returns null when there is none.
  function findRdfRoot(doc) {
    var node = doc.documentElement;
    if (isNamed(node, 'rdf:rdf')) {
      return node;
    }
    node = node ? node.firstChild : null;
    // `node` becomes null once the siblings are exhausted, so it has to be
    // tested itself before its nodeName is inspected.
    while (node && !isNamed(node, 'rdf:rdf')) {
      node = node.nextSibling;
    }
    return node || null;
  }

  // Stores a single element child of rdf:Description into `metadata`.
  function storeEntry(metadata, entry) {
    var name = entry.nodeName.toLowerCase();
    var parts = name.split(':');
    var entryName = (parts.length > 1) ? parts[1] : parts[0];
    var text = entry.textContent.trim();

    switch (name) {
      case 'pdf:moddate':
      case 'xap:createdate':
      case 'xap:metadatadate':
      case 'xap:modifydate':
        // Date entries are exposed as Date objects rather than strings.
        metadata[entryName] = new Date(text);
        break;
      default:
        // For almost all entries we just add them to the metadata object.
        // Fall back to the prefixed name if the short one is already taken.
        if (hasOwn.call(metadata, entryName) && metadata[entryName]) {
          metadata[name] = text;
        } else {
          metadata[entryName] = text;
        }
        break;
    }
  }

  /**
   * @param {string|Document} meta - XML source of the metadata packet, or
   *   an already parsed Document. Any other value is reported via error().
   */
  function Metadata(meta) {
    if (typeof meta === 'string') {
      var parser = new DOMParser();
      meta = parser.parseFromString(meta, 'application/xml');
    } else if (!(meta instanceof Document)) {
      error('Metadata: Invalid metadata object');
    }

    this.metaDocument = meta;
    this.metadata = {};
    this.parse();
  }

  Metadata.prototype = {
    /**
     * Walks every rdf:Description below the rdf:RDF element and fills
     * `this.metadata`. Documents without an rdf:RDF element, or with an
     * empty one, leave `this.metadata` untouched.
     */
    parse: function() {
      var rdf = findRdfRoot(this.metaDocument);
      if (!rdf || !rdf.hasChildNodes()) {
        return;
      }

      var descriptions = rdf.childNodes;
      for (var i = 0, length = descriptions.length; i < length; i++) {
        var desc = descriptions[i];
        if (!isNamed(desc, 'rdf:description')) {
          continue;
        }

        var entries = desc.childNodes;
        for (var ii = 0, iLength = entries.length; ii < iLength; ii++) {
          // Only elements carry metadata; whitespace text, comments and
          // processing instructions are skipped.
          if (entries[ii].nodeType === ELEMENT_NODE) {
            storeEntry(this.metadata, entries[ii]);
          }
        }
      }
    },

    /**
     * @param {string} name - Lower-case entry name, e.g. 'title'.
     * @return The stored value, or null when the entry is missing or its
     *   value is falsy (such as an empty string).
     */
    get: function(name) {
      if (!hasOwn.call(this.metadata, name)) {
        return null;
      }
      return this.metadata[name] || null;
    },

    /**
     * @param {string} name - Lower-case entry name.
     * @return {boolean} Whether an entry with that name was parsed.
     *   Properties inherited from Object.prototype do not count.
     */
    has: function(name) {
      return hasOwn.call(this.metadata, name) &&
             typeof this.metadata[name] !== 'undefined';
    }
  };

  return Metadata;
})();

View File

@ -111,6 +111,27 @@ var Catalog = (function CatalogClosure() {
}
Catalog.prototype = {
get metadata() {
var ref = this.catDict.get('Metadata');
if (!ref) {
return null;
}
var stream = this.xref.fetch(ref);
var dict = stream.dict;
if (isDict(dict)) {
var type = dict.get('Type');
var subtype = dict.get('Subtype');
if(isName(type) && isName(subtype) &&
type.name === 'Metadata' && subtype.name === 'XML') {
var metadata = stringToPDFString(bytesToString(stream.getbytes()));
return metadata;
}
}
return null;
},
get toplevelPagesDict() {
var pagesObj = this.catDict.get('Pages');
assertWellFormed(isRef(pagesObj), 'invalid top-level pages reference');

View File

@ -11,6 +11,7 @@
<!-- PDFJSSCRIPT_INCLUDE_BUILD -->
<script type="text/javascript" src="../src/core.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/util.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/metadata.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/canvas.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/obj.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
<script type="text/javascript" src="../src/function.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->

View File

@ -499,6 +499,14 @@ var PDFView = {
// Setting the default one.
this.parseScale(kDefaultScale, true);
}
var metadata = pdf.catalog.metadata;
if (metadata) {
this.metadata = metadata = new Metadata(metadata);
if (metadata.has('title')) {
document.title = metadata.get('title');
}
}
},
setHash: function pdfViewSetHash(hash) {