Merge pull request #1413 from saebekassebil/metadata
Metadata Parsing - Setting proper document title
This commit is contained in:
commit
afebc33142
1
Makefile
1
Makefile
@ -39,6 +39,7 @@ PDF_JS_FILES = \
|
||||
../external/jpgjs/jpg.js \
|
||||
jpx.js \
|
||||
bidi.js \
|
||||
metadata.js \
|
||||
$(NULL)
|
||||
|
||||
# make server
|
||||
|
3
make.js
3
make.js
@ -97,7 +97,8 @@ target.bundle = function() {
|
||||
'worker.js',
|
||||
'../external/jpgjs/jpg.js',
|
||||
'jpx.js',
|
||||
'bidi.js'];
|
||||
'bidi.js',
|
||||
'metadata.js'];
|
||||
|
||||
if (!exists(BUILD_DIR))
|
||||
mkdir(BUILD_DIR);
|
||||
|
33
src/core.js
33
src/core.js
@ -587,14 +587,6 @@ var PDFDocModel = (function PDFDocModelClosure() {
|
||||
this.mainXRefEntriesOffset);
|
||||
this.xref = xref;
|
||||
this.catalog = new Catalog(xref);
|
||||
if (xref.trailer && xref.trailer.has('ID')) {
|
||||
var fileID = '';
|
||||
var id = xref.fetchIfRef(xref.trailer.get('ID'))[0];
|
||||
id.split('').forEach(function(el) {
|
||||
fileID += Number(el.charCodeAt(0)).toString(16);
|
||||
});
|
||||
this.fileID = fileID;
|
||||
}
|
||||
},
|
||||
get numPages() {
|
||||
var linearization = this.linearization;
|
||||
@ -602,21 +594,33 @@ var PDFDocModel = (function PDFDocModelClosure() {
|
||||
// shadow the prototype getter
|
||||
return shadow(this, 'numPages', num);
|
||||
},
|
||||
getDocumentInfo: function pdfDocGetDocumentInfo() {
|
||||
var info;
|
||||
if (this.xref.trailer.has('Info'))
|
||||
info = this.xref.fetch(this.xref.trailer.get('Info'));
|
||||
|
||||
return shadow(this, 'getDocumentInfo', info);
|
||||
},
|
||||
getFingerprint: function pdfDocGetFingerprint() {
|
||||
if (this.fileID) {
|
||||
return this.fileID;
|
||||
var xref = this.xref, fileID;
|
||||
if (xref.trailer.has('ID')) {
|
||||
fileID = '';
|
||||
var id = xref.fetchIfRef(xref.trailer.get('ID'))[0];
|
||||
id.split('').forEach(function(el) {
|
||||
fileID += Number(el.charCodeAt(0)).toString(16);
|
||||
});
|
||||
} else {
|
||||
// If we got no fileID, then we generate one,
|
||||
// from the first 100 bytes of PDF
|
||||
var data = this.stream.bytes.subarray(0, 100);
|
||||
var hash = calculateMD5(data, 0, data.length);
|
||||
var strHash = '';
|
||||
fileID = '';
|
||||
for (var i = 0, length = hash.length; i < length; i++) {
|
||||
strHash += Number(hash[i]).toString(16);
|
||||
fileID += Number(hash[i]).toString(16);
|
||||
}
|
||||
|
||||
return strHash;
|
||||
}
|
||||
|
||||
return shadow(this, 'getFingerprint', fileID);
|
||||
},
|
||||
getPage: function pdfDocGetPage(n) {
|
||||
return this.catalog.getPage(n);
|
||||
@ -645,6 +649,7 @@ var PDFDoc = (function PDFDocClosure() {
|
||||
this.stream = stream;
|
||||
this.pdfModel = new PDFDocModel(stream);
|
||||
this.fingerprint = this.pdfModel.getFingerprint();
|
||||
this.info = this.pdfModel.getDocumentInfo();
|
||||
this.catalog = this.pdfModel.catalog;
|
||||
this.objs = new PDFObjects();
|
||||
|
||||
|
66
src/metadata.js
Normal file
66
src/metadata.js
Normal file
@ -0,0 +1,66 @@
|
||||
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
|
||||
|
||||
'use strict';
|
||||
|
||||
/**
 * Metadata reads an XML metadata document and exposes the entries found
 * inside its <rdf:Description> elements as a flat name -> text lookup.
 *
 * Entry names are the lower-cased node names of the child elements
 * (for example 'dc:title'); values are the trimmed text content.
 */
var Metadata = PDFJS.Metadata = (function MetadataClosure() {
  // Value of Node.nodeType for element nodes, as defined by the DOM.
  var ELEMENT_NODE = 1;

  // Own-property test that cannot be fooled by inherited Object.prototype
  // members such as 'toString' or 'constructor'.
  function hasOwn(obj, key) {
    return Object.prototype.hasOwnProperty.call(obj, key);
  }

  /**
   * @param {string|Document} meta XML source text, which is parsed with
   *   DOMParser as 'application/xml', or an already parsed Document.
   *   Any other value is reported through error().
   */
  function Metadata(meta) {
    if (typeof meta === 'string') {
      var parser = new DOMParser();
      meta = parser.parseFromString(meta, 'application/xml');
    } else if (!(meta instanceof Document)) {
      error('Metadata: Invalid metadata object');
    }

    this.metaDocument = meta;
    this.metadata = {};
    this.parse();
  }

  Metadata.prototype = {
    /**
     * Walks the metadata document and fills this.metadata.
     * Does nothing when no <rdf:RDF> element can be located.
     */
    parse: function() {
      var doc = this.metaDocument;
      var rdf = doc && doc.documentElement;

      // A document without a root element has nothing to offer.
      if (!rdf)
        return;

      if (rdf.nodeName.toLowerCase() !== 'rdf:rdf') { // Wrapped in <xmpmeta>
        rdf = rdf.firstChild;
        while (rdf && rdf.nodeName.toLowerCase() !== 'rdf:rdf')
          rdf = rdf.nextSibling;
      }

      // After the search above rdf is either the <rdf:RDF> node or null.
      if (!rdf || !rdf.hasChildNodes())
        return;

      var descriptions = rdf.childNodes;

      for (var i = 0, length = descriptions.length; i < length; i++) {
        var desc = descriptions[i];
        if (desc.nodeName.toLowerCase() !== 'rdf:description')
          continue;

        var entries = desc.childNodes;
        for (var ii = 0, iLength = entries.length; ii < iLength; ii++) {
          var entry = entries[ii];
          // Only elements are metadata entries; whitespace text, comments,
          // CDATA sections and processing instructions are skipped.
          if (entry.nodeType !== ELEMENT_NODE)
            continue;

          var name = entry.nodeName.toLowerCase();
          this.metadata[name] = entry.textContent.trim();
        }
      }
    },

    /**
     * @param {string} name Lower-cased entry name, e.g. 'dc:title'.
     * @return {string|null} The entry text (possibly an empty string), or
     *   null when the document has no such entry.
     */
    get: function(name) {
      return hasOwn(this.metadata, name) ? this.metadata[name] : null;
    },

    /**
     * @param {string} name Lower-cased entry name, e.g. 'dc:title'.
     * @return {boolean} True when the document contained the entry.
     */
    has: function(name) {
      return hasOwn(this.metadata, name);
    }
  };

  return Metadata;
})();
|
16
src/obj.js
16
src/obj.js
@ -111,6 +111,22 @@ var Catalog = (function CatalogClosure() {
|
||||
}
|
||||
|
||||
Catalog.prototype = {
|
||||
get metadata() {
|
||||
var ref = this.catDict.get('Metadata');
|
||||
var stream = this.xref.fetchIfRef(ref);
|
||||
var metadata;
|
||||
if (stream && isDict(stream.dict)) {
|
||||
var type = stream.dict.get('Type');
|
||||
var subtype = stream.dict.get('Subtype');
|
||||
|
||||
if (isName(type) && isName(subtype) &&
|
||||
type.name === 'Metadata' && subtype.name === 'XML') {
|
||||
metadata = stringToPDFString(bytesToString(stream.getBytes()));
|
||||
}
|
||||
}
|
||||
|
||||
return shadow(this, 'metadata', metadata);
|
||||
},
|
||||
get toplevelPagesDict() {
|
||||
var pagesObj = this.catDict.get('Pages');
|
||||
assertWellFormed(isRef(pagesObj), 'invalid top-level pages reference');
|
||||
|
@ -11,6 +11,7 @@
|
||||
<!-- PDFJSSCRIPT_INCLUDE_BUILD -->
|
||||
<script type="text/javascript" src="../src/core.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript" src="../src/util.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript" src="../src/metadata.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript" src="../src/canvas.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript" src="../src/obj.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript" src="../src/function.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
|
@ -499,6 +499,24 @@ var PDFView = {
|
||||
// Setting the default one.
|
||||
this.parseScale(kDefaultScale, true);
|
||||
}
|
||||
|
||||
this.metadata = null;
|
||||
var metadata = pdf.catalog.metadata;
|
||||
var info = this.documentInfo = pdf.info;
|
||||
var pdfTitle;
|
||||
|
||||
if (metadata) {
|
||||
this.metadata = metadata = new PDFJS.Metadata(metadata);
|
||||
|
||||
if (metadata.has('dc:title'))
|
||||
pdfTitle = metadata.get('dc:title');
|
||||
}
|
||||
|
||||
if (!pdfTitle && info && info.has('Title'))
|
||||
pdfTitle = info.get('Title');
|
||||
|
||||
if (pdfTitle)
|
||||
document.title = pdfTitle;
|
||||
},
|
||||
|
||||
setHash: function pdfViewSetHash(hash) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user