Fix broken metadata

This commit is contained in:
Yury Delendik 2012-04-23 16:43:20 -05:00
parent 8b7cd47798
commit f7ccb291f4
3 changed files with 39 additions and 0 deletions

View File

@ -4,8 +4,28 @@
'use strict';
var Metadata = PDFJS.Metadata = (function MetadataClosure() {
/**
 * Repairs XMP text nodes that were written as an octal-escaped, big-endian
 * 16-bit string instead of plain XML text.
 *
 * Every run of text that directly follows a `>` and begins with the literal
 * characters `\376\377` is rewritten: the `\ooo` octal escapes are turned
 * back into bytes, the bytes are paired up (high byte first) into 16-bit
 * character codes, and each code is emitted either as the character itself
 * or as a hexadecimal XML character reference. Text without that marker is
 * returned untouched.
 *
 * @param {string} meta - Raw metadata string, possibly containing escaped
 *   text nodes.
 * @returns {string} The metadata with the escaped text nodes replaced by
 *   XML-safe text.
 */
function fixMetadata(meta) {
  return meta.replace(/>\\376\\377([^<]+)/g, function(all, codes) {
    // Turn each literal `\ooo` escape into the single byte it denotes.
    // Characters that were not escaped are kept and count as one byte each.
    var bytes = codes.replace(/\\([0-3])([0-7])([0-7])/g,
                              function(code, d1, d2, d3) {
      return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1);
    });
    var chars = '';
    // Only complete byte pairs are decoded. A dangling final byte would make
    // charCodeAt() return NaN and yield the malformed reference `&#xaN;`,
    // which breaks parsing of the whole document, so it is dropped instead.
    for (var i = 0; i + 1 < bytes.length; i += 2) {
      var code = bytes.charCodeAt(i) * 256 + bytes.charCodeAt(i + 1);
      // Printable ASCII is copied through as-is, except for '<' (60),
      // '>' (62) and '&' (38), which are markup-significant in XML.
      var isPlainAscii = code >= 32 && code < 127 &&
                         code !== 60 && code !== 62 && code !== 38;
      // Adding 0x10000 and dropping the leading digit zero-pads the
      // reference to four hex digits.
      chars += isPlainAscii ? String.fromCharCode(code) :
        '&#x' + (0x10000 + code).toString(16).substring(1) + ';';
    }
    return '>' + chars;
  });
}
function Metadata(meta) {
if (typeof meta === 'string') {
// Ghostscript produces invalid metadata
meta = fixMetadata(meta);
var parser = new DOMParser();
meta = parser.parseFromString(meta, 'application/xml');
} else if (!(meta instanceof Document)) {

View File

@ -0,0 +1,18 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
'use strict';
// Regression spec for metadata whose text nodes are octal-escaped 16-bit
// strings rather than plain XML text.
describe('metadata', function() {
  describe('incorrect_xmp', function() {
    it('should fix the incorrect XMP data', function() {
      // The dc:title payload is the marker `\376\377` followed by the
      // characters P, D, F and & — each preceded by a `\000` high byte.
      var brokenXmpParts = [
        '<x:xmpmeta xmlns:x=\'adobe:ns:meta/\'>',
        '<rdf:RDF xmlns:rdf=\'http://www.w3.org/1999/02/22-rdf-syntax-ns#\'>',
        '<rdf:Description xmlns:dc=\'http://purl.org/dc/elements/1.1/\'>',
        '<dc:title>\\376\\377\\000P\\000D\\000F\\000&</dc:title>',
        '</rdf:Description></rdf:RDF></x:xmpmeta>'
      ];
      var metadata = new Metadata(brokenXmpParts.join(''));
      var title = metadata.get('dc:title');

      expect(title).toEqual('PDF&');
    });
  });
});

View File

@ -41,6 +41,7 @@
<script type="text/javascript" src="crypto_spec.js"></script>
<script type="text/javascript" src="stream_spec.js"></script>
<script type="text/javascript" src="api_spec.js"></script>
<script type="text/javascript" src="metadata_spec.js"></script>
<script type="text/javascript">
'use strict';