// Recovered from the "Fix broken metadata" patch against src/metadata.js.

/**
 * Repairs XMP metadata whose text nodes were written as an escaped byte
 * string instead of plain XML text (the accompanying patch notes that
 * Ghostscript produces such invalid metadata).
 *
 * A broken text node looks like `>\376\377\000P\000D\000F`: the literal
 * characters `\376\377` (the octal spelling of the bytes 0xFE 0xFF) followed
 * by a run of big-endian two-byte units, where each byte is either a literal
 * character or a three-digit octal escape. Every such run, up to the next
 * `<`, is rewritten as XML-safe text.
 *
 * @param {string} meta - Raw XMP packet, possibly containing broken nodes.
 * @returns {string} The packet with each broken text node replaced by
 *   decoded text. Printable ASCII other than `<`, `>` and `&` is emitted
 *   literally; every other code unit becomes a 4-digit hexadecimal
 *   character reference (`&#xhhhh;`). Input without the marker is returned
 *   unchanged.
 */
function fixMetadata(meta) {
  return meta.replace(/>\\376\\377([^<]+)/g, function(all, codes) {
    // Turn each `\ooo` octal escape into the single byte it denotes;
    // characters that are not part of an escape are kept as-is.
    var bytes = codes.replace(/\\([0-3])([0-7])([0-7])/g,
                              function(code, d1, d2, d3) {
      return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1);
    });

    var chars = '';
    for (var i = 0; i < bytes.length; i += 2) {
      if (i + 1 >= bytes.length) {
        // A dangling final byte cannot form a two-byte unit. Emit U+FFFD
        // rather than letting NaN leak into a malformed `&#xaN;` reference.
        chars += '&#xfffd;';
        break;
      }
      // Combine the high and low byte of one big-endian code unit.
      var code = bytes.charCodeAt(i) * 256 + bytes.charCodeAt(i + 1);
      var isPlainText = code >= 32 && code < 127 &&
                        code !== 60 &&  // '<'
                        code !== 62 &&  // '>'
                        code !== 38;    // '&'
      if (isPlainText) {
        chars += String.fromCharCode(code);
      } else {
        // Adding 0x10000 and dropping the leading '1' zero-pads to 4 digits.
        chars += '&#x' + (0x10000 + code).toString(16).substring(1) + ';';
      }
    }
    return '>' + chars;
  });
}