From 5d902244093970d9e4f0896f6cac7f83351484ee Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 16 Jan 2019 11:30:36 +0100 Subject: [PATCH 1/2] Add a unit-test for issue 10395 (PR 10398 follow-up) --- test/unit/metadata_spec.js | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/unit/metadata_spec.js b/test/unit/metadata_spec.js index 01a992702..2abadc139 100644 --- a/test/unit/metadata_spec.js +++ b/test/unit/metadata_spec.js @@ -128,6 +128,37 @@ describe('metadata', function() { expect(isEmptyObj(metadata.getAll())).toEqual(true); }); + it('should gracefully handle "junk" before the actual metadata (issue 10395)', + function() { + const data = '' + + '' + + '' + + '' + + 'PDFKit.NET 4.0.102.0' + + '' + + '1.7' + + '' + + '2018-12-27T13:50:36-08:00' + + '2018-12-27T13:50:38-08:00' + + '' + + '2018-12-27T13:50:38-08:00' + + '' + + '' + + '' + + '' + + '' + + '' + + 'application/pdf' + + ''; + const metadata = new Metadata(data); + + expect(isEmptyObj(metadata.getAll())).toEqual(true); + }); + it('should correctly handle metadata containing "&apos" (issue 10407)', function() { const data = '' + From 9f45f8dfdaed3f399f594050354db982f900759b Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 16 Jan 2019 12:37:21 +0100 Subject: [PATCH 2/2] When parsing Metadata, attempt to remove "junk" before the first tag (PR 10398 follow-up) This will allow the Metadata to be successfully extracted from the PDF file in issue 10395. Furthermore, this patch also fixes a bug in `Metadata.get` which causes the method to return `null` rather than an empty string or zero (since either ought to be allowed). --- src/display/metadata.js | 7 +++++-- test/unit/metadata_spec.js | 21 ++++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/display/metadata.js b/src/display/metadata.js index 5c7922bf4..eaa3ebdc9 100644 --- a/src/display/metadata.js +++ b/src/display/metadata.js @@ -35,7 +35,9 @@ class Metadata { } _repair(data) { - return data.replace(/>\\376\\377([^<]+)/g, function(all, codes) { + // Start by removing any "junk" before the first tag (see issue 10395). + return data.replace(/^([^<]+)/, '').replace(/>\\376\\377([^<]+)/g, + function(all, codes) { let bytes = codes.replace(/\\([0-3])([0-7])([0-7])/g, function(code, d1, d2, d3) { return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1); @@ -104,7 +106,8 @@ class Metadata { } get(name) { - return this._metadata[name] || null; + const data = this._metadata[name]; + return (typeof data !== 'undefined' ? data : null); } getAll() { diff --git a/test/unit/metadata_spec.js b/test/unit/metadata_spec.js index 2abadc139..b88d31c1d 100644 --- a/test/unit/metadata_spec.js +++ b/test/unit/metadata_spec.js @@ -156,7 +156,26 @@ describe('metadata', function() { ''; const metadata = new Metadata(data); - expect(isEmptyObj(metadata.getAll())).toEqual(true); + expect(metadata.has('dc:title')).toBeTruthy(); + expect(metadata.has('dc:qux')).toBeFalsy(); + + expect(metadata.get('dc:title')).toEqual(''); + expect(metadata.get('dc:qux')).toEqual(null); + + expect(metadata.getAll()).toEqual({ + 'dc:creator': '', + 'dc:description': '', + 'dc:format': 'application/pdf', + 'dc:subject': '', + 'dc:title': '', + 'pdf:keywords': '', + 'pdf:pdfversion': '1.7', + 'pdf:producer': 'PDFKit.NET 4.0.102.0', + 'xap:createdate': '2018-12-27T13:50:36-08:00', + 'xap:creatortool': '', + 'xap:metadatadate': '2018-12-27T13:50:38-08:00', + 'xap:modifydate': '2018-12-27T13:50:38-08:00', + }); }); it('should correctly handle metadata containing "&apos" (issue 10407)',