When parsing Metadata, attempt to remove "junk" before the first tag (PR 10398 follow-up)
This allows the Metadata to be successfully extracted from the PDF file in issue 10395. This patch also fixes a bug in `Metadata.get` that caused the method to return `null` when the stored value was an empty string or zero, rather than returning the value itself (either ought to be allowed).
This commit is contained in:
		
							parent
							
								
									5d90224409
								
							
						
					
					
						commit
						9f45f8dfda
					
				@ -35,7 +35,9 @@ class Metadata {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  _repair(data) {
 | 
					  _repair(data) {
 | 
				
			||||||
    return data.replace(/>\\376\\377([^<]+)/g, function(all, codes) {
 | 
					    // Start by removing any "junk" before the first tag (see issue 10395).
 | 
				
			||||||
 | 
					    return data.replace(/^([^<]+)/, '').replace(/>\\376\\377([^<]+)/g,
 | 
				
			||||||
 | 
					        function(all, codes) {
 | 
				
			||||||
      let bytes = codes.replace(/\\([0-3])([0-7])([0-7])/g,
 | 
					      let bytes = codes.replace(/\\([0-3])([0-7])([0-7])/g,
 | 
				
			||||||
          function(code, d1, d2, d3) {
 | 
					          function(code, d1, d2, d3) {
 | 
				
			||||||
        return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1);
 | 
					        return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1);
 | 
				
			||||||
@ -104,7 +106,8 @@ class Metadata {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  get(name) {
 | 
					  get(name) {
 | 
				
			||||||
    return this._metadata[name] || null;
 | 
					    const data = this._metadata[name];
 | 
				
			||||||
 | 
					    return (typeof data !== 'undefined' ? data : null);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  getAll() {
 | 
					  getAll() {
 | 
				
			||||||
 | 
				
			|||||||
@ -156,7 +156,26 @@ describe('metadata', function() {
 | 
				
			|||||||
      '</rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
 | 
					      '</rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>';
 | 
				
			||||||
    const metadata = new Metadata(data);
 | 
					    const metadata = new Metadata(data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    expect(isEmptyObj(metadata.getAll())).toEqual(true);
 | 
					    expect(metadata.has('dc:title')).toBeTruthy();
 | 
				
			||||||
 | 
					    expect(metadata.has('dc:qux')).toBeFalsy();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    expect(metadata.get('dc:title')).toEqual('');
 | 
				
			||||||
 | 
					    expect(metadata.get('dc:qux')).toEqual(null);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    expect(metadata.getAll()).toEqual({
 | 
				
			||||||
 | 
					      'dc:creator': '',
 | 
				
			||||||
 | 
					      'dc:description': '',
 | 
				
			||||||
 | 
					      'dc:format': 'application/pdf',
 | 
				
			||||||
 | 
					      'dc:subject': '',
 | 
				
			||||||
 | 
					      'dc:title': '',
 | 
				
			||||||
 | 
					      'pdf:keywords': '',
 | 
				
			||||||
 | 
					      'pdf:pdfversion': '1.7',
 | 
				
			||||||
 | 
					      'pdf:producer': 'PDFKit.NET 4.0.102.0',
 | 
				
			||||||
 | 
					      'xap:createdate': '2018-12-27T13:50:36-08:00',
 | 
				
			||||||
 | 
					      'xap:creatortool': '',
 | 
				
			||||||
 | 
					      'xap:metadatadate': '2018-12-27T13:50:38-08:00',
 | 
				
			||||||
 | 
					      'xap:modifydate': '2018-12-27T13:50:38-08:00',
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
  });
 | 
					  });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  it('should correctly handle metadata containing "&apos" (issue 10407)',
 | 
					  it('should correctly handle metadata containing "&apos" (issue 10407)',
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user