Prevent Metadata/XML parsing from breaking PDFDocumentProxy.getMetadata when no XML root document is found (issue 8884)

With the new XML parser (see PR 9573), the PDF file referenced in the issue now causes `getMetadata` to fail when incomplete XML tags are encountered. This patch provides a simple, and hopefully generally useful, workaround that may also help prevent future bugs.

(Since I am unable to reproduce, or even understand, the other (non-XML) errors mentioned in issue 8884, I'd say that this patch is enough to close that issue as fixed.)
This commit is contained in:
Jonas Jenwald 2018-07-17 23:18:31 +02:00
parent 7e13977669
commit 8ec99b200c
2 changed files with 34 additions and 0 deletions

View File

@ -321,6 +321,9 @@ class SimpleXMLParser extends XMLParserBase {
// We should only have one root.
const [documentElement] = this._currentFragment;
if (!documentElement) {
return undefined; // Return undefined if no root was found.
}
return { documentElement, };
}

View File

@ -13,6 +13,7 @@
* limitations under the License.
*/
import { isEmptyObj } from '../../src/shared/util';
import { Metadata } from '../../src/display/metadata';
describe('metadata', function() {
@ -96,4 +97,34 @@ describe('metadata', function() {
'xap:creatortool': 'PDFCreator Version 0.9.6',
});
});
it('should gracefully handle incomplete tags (issue 8884)', function() {
  // The leading `<?xpacket` processing instruction is intentionally left
  // unterminated (no closing quote and no `?>`), which is the "incomplete
  // tag" this regression test exercises.
  const xmpPacket = [
    '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d',
    '<x:xmpmeta xmlns:x="adobe:ns:meta/">',
    '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
    '<rdf:Description rdf:about=""',
    'xmlns:pdfx="http://ns.adobe.com/pdfx/1.3/">',
    '</rdf:Description>',
    '<rdf:Description rdf:about=""',
    'xmlns:xap="http://ns.adobe.com/xap/1.0/">',
    '<xap:ModifyDate>2010-03-25T11:20:09-04:00</xap:ModifyDate>',
    '<xap:CreateDate>2010-03-25T11:20:09-04:00</xap:CreateDate>',
    '<xap:MetadataDate>2010-03-25T11:20:09-04:00</xap:MetadataDate>',
    '</rdf:Description>',
    '<rdf:Description rdf:about=""',
    'xmlns:dc="http://purl.org/dc/elements/1.1/">',
    '<dc:format>application/pdf</dc:format>',
    '</rdf:Description>',
    '<rdf:Description rdf:about=""',
    'xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">',
    '<pdfaid:part>1</pdfaid:part>',
    '<pdfaid:conformance>A</pdfaid:conformance>',
    '</rdf:Description>',
    '</rdf:RDF>',
    '</x:xmpmeta>',
    '<?xpacket end="w"?>',
  ].join('');

  // Constructing Metadata from the malformed packet must still succeed,
  // and the resulting object is expected to expose no entries at all.
  const parsed = new Metadata(xmpPacket);
  const entries = parsed.getAll();
  expect(isEmptyObj(entries)).toEqual(true);
});
});