From 88c35d872f7c802f41636707c93b9b4632b2b794 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 5 Feb 2020 13:59:47 +0100 Subject: [PATCH] Ensure that the PDF header contains an actual number (PR 11463 follow-up) While it would be nice to change the `PDFFormatVersion` property, as returned through `PDFDocumentProxy.getMetadata`, to a number (rather than a string) that would unfortunately be a breaking API change. However, it does seem like a good idea to at least *validate* the PDF header version on the worker-thread, rather than potentially returning an arbitrary string. --- src/core/document.js | 13 +++++++- test/pdfs/.gitignore | 1 + test/pdfs/bug1606566.pdf | 66 ++++++++++++++++++++++++++++++++++++++++ test/unit/api_spec.js | 22 ++++++++++++++ 4 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 test/pdfs/bug1606566.pdf diff --git a/src/core/document.js b/src/core/document.js index 0d1583b99..0def19020 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -429,6 +429,8 @@ const FINGERPRINT_FIRST_BYTES = 1024; const EMPTY_FINGERPRINT = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; +const PDF_HEADER_VERSION_REGEXP = /^[1-9]\.[0-9]$/; + function find(stream, signature, limit = 1024, backwards = false) { if ( typeof PDFJSDev === "undefined" || @@ -668,8 +670,17 @@ class PDFDocument { Trapped: isName, }; + let version = this.pdfFormatVersion; + if ( + typeof version !== "string" || + !PDF_HEADER_VERSION_REGEXP.test(version) + ) { + warn(`Invalid PDF header version number: ${version}`); + version = null; + } + const docInfo = { - PDFFormatVersion: this.pdfFormatVersion, + PDFFormatVersion: version, IsLinearized: !!this.linearization, IsAcroFormPresent: !!this.acroForm, IsXFAPresent: !!this.xfa, diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 367ff3ae2..85ce63957 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -110,6 +110,7 @@ !issue9949.pdf !bug1308536.pdf !bug1337429.pdf +!bug1606566.pdf !issue5564_reduced.pdf !canvas.pdf !bug1132849.pdf diff --git a/test/pdfs/bug1606566.pdf b/test/pdfs/bug1606566.pdf new file mode 100644 index 000000000..cc22ca288 --- /dev/null +++ b/test/pdfs/bug1606566.pdf @@ -0,0 +1,66 @@ +%âãÏÓ +1 0 obj +<< +/Pages 2 0 R +/Type /Catalog +>> +endobj +2 0 obj +<< +/Kids [3 0 R] +/Count 1 +/Type /Pages +>> +endobj +3 0 obj +<< +/Parent 2 0 R +/MediaBox [0 0 200 50] +/Resources +<< +/Font +<< +/F1 4 0 R +>> +>> +/Contents 5 0 R +/Type /Page +>> +endobj +4 0 obj +<< +/BaseFont /Times-Roman +/Subtype /Type1 +/Encoding /WinAnsiEncoding +/Type /Font +>> +endobj +5 0 obj +<< +/Length 42 +>> +stream +BT +10 20 TD +/F1 20 Tf +(Bug 1606566) Tj +ET + +endstream +endobj xref +0 6 +0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000125 00000 n +0000000254 00000 n +0000000355 00000 n +trailer + +<< +/Root 1 0 R +/Size 6 +>> +startxref +449 +%%EOF diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 408d15270..8f7e0c386 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1121,6 +1121,28 @@ describe("api", function() { }) .catch(done.fail); }); + it("gets metadata, with missing PDF header (bug 1606566)", function(done) { + const loadingTask = getDocument(buildGetDocumentParams("bug1606566.pdf")); + + loadingTask.promise + .then(function(pdfDocument) { + return pdfDocument.getMetadata(); + }) + .then(function({ info, metadata, contentDispositionFilename }) { + // The following are PDF.js specific, non-standard, properties. + expect(info["PDFFormatVersion"]).toEqual(null); + expect(info["IsLinearized"]).toEqual(false); + expect(info["IsAcroFormPresent"]).toEqual(false); + expect(info["IsXFAPresent"]).toEqual(false); + expect(info["IsCollectionPresent"]).toEqual(false); + + expect(metadata).toEqual(null); + expect(contentDispositionFilename).toEqual(null); + + loadingTask.destroy().then(done); + }) + .catch(done.fail); + }); it("gets data", function(done) { var promise = doc.getData();