From d4700102934db36be8e18aac32b5e3cc9c5608b4 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Sat, 15 Oct 2022 11:55:37 +0200
Subject: [PATCH] Re-factor the PDF version parsing in the worker-thread

Part of this is very old code, and when support for parsing the catalog-version was added, things became less clear (in my opinion).
Hence this patch tries to improve things by, e.g., validating the header-version and the catalog-version separately.
---
 src/core/catalog.js    | 13 ++++++++-----
 src/core/core_utils.js |  3 +++
 src/core/document.js   | 35 ++++++++++++++---------------------
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/src/core/catalog.js b/src/core/catalog.js
index 561d6a9be..c3911c352 100644
--- a/src/core/catalog.js
+++ b/src/core/catalog.js
@@ -16,6 +16,7 @@
 import {
   collectActions,
   MissingDataException,
+  PDF_VERSION_REGEXP,
   recoverJsURL,
   toRomanNumerals,
   XRefEntryException,
@@ -84,11 +85,13 @@ class Catalog {
 
   get version() {
     const version = this._catDict.get("Version");
-    return shadow(
-      this,
-      "version",
-      version instanceof Name ? version.name : null
-    );
+    if (version instanceof Name) {
+      if (PDF_VERSION_REGEXP.test(version.name)) {
+        return shadow(this, "version", version.name);
+      }
+      warn(`Invalid PDF catalog version: ${version.name}`);
+    }
+    return shadow(this, "version", null);
   }
 
   get lang() {
diff --git a/src/core/core_utils.js b/src/core/core_utils.js
index 8b4ed0e83..77e7f3df1 100644
--- a/src/core/core_utils.js
+++ b/src/core/core_utils.js
@@ -26,6 +26,8 @@ import {
 import { Dict, isName, Ref, RefSet } from "./primitives.js";
 import { BaseStream } from "./base_stream.js";
 
+const PDF_VERSION_REGEXP = /^[1-9]\.\d$/;
+
 function getLookupTableFactory(initializer) {
   let lookup;
   return function () {
@@ -585,6 +587,7 @@ export {
   numberToString,
   ParserEOFException,
   parseXFAPath,
+  PDF_VERSION_REGEXP,
   readInt8,
   readUint16,
   readUint32,
diff --git a/src/core/document.js b/src/core/document.js
index 6fd413fdd..37a1597c4 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -37,6 +37,7 @@ import {
   getNewAnnotationsMap,
   isWhiteSpace,
   MissingDataException,
+  PDF_VERSION_REGEXP,
   validateCSSFont,
   XRefEntryException,
   XRefParseException,
@@ -712,8 +713,6 @@ const FINGERPRINT_FIRST_BYTES = 1024;
 const EMPTY_FINGERPRINT =
   "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
 
-const PDF_HEADER_VERSION_REGEXP = /^[1-9]\.\d$/;
-
 function find(stream, signature, limit = 1024, backwards = false) {
   if (
     typeof PDFJSDev === "undefined" ||
@@ -818,14 +817,6 @@ class PDFDocument {
   parse(recoveryMode) {
     this.xref.parse(recoveryMode);
     this.catalog = new Catalog(this.pdfManager, this.xref);
-
-    // The `checkHeader` method is called before this method and parses the
-    // version from the header. The specification states in section 7.5.2
-    // that the version from the catalog, if present, should overwrite the
-    // version from the header.
-    if (this.catalog.version) {
-      this._version = this.catalog.version;
-    }
   }
 
   get linearization() {
@@ -911,8 +902,11 @@ class PDFDocument {
     ) {
       version += String.fromCharCode(ch);
     }
-    if (!this._version) {
+
+    if (PDF_VERSION_REGEXP.test(version)) {
       this._version = version;
+    } else {
+      warn(`Invalid PDF header version: ${version}`);
     }
   }
 
@@ -1260,6 +1254,14 @@ class PDFDocument {
       : null;
   }
 
+  /**
+   * The specification states in section 7.5.2 that the version from
+   * the catalog, if present, should overwrite the version from the header.
+   */
+  get version() {
+    return this.catalog.version || this._version;
+  }
+
   get formInfo() {
     const formInfo = {
       hasFields: false,
@@ -1307,17 +1309,8 @@ class PDFDocument {
   }
 
   get documentInfo() {
-    let version = this._version;
-    if (
-      typeof version !== "string" ||
-      !PDF_HEADER_VERSION_REGEXP.test(version)
-    ) {
-      warn(`Invalid PDF header version number: ${version}`);
-      version = null;
-    }
-
     const docInfo = {
-      PDFFormatVersion: version,
+      PDFFormatVersion: this.version,
       Language: this.catalog.lang,
       EncryptFilterName: this.xref.encrypt
         ? this.xref.encrypt.filterName