From 9c6770748c9a4898bd4a54b4724ae5b12a48b071 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Wed, 31 Mar 2021 16:21:22 +0200
Subject: [PATCH 1/3] Move the `PDFDocumentStats` typedef closer to its usage

Currently this typedef appears slightly out-of-place, in the middle of the arguably much more important `getDocument` JSDocs.
---
 src/display/api.js | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/display/api.js b/src/display/api.js
index 1236cdc44..cbf4e9a36 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -185,16 +185,6 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) {
  *   (see `web/debugger.js`). The default value is `false`.
  */
 
-/**
- * @typedef {Object} PDFDocumentStats
- * @property {Object<string, boolean>} streamTypes - Used stream types in the
- *   document (an item is set to true if specific stream ID was used in the
- *   document).
- * @property {Object<string, boolean>} fontTypes - Used font types in the
- *   document (an item is set to true if specific font ID was used in the
- *   document).
- */
-
 /**
  * This is the main entry point for loading a PDF and interacting with it.
  *
@@ -891,6 +881,16 @@ class PDFDocumentProxy {
     return this._transport.downloadInfoCapability.promise;
   }
 
+  /**
+   * @typedef {Object} PDFDocumentStats
+   * @property {Object<string, boolean>} streamTypes - Used stream types in the
+   *   document (an item is set to true if specific stream ID was used in the
+   *   document).
+   * @property {Object<string, boolean>} fontTypes - Used font types in the
+   *   document (an item is set to true if specific font ID was used in the
+   *   document).
+   */
+
   /**
    * @returns {Promise<PDFDocumentStats>} A promise this is resolved with
    *   current statistics about document structures (see

From 27add0f1f3cc2fa1766169d5bf334649d7806bd4 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Wed, 31 Mar 2021 16:21:37 +0200
Subject: [PATCH 2/3] Re-factor the `source` parsing, in `getDocument`, to use
 `switch` rather than `if...else`

Given the number of parameters that we now need to parse here, this code is no longer as readable as one would like. Hence this re-factoring, which will improve overall readability and also help with the next patch.
---
 src/display/api.js | 88 ++++++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 42 deletions(-)

diff --git a/src/display/api.js b/src/display/api.js
index cbf4e9a36..15b605c84 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -226,49 +226,53 @@ function getDocument(src) {
     worker = null;
 
   for (const key in source) {
-    if (key === "url" && typeof window !== "undefined") {
-      // The full path is required in the 'url' field.
-      params[key] = new URL(source[key], window.location).href;
-      continue;
-    } else if (key === "range") {
-      rangeTransport = source[key];
-      continue;
-    } else if (key === "worker") {
-      worker = source[key];
-      continue;
-    } else if (key === "data") {
-      // Converting string or array-like data to Uint8Array.
-      const pdfBytes = source[key];
-      if (
-        typeof PDFJSDev !== "undefined" &&
-        PDFJSDev.test("GENERIC") &&
-        isNodeJS &&
-        typeof Buffer !== "undefined" && // eslint-disable-line no-undef
-        pdfBytes instanceof Buffer // eslint-disable-line no-undef
-      ) {
-        params[key] = new Uint8Array(pdfBytes);
-      } else if (pdfBytes instanceof Uint8Array) {
-        // Use the data as-is when it's already a Uint8Array.
-        params[key] = pdfBytes;
-      } else if (typeof pdfBytes === "string") {
-        params[key] = stringToBytes(pdfBytes);
-      } else if (
-        typeof pdfBytes === "object" &&
-        pdfBytes !== null &&
-        !isNaN(pdfBytes.length)
-      ) {
-        params[key] = new Uint8Array(pdfBytes);
-      } else if (isArrayBuffer(pdfBytes)) {
-        params[key] = new Uint8Array(pdfBytes);
-      } else {
-        throw new Error(
-          "Invalid PDF binary data: either typed array, " +
-            "string, or array-like object is expected in the data property."
-        );
-      }
-      continue;
+    const value = source[key];
+
+    switch (key) {
+      case "url":
+        if (typeof window !== "undefined") {
+          // The full path is required in the 'url' field.
+          params[key] = new URL(value, window.location).href;
+          continue;
+        }
+        break;
+      case "range":
+        rangeTransport = value;
+        continue;
+      case "worker":
+        worker = value;
+        continue;
+      case "data":
+        // Converting string or array-like data to Uint8Array.
+        if (
+          typeof PDFJSDev !== "undefined" &&
+          PDFJSDev.test("GENERIC") &&
+          isNodeJS &&
+          typeof Buffer !== "undefined" && // eslint-disable-line no-undef
+          value instanceof Buffer // eslint-disable-line no-undef
+        ) {
+          params[key] = new Uint8Array(value);
+        } else if (value instanceof Uint8Array) {
+          break; // Use the data as-is when it's already a Uint8Array.
+        } else if (typeof value === "string") {
+          params[key] = stringToBytes(value);
+        } else if (
+          typeof value === "object" &&
+          value !== null &&
+          !isNaN(value.length)
+        ) {
+          params[key] = new Uint8Array(value);
+        } else if (isArrayBuffer(value)) {
+          params[key] = new Uint8Array(value);
+        } else {
+          throw new Error(
+            "Invalid PDF binary data: either typed array, " +
+              "string, or array-like object is expected in the data property."
+          );
+        }
+        continue;
     }
-    params[key] = source[key];
+    params[key] = value;
   }
 
   params.rangeChunkSize = params.rangeChunkSize || DEFAULT_RANGE_CHUNK_SIZE;

From db1e1612df8d029134f8859647532350317390e2 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Wed, 31 Mar 2021 16:21:41 +0200
Subject: [PATCH 3/3] [api-minor] Support proper `URL`-objects, in addition to
 URL-strings, in `getDocument`

Currently only URL-strings are officially supported by `getDocument`, however at this point in time I cannot really see any compelling reason to not support `URL`-objects as well.

Most likely the reason that we've don't already support `URL`-objects, in `getDocument`, is that historically `URL` wasn't fully implemented across browsers and our old polyfill wasn't perfect; see https://developer.mozilla.org/en-US/docs/Web/API/URL/URL#browser_compatibility

*Please note:* Because of how the `url` parameter is currently handled, there's actually *some* cases where passing a `URL`-object to `getDocument` already works. That, in my opinion, provides additional motivation for supporting `URL`-objects officially, since it makes the API more consistent.

The following is an attempt to summarize the *current* situation, based on the actual code rather than the JSDocs:
 - `getDocument("url string")` works and is documented.[1]
 - `getDocument({ url: "url string", })` works and is documented.[1]
 - `getDocument(new URL(...))` throws immediately, since no supported parameters are found.
 - `getDocument({ url: new URL(...), })` actually works even though it's not documented.[1] Originally, when data was fetched on the worker-thread, this would likely have thrown since `URL` isn't clonable.[2]
 - `getDocument({ url: { abc: 123, }, })`, or some similarily meaningless input, will be "accepted" by `getDocument` and then throw a `MissingPDFException` when attempting to fetch the bogus data.

With the changes in this patch, not only is `URL`-objects now officially supported and documented when calling `getDocument`, but we'll also do a much better job at actually validating any URL-data passed to `getDocument` (and instead fail early).

---
[1] In *browsers*, we create a valid URL thus indirectly validating the input. In Node.js environments, on the other hand, no validation is done since obtaining a baseUrl is more difficult (and PDF.js is primarily written for browsers anyway).

[2] https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm#supported_types
---
 src/display/api.js    | 28 +++++++++++++++++++---------
 test/unit/api_spec.js | 30 ++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/src/display/api.js b/src/display/api.js
index 15b605c84..6549d85ab 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -111,7 +111,7 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) {
  * Document initialization / loading parameters object.
  *
  * @typedef {Object} DocumentInitParameters
- * @property {string} [url] - The URL of the PDF.
+ * @property {string|URL} [url] - The URL of the PDF.
  * @property {TypedArray|Array<number>|string} [data] - Binary PDF data. Use
  *    typed arrays (Uint8Array) to improve the memory usage. If PDF data is
  *    BASE64-encoded, use `atob()` to convert it to a binary string first.
@@ -192,16 +192,16 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) {
  * XHR as fallback) is used, which means it must follow same origin rules,
  * e.g. no cross-domain requests without CORS.
  *
- * @param {string|TypedArray|DocumentInitParameters|PDFDataRangeTransport} src -
- *   Can be a URL to where a PDF file is located, a typed array (Uint8Array)
- *   already populated with data or parameter object.
+ * @param {string|URL|TypedArray|PDFDataRangeTransport|DocumentInitParameters}
+ *   src - Can be a URL where a PDF file is located, a typed array (Uint8Array)
+ *         already populated with data, or a parameter object.
  * @returns {PDFDocumentLoadingTask}
  */
 function getDocument(src) {
   const task = new PDFDocumentLoadingTask();
 
   let source;
-  if (typeof src === "string") {
+  if (typeof src === "string" || src instanceof URL) {
     source = { url: src };
   } else if (isArrayBuffer(src)) {
     source = { data: src };
@@ -211,7 +211,7 @@ function getDocument(src) {
     if (typeof src !== "object") {
       throw new Error(
         "Invalid parameter in getDocument, " +
-          "need either Uint8Array, string or a parameter object"
+          "need either string, URL, Uint8Array, or parameter object."
       );
     }
     if (!src.url && !src.data && !src.range) {
@@ -231,11 +231,21 @@ function getDocument(src) {
     switch (key) {
       case "url":
         if (typeof window !== "undefined") {
-          // The full path is required in the 'url' field.
-          params[key] = new URL(value, window.location).href;
+          try {
+            // The full path is required in the 'url' field.
+            params[key] = new URL(value, window.location).href;
+            continue;
+          } catch (ex) {
+            warn(`Cannot create valid URL: "${ex}".`);
+          }
+        } else if (typeof value === "string" || value instanceof URL) {
+          params[key] = value.toString(); // Support Node.js environments.
           continue;
         }
-        break;
+        throw new Error(
+          "Invalid PDF url data: " +
+            "either string or URL-object is expected in the url property."
+        );
       case "range":
         rangeTransport = value;
         continue;
diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index 791abdad6..22ea78f13 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -73,6 +73,36 @@ describe("api", function () {
   }
 
   describe("getDocument", function () {
+    it("creates pdf doc from URL-string", async function () {
+      const urlStr = TEST_PDFS_PATH + basicApiFileName;
+      const loadingTask = getDocument(urlStr);
+      const pdfDocument = await loadingTask.promise;
+
+      expect(typeof urlStr).toEqual("string");
+      expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true);
+      expect(pdfDocument.numPages).toEqual(3);
+
+      await loadingTask.destroy();
+    });
+
+    it("creates pdf doc from URL-object", async function () {
+      if (isNodeJS) {
+        pending("window.location is not supported in Node.js.");
+      }
+      const urlObj = new URL(
+        TEST_PDFS_PATH + basicApiFileName,
+        window.location
+      );
+      const loadingTask = getDocument(urlObj);
+      const pdfDocument = await loadingTask.promise;
+
+      expect(urlObj instanceof URL).toEqual(true);
+      expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true);
+      expect(pdfDocument.numPages).toEqual(3);
+
+      await loadingTask.destroy();
+    });
+
     it("creates pdf doc from URL", function (done) {
       const loadingTask = getDocument(basicApiGetDocumentParams);