Re-factor the parameter parsing/validation in getDocument

This is very old code, where we loop through the user-provided options and build an internal parameter object. To prevent errors we also need to ensure that the parameters are correct/valid, which is especially important for the ones that are sent to the worker-thread such that structured cloning won't fail.[1] Over the years this has led to more and more code being added in `getDocument` to validate the user-provided options, and at this point *most* of them have at least basic validation. However the way that this is implemented feels slightly backwards, since we first build the internal parameter object and only *afterwards* validate those parameters.[2] Hence this patch changes the `getDocument` function to instead check/validate the supported options upfront, and then *explicitly* build the internal parameter object with only the needed properties. --- [1] Note the supported types at https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm#supported_types [2] The internal parameter object may also, because of the loop, end up with lots of unnecessary properties since anything that the user provides is being copied.
2023-01-29 12:21:21 +01:00 · 2023-01-29 12:21:21 +01:00 · 512aa50fdd
commit 512aa50fdd
parent e698664927
2 changed files with 174 additions and 180 deletions
--- a/src/display/api.js
+++ b/src/display/api.js
@ -274,157 +274,82 @@ function getDocument(src) {
  }
  const task = new PDFDocumentLoadingTask();

-  const params = Object.create(null);
-  let rangeTransport = null,
-    worker = null;
+  const url = src.url ? getUrlProp(src.url) : null;
+  const data = src.data ? getDataProp(src.data) : null;
+  const httpHeaders = src.httpHeaders || null;
+  const withCredentials = src.withCredentials === true;
+  const password = src.password ?? null;
+  const rangeTransport =
+    src.range instanceof PDFDataRangeTransport ? src.range : null;
+  const rangeChunkSize =
+    Number.isInteger(src.rangeChunkSize) && src.rangeChunkSize > 0
+      ? src.rangeChunkSize
+      : DEFAULT_RANGE_CHUNK_SIZE;
+  let worker = src.worker instanceof PDFWorker ? src.worker : null;
+  const verbosity = src.verbosity;
+  // Ignore "data:"-URLs, since they can't be used to recover valid absolute
+  // URLs anyway. We want to avoid sending them to the worker-thread, since
+  // they contain the *entire* PDF document and can thus be arbitrarily long.
+  const docBaseUrl =
+    typeof src.docBaseUrl === "string" && !isDataScheme(src.docBaseUrl)
+      ? src.docBaseUrl
+      : null;
+  const cMapUrl = typeof src.cMapUrl === "string" ? src.cMapUrl : null;
+  const cMapPacked = src.cMapPacked !== false;
+  const CMapReaderFactory = src.CMapReaderFactory || DefaultCMapReaderFactory;
+  const standardFontDataUrl =
+    typeof src.standardFontDataUrl === "string"
+      ? src.standardFontDataUrl
+      : null;
+  const StandardFontDataFactory =
+    src.StandardFontDataFactory || DefaultStandardFontDataFactory;
+  const ignoreErrors = src.stopAtErrors !== true;
+  const maxImageSize =
+    Number.isInteger(src.maxImageSize) && src.maxImageSize > -1
+      ? src.maxImageSize
+      : -1;
+  const isEvalSupported = src.isEvalSupported !== false;
+  const isOffscreenCanvasSupported =
+    typeof src.isOffscreenCanvasSupported === "boolean"
+      ? src.isOffscreenCanvasSupported
+      : !isNodeJS;
+  const disableFontFace =
+    typeof src.disableFontFace === "boolean" ? src.disableFontFace : isNodeJS;
+  const fontExtraProperties = src.fontExtraProperties === true;
+  const enableXfa = src.enableXfa === true;
+  const ownerDocument = src.ownerDocument || globalThis.document;
+  const disableRange = src.disableRange === true;
+  const disableStream = src.disableStream === true;
+  const disableAutoFetch = src.disableAutoFetch === true;
+  const pdfBug = src.pdfBug === true;

-  for (const key in src) {
-    const val = src[key];
+  // Parameters whose default values depend on other parameters.
+  const length = rangeTransport ? rangeTransport.length : src.length ?? NaN;
+  const useSystemFonts =
+    typeof src.useSystemFonts === "boolean"
+      ? src.useSystemFonts
+      : !isNodeJS && !disableFontFace;
+  const useWorkerFetch =
+    typeof src.useWorkerFetch === "boolean"
+      ? src.useWorkerFetch
+      : (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) ||
+        (CMapReaderFactory === DOMCMapReaderFactory &&
+          StandardFontDataFactory === DOMStandardFontDataFactory &&
+          isValidFetchUrl(cMapUrl, document.baseURI) &&
+          isValidFetchUrl(standardFontDataUrl, document.baseURI));

-    switch (key) {
-      case "url":
-        if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
-          continue; // The 'url' is unused with `PDFDataRangeTransport`.
-        } else {
-          if (val instanceof URL) {
-            params[key] = val.href;
-            continue;
-          }
-          try {
-            // The full path is required in the 'url' field.
-            params[key] = new URL(val, window.location).href;
-            continue;
-          } catch (ex) {
-            if (
-              typeof PDFJSDev !== "undefined" &&
-              PDFJSDev.test("GENERIC") &&
-              isNodeJS &&
-              typeof val === "string"
-            ) {
-              break; // Use the url as-is in Node.js environments.
-            }
-          }
-          throw new Error(
-            "Invalid PDF url data: " +
-              "either string or URL-object is expected in the url property."
-          );
-        }
-      case "range":
-        rangeTransport = val;
-        continue;
-      case "worker":
-        worker = val;
-        continue;
-      case "data":
-        // Converting string or array-like data to Uint8Array.
-        if (
-          typeof PDFJSDev !== "undefined" &&
-          PDFJSDev.test("GENERIC") &&
-          isNodeJS &&
-          typeof Buffer !== "undefined" && // eslint-disable-line no-undef
-          val instanceof Buffer // eslint-disable-line no-undef
-        ) {
-          params[key] = new Uint8Array(val);
-        } else if (
-          val instanceof Uint8Array &&
-          val.byteLength === val.buffer.byteLength
-        ) {
-          // Use the data as-is when it's already a Uint8Array that completely
-          // "utilizes" its underlying ArrayBuffer, to prevent any possible
-          // issues when transferring it to the worker-thread.
-          break;
-        } else if (typeof val === "string") {
-          params[key] = stringToBytes(val);
-        } else if (
-          (typeof val === "object" && val !== null && !isNaN(val.length)) ||
-          isArrayBuffer(val)
-        ) {
-          params[key] = new Uint8Array(val);
-        } else {
-          throw new Error(
-            "Invalid PDF binary data: either TypedArray, " +
-              "string, or array-like object is expected in the data property."
-          );
-        }
-        continue;
-    }
-    params[key] = val;
-  }
-
-  params.cMapPacked = params.cMapPacked !== false;
-  params.CMapReaderFactory =
-    params.CMapReaderFactory || DefaultCMapReaderFactory;
-  params.StandardFontDataFactory =
-    params.StandardFontDataFactory || DefaultStandardFontDataFactory;
-  params.ignoreErrors = params.stopAtErrors !== true;
-  params.fontExtraProperties = params.fontExtraProperties === true;
-  params.pdfBug = params.pdfBug === true;
-  params.enableXfa = params.enableXfa === true;
-
-  if (!Number.isInteger(params.rangeChunkSize) || params.rangeChunkSize < 1) {
-    params.rangeChunkSize = DEFAULT_RANGE_CHUNK_SIZE;
-  }
-  if (
-    typeof params.docBaseUrl !== "string" ||
-    isDataScheme(params.docBaseUrl)
-  ) {
-    // Ignore "data:"-URLs, since they can't be used to recover valid absolute
-    // URLs anyway. We want to avoid sending them to the worker-thread, since
-    // they contain the *entire* PDF document and can thus be arbitrarily long.
-    params.docBaseUrl = null;
-  }
-  if (!Number.isInteger(params.maxImageSize) || params.maxImageSize < -1) {
-    params.maxImageSize = -1;
-  }
-  if (typeof params.cMapUrl !== "string") {
-    params.cMapUrl = null;
-  }
-  if (typeof params.standardFontDataUrl !== "string") {
-    params.standardFontDataUrl = null;
-  }
-  if (typeof params.useWorkerFetch !== "boolean") {
-    params.useWorkerFetch =
-      (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) ||
-      (params.CMapReaderFactory === DOMCMapReaderFactory &&
-        params.StandardFontDataFactory === DOMStandardFontDataFactory &&
-        isValidFetchUrl(params.cMapUrl, document.baseURI) &&
-        isValidFetchUrl(params.standardFontDataUrl, document.baseURI));
-  }
-  if (typeof params.isEvalSupported !== "boolean") {
-    params.isEvalSupported = true;
-  }
-  if (typeof params.isOffscreenCanvasSupported !== "boolean") {
-    params.isOffscreenCanvasSupported = !isNodeJS;
-  }
-  if (typeof params.disableFontFace !== "boolean") {
-    params.disableFontFace = isNodeJS;
-  }
-  if (typeof params.useSystemFonts !== "boolean") {
-    params.useSystemFonts = !isNodeJS && !params.disableFontFace;
-  }
-  if (
-    typeof params.ownerDocument !== "object" ||
-    params.ownerDocument === null
-  ) {
-    params.ownerDocument = globalThis.document;
-  }
-
-  if (typeof params.disableRange !== "boolean") {
-    params.disableRange = false;
-  }
-  if (typeof params.disableStream !== "boolean") {
-    params.disableStream = false;
-  }
-  if (typeof params.disableAutoFetch !== "boolean") {
-    params.disableAutoFetch = false;
-  }
+  // Parameters only intended for development/testing purposes.
+  const styleElement =
+    typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")
+      ? src.styleElement
+      : null;

  // Set the main-thread verbosity level.
-  setVerbosityLevel(params.verbosity);
+  setVerbosityLevel(verbosity);

  if (!worker) {
    const workerParams = {
-      verbosity: params.verbosity,
+      verbosity,
      port: GlobalWorkerOptions.workerPort,
    };
    // Worker was not provided -- creating and owning our own. If message port
@ -435,44 +360,67 @@ function getDocument(src) {
    task._worker = worker;
  }
  const docId = task.docId;
+
+  const params = {
+    data,
+    password,
+    rangeChunkSize,
+    length,
+    docBaseUrl,
+    cMapUrl,
+    cMapPacked,
+    CMapReaderFactory,
+    standardFontDataUrl,
+    StandardFontDataFactory,
+    ignoreErrors,
+    maxImageSize,
+    isEvalSupported,
+    isOffscreenCanvasSupported,
+    disableFontFace,
+    fontExtraProperties,
+    enableXfa,
+    ownerDocument,
+    disableAutoFetch,
+    pdfBug,
+    useSystemFonts,
+    useWorkerFetch,
+    styleElement,
+  };
+
  worker.promise
    .then(function () {
      if (task.destroyed) {
        throw new Error("Loading aborted");
      }

-      const workerIdPromise = _fetchDocument(
-        worker,
-        params,
-        rangeTransport,
-        docId
-      );
+      const workerIdPromise = _fetchDocument(worker, params, docId);
      const networkStreamPromise = new Promise(function (resolve) {
        let networkStream;
        if (rangeTransport) {
          networkStream = new PDFDataTransportStream(
            {
-              length: params.length,
-              initialData: params.initialData,
-              progressiveDone: params.progressiveDone,
-              contentDispositionFilename: params.contentDispositionFilename,
-              disableRange: params.disableRange,
-              disableStream: params.disableStream,
+              length,
+              initialData: rangeTransport.initialData,
+              progressiveDone: rangeTransport.progressiveDone,
+              contentDispositionFilename:
+                rangeTransport.contentDispositionFilename,
+              disableRange,
+              disableStream,
            },
            rangeTransport
          );
-        } else if (!params.data) {
+        } else if (!data) {
          if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
            throw new Error("Not implemented: createPDFNetworkStream");
          }
          networkStream = createPDFNetworkStream({
-            url: params.url,
-            length: params.length,
-            httpHeaders: params.httpHeaders,
-            withCredentials: params.withCredentials,
-            rangeChunkSize: params.rangeChunkSize,
-            disableRange: params.disableRange,
-            disableStream: params.disableStream,
+            url,
+            length,
+            httpHeaders,
+            withCredentials,
+            rangeChunkSize,
+            disableRange,
+            disableStream,
          });
        }
        resolve(networkStream);
@ -510,24 +458,15 @@ function getDocument(src) {
 *
 * @param {PDFWorker} worker
 * @param {Object} source
- * @param {PDFDataRangeTransport} pdfDataRangeTransport
 * @param {string} docId - Unique document ID, used in `MessageHandler`.
 * @returns {Promise<string>} A promise that is resolved when the worker ID of
 *   the `MessageHandler` is known.
 * @private
 */
-async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) {
+async function _fetchDocument(worker, source, docId) {
  if (worker.destroyed) {
    throw new Error("Worker was destroyed");
  }
-
-  if (pdfDataRangeTransport) {
-    source.length = pdfDataRangeTransport.length;
-    source.initialData = pdfDataRangeTransport.initialData;
-    source.progressiveDone = pdfDataRangeTransport.progressiveDone;
-    source.contentDispositionFilename =
-      pdfDataRangeTransport.contentDispositionFilename;
-  }
  const transfers = source.data ? [source.data.buffer] : null;

  const workerId = await worker.messageHandler.sendWithPromise(
@ -569,6 +508,63 @@ async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) {
  return workerId;
 }

+function getUrlProp(val) {
+  if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
+    return null; // The 'url' is unused with `PDFDataRangeTransport`.
+  } else if (val instanceof URL) {
+    return val.href;
+  }
+  try {
+    // The full path is required in the 'url' field.
+    return new URL(val, window.location).href;
+  } catch (ex) {
+    if (
+      typeof PDFJSDev !== "undefined" &&
+      PDFJSDev.test("GENERIC") &&
+      isNodeJS &&
+      typeof val === "string"
+    ) {
+      return val; // Use the url as-is in Node.js environments.
+    }
+  }
+  throw new Error(
+    "Invalid PDF url data: " +
+      "either string or URL-object is expected in the url property."
+  );
+}
+
+function getDataProp(val) {
+  // Converting string or array-like data to Uint8Array.
+  if (
+    typeof PDFJSDev !== "undefined" &&
+    PDFJSDev.test("GENERIC") &&
+    isNodeJS &&
+    typeof Buffer !== "undefined" && // eslint-disable-line no-undef
+    val instanceof Buffer // eslint-disable-line no-undef
+  ) {
+    return new Uint8Array(val);
+  } else if (
+    val instanceof Uint8Array &&
+    val.byteLength === val.buffer.byteLength
+  ) {
+    // Use the data as-is when it's already a Uint8Array that completely
+    // "utilizes" its underlying ArrayBuffer, to prevent any possible
+    // issues when transferring it to the worker-thread.
+    return val;
+  } else if (typeof val === "string") {
+    return stringToBytes(val);
+  } else if (
+    (typeof val === "object" && !isNaN(val?.length)) ||
+    isArrayBuffer(val)
+  ) {
+    return new Uint8Array(val);
+  }
+  throw new Error(
+    "Invalid PDF binary data: either TypedArray, " +
+      "string, or array-like object is expected in the data property."
+  );
+}
+
 /**
 * @typedef {Object} OnProgressParameters
 * @property {number} loaded - Currently loaded number of bytes.
--- a/web/app.js
+++ b/web/app.js
@ -932,12 +932,10 @@ const PDFViewerApplication = {
    ) {
      // The Firefox built-in viewer always calls `setTitleUsingUrl`, before
      // `initPassiveLoading`, and it never provides an `originalUrl` here.
-      if (args.originalUrl) {
-        this.setTitleUsingUrl(args.originalUrl, /* downloadUrl = */ args.url);
-        delete args.originalUrl;
-      } else {
-        this.setTitleUsingUrl(args.url, /* downloadUrl = */ args.url);
-      }
+      this.setTitleUsingUrl(
+        args.originalUrl || args.url,
+        /* downloadUrl = */ args.url
+      );
    }
    // Set the necessary API parameters, using all the available options.
    const apiParams = AppOptions.getAll(OptionKind.API);