From db1e1612df8d029134f8859647532350317390e2 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 31 Mar 2021 16:21:41 +0200 Subject: [PATCH] [api-minor] Support proper `URL`-objects, in addition to URL-strings, in `getDocument` Currently only URL-strings are officially supported by `getDocument`, however at this point in time I cannot really see any compelling reason to not support `URL`-objects as well. Most likely the reason that we've don't already support `URL`-objects, in `getDocument`, is that historically `URL` wasn't fully implemented across browsers and our old polyfill wasn't perfect; see https://developer.mozilla.org/en-US/docs/Web/API/URL/URL#browser_compatibility *Please note:* Because of how the `url` parameter is currently handled, there's actually *some* cases where passing a `URL`-object to `getDocument` already works. That, in my opinion, provides additional motivation for supporting `URL`-objects officially, since it makes the API more consistent. The following is an attempt to summarize the *current* situation, based on the actual code rather than the JSDocs: - `getDocument("url string")` works and is documented.[1] - `getDocument({ url: "url string", })` works and is documented.[1] - `getDocument(new URL(...))` throws immediately, since no supported parameters are found. - `getDocument({ url: new URL(...), })` actually works even though it's not documented.[1] Originally, when data was fetched on the worker-thread, this would likely have thrown since `URL` isn't clonable.[2] - `getDocument({ url: { abc: 123, }, })`, or some similarily meaningless input, will be "accepted" by `getDocument` and then throw a `MissingPDFException` when attempting to fetch the bogus data. With the changes in this patch, not only is `URL`-objects now officially supported and documented when calling `getDocument`, but we'll also do a much better job at actually validating any URL-data passed to `getDocument` (and instead fail early). --- [1] In *browsers*, we create a valid URL thus indirectly validating the input. In Node.js environments, on the other hand, no validation is done since obtaining a baseUrl is more difficult (and PDF.js is primarily written for browsers anyway). [2] https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm#supported_types --- src/display/api.js | 28 +++++++++++++++++++--------- test/unit/api_spec.js | 30 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/src/display/api.js b/src/display/api.js index 15b605c84..6549d85ab 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -111,7 +111,7 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) { * Document initialization / loading parameters object. * * @typedef {Object} DocumentInitParameters - * @property {string} [url] - The URL of the PDF. + * @property {string|URL} [url] - The URL of the PDF. * @property {TypedArray|Array|string} [data] - Binary PDF data. Use * typed arrays (Uint8Array) to improve the memory usage. If PDF data is * BASE64-encoded, use `atob()` to convert it to a binary string first. @@ -192,16 +192,16 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) { * XHR as fallback) is used, which means it must follow same origin rules, * e.g. no cross-domain requests without CORS. * - * @param {string|TypedArray|DocumentInitParameters|PDFDataRangeTransport} src - - * Can be a URL to where a PDF file is located, a typed array (Uint8Array) - * already populated with data or parameter object. + * @param {string|URL|TypedArray|PDFDataRangeTransport|DocumentInitParameters} + * src - Can be a URL where a PDF file is located, a typed array (Uint8Array) + * already populated with data, or a parameter object. * @returns {PDFDocumentLoadingTask} */ function getDocument(src) { const task = new PDFDocumentLoadingTask(); let source; - if (typeof src === "string") { + if (typeof src === "string" || src instanceof URL) { source = { url: src }; } else if (isArrayBuffer(src)) { source = { data: src }; @@ -211,7 +211,7 @@ function getDocument(src) { if (typeof src !== "object") { throw new Error( "Invalid parameter in getDocument, " + - "need either Uint8Array, string or a parameter object" + "need either string, URL, Uint8Array, or parameter object." ); } if (!src.url && !src.data && !src.range) { @@ -231,11 +231,21 @@ function getDocument(src) { switch (key) { case "url": if (typeof window !== "undefined") { - // The full path is required in the 'url' field. - params[key] = new URL(value, window.location).href; + try { + // The full path is required in the 'url' field. + params[key] = new URL(value, window.location).href; + continue; + } catch (ex) { + warn(`Cannot create valid URL: "${ex}".`); + } + } else if (typeof value === "string" || value instanceof URL) { + params[key] = value.toString(); // Support Node.js environments. continue; } - break; + throw new Error( + "Invalid PDF url data: " + + "either string or URL-object is expected in the url property." + ); case "range": rangeTransport = value; continue; diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 791abdad6..22ea78f13 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -73,6 +73,36 @@ describe("api", function () { } describe("getDocument", function () { + it("creates pdf doc from URL-string", async function () { + const urlStr = TEST_PDFS_PATH + basicApiFileName; + const loadingTask = getDocument(urlStr); + const pdfDocument = await loadingTask.promise; + + expect(typeof urlStr).toEqual("string"); + expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true); + expect(pdfDocument.numPages).toEqual(3); + + await loadingTask.destroy(); + }); + + it("creates pdf doc from URL-object", async function () { + if (isNodeJS) { + pending("window.location is not supported in Node.js."); + } + const urlObj = new URL( + TEST_PDFS_PATH + basicApiFileName, + window.location + ); + const loadingTask = getDocument(urlObj); + const pdfDocument = await loadingTask.promise; + + expect(urlObj instanceof URL).toEqual(true); + expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true); + expect(pdfDocument.numPages).toEqual(3); + + await loadingTask.destroy(); + }); + it("creates pdf doc from URL", function (done) { const loadingTask = getDocument(basicApiGetDocumentParams);