Merge pull request #13166 from Snuffleupagus/getDocument-URL

[api-minor] Support proper `URL`-objects, in addition to URL-strings, in `getDocument`
This commit is contained in:
Tim van der Meij 2021-03-31 21:20:08 +02:00 committed by GitHub
commit 5be0fbe8f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 100 additions and 56 deletions

View File

@ -111,7 +111,7 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) {
* Document initialization / loading parameters object.
*
* @typedef {Object} DocumentInitParameters
* @property {string} [url] - The URL of the PDF.
* @property {string|URL} [url] - The URL of the PDF.
* @property {TypedArray|Array<number>|string} [data] - Binary PDF data. Use
* typed arrays (Uint8Array) to improve the memory usage. If PDF data is
* BASE64-encoded, use `atob()` to convert it to a binary string first.
@ -185,16 +185,6 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) {
* (see `web/debugger.js`). The default value is `false`.
*/
/**
* @typedef {Object} PDFDocumentStats
* @property {Object<string, boolean>} streamTypes - Used stream types in the
* document (an item is set to true if specific stream ID was used in the
* document).
* @property {Object<string, boolean>} fontTypes - Used font types in the
* document (an item is set to true if specific font ID was used in the
* document).
*/
/**
* This is the main entry point for loading a PDF and interacting with it.
*
@ -202,16 +192,16 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) {
* XHR as fallback) is used, which means it must follow same origin rules,
* e.g. no cross-domain requests without CORS.
*
* @param {string|TypedArray|DocumentInitParameters|PDFDataRangeTransport} src -
* Can be a URL to where a PDF file is located, a typed array (Uint8Array)
* already populated with data or parameter object.
* @param {string|URL|TypedArray|PDFDataRangeTransport|DocumentInitParameters}
* src - Can be a URL where a PDF file is located, a typed array (Uint8Array)
* already populated with data, or a parameter object.
* @returns {PDFDocumentLoadingTask}
*/
function getDocument(src) {
const task = new PDFDocumentLoadingTask();
let source;
if (typeof src === "string") {
if (typeof src === "string" || src instanceof URL) {
source = { url: src };
} else if (isArrayBuffer(src)) {
source = { data: src };
@ -221,7 +211,7 @@ function getDocument(src) {
if (typeof src !== "object") {
throw new Error(
"Invalid parameter in getDocument, " +
"need either Uint8Array, string or a parameter object"
"need either string, URL, Uint8Array, or parameter object."
);
}
if (!src.url && !src.data && !src.range) {
@ -236,49 +226,63 @@ function getDocument(src) {
worker = null;
for (const key in source) {
if (key === "url" && typeof window !== "undefined") {
// The full path is required in the 'url' field.
params[key] = new URL(source[key], window.location).href;
continue;
} else if (key === "range") {
rangeTransport = source[key];
continue;
} else if (key === "worker") {
worker = source[key];
continue;
} else if (key === "data") {
// Converting string or array-like data to Uint8Array.
const pdfBytes = source[key];
if (
typeof PDFJSDev !== "undefined" &&
PDFJSDev.test("GENERIC") &&
isNodeJS &&
typeof Buffer !== "undefined" && // eslint-disable-line no-undef
pdfBytes instanceof Buffer // eslint-disable-line no-undef
) {
params[key] = new Uint8Array(pdfBytes);
} else if (pdfBytes instanceof Uint8Array) {
// Use the data as-is when it's already a Uint8Array.
params[key] = pdfBytes;
} else if (typeof pdfBytes === "string") {
params[key] = stringToBytes(pdfBytes);
} else if (
typeof pdfBytes === "object" &&
pdfBytes !== null &&
!isNaN(pdfBytes.length)
) {
params[key] = new Uint8Array(pdfBytes);
} else if (isArrayBuffer(pdfBytes)) {
params[key] = new Uint8Array(pdfBytes);
} else {
const value = source[key];
switch (key) {
case "url":
if (typeof window !== "undefined") {
try {
// The full path is required in the 'url' field.
params[key] = new URL(value, window.location).href;
continue;
} catch (ex) {
warn(`Cannot create valid URL: "${ex}".`);
}
} else if (typeof value === "string" || value instanceof URL) {
params[key] = value.toString(); // Support Node.js environments.
continue;
}
throw new Error(
"Invalid PDF binary data: either typed array, " +
"string, or array-like object is expected in the data property."
"Invalid PDF url data: " +
"either string or URL-object is expected in the url property."
);
}
continue;
case "range":
rangeTransport = value;
continue;
case "worker":
worker = value;
continue;
case "data":
// Converting string or array-like data to Uint8Array.
if (
typeof PDFJSDev !== "undefined" &&
PDFJSDev.test("GENERIC") &&
isNodeJS &&
typeof Buffer !== "undefined" && // eslint-disable-line no-undef
value instanceof Buffer // eslint-disable-line no-undef
) {
params[key] = new Uint8Array(value);
} else if (value instanceof Uint8Array) {
break; // Use the data as-is when it's already a Uint8Array.
} else if (typeof value === "string") {
params[key] = stringToBytes(value);
} else if (
typeof value === "object" &&
value !== null &&
!isNaN(value.length)
) {
params[key] = new Uint8Array(value);
} else if (isArrayBuffer(value)) {
params[key] = new Uint8Array(value);
} else {
throw new Error(
"Invalid PDF binary data: either typed array, " +
"string, or array-like object is expected in the data property."
);
}
continue;
}
params[key] = source[key];
params[key] = value;
}
params.rangeChunkSize = params.rangeChunkSize || DEFAULT_RANGE_CHUNK_SIZE;
@ -891,6 +895,16 @@ class PDFDocumentProxy {
return this._transport.downloadInfoCapability.promise;
}
/**
* @typedef {Object} PDFDocumentStats
* @property {Object<string, boolean>} streamTypes - Used stream types in the
* document (an item is set to true if specific stream ID was used in the
* document).
* @property {Object<string, boolean>} fontTypes - Used font types in the
* document (an item is set to true if specific font ID was used in the
* document).
*/
/**
* @returns {Promise<PDFDocumentStats>} A promise that is resolved with
* current statistics about document structures (see

View File

@ -73,6 +73,36 @@ describe("api", function () {
}
describe("getDocument", function () {
// Loads the basic API test file by passing a plain URL-string directly to
// `getDocument`, and checks that the resulting document has three pages.
it("creates pdf doc from URL-string", async function () {
const urlStr = TEST_PDFS_PATH + basicApiFileName;
const loadingTask = getDocument(urlStr);
const pdfDocument = await loadingTask.promise;
// Confirm that the input really was a string, i.e. that this spec covers the
// URL-string form rather than the URL-object form.
expect(typeof urlStr).toEqual("string");
expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true);
expect(pdfDocument.numPages).toEqual(3);
// Destroy the loading task once all assertions have run.
await loadingTask.destroy();
});
// Loads the basic API test file by passing a `URL`-object (instead of a
// string) directly to `getDocument`, and checks that the resulting document
// has three pages.
it("creates pdf doc from URL-object", async function () {
// The URL below is resolved against `window.location`, so this spec is
// marked as pending when running in Node.js.
if (isNodeJS) {
pending("window.location is not supported in Node.js.");
}
const urlObj = new URL(
TEST_PDFS_PATH + basicApiFileName,
window.location
);
const loadingTask = getDocument(urlObj);
const pdfDocument = await loadingTask.promise;
// Confirm that the input really was a `URL` instance, i.e. that this spec
// covers the URL-object form rather than the URL-string form.
expect(urlObj instanceof URL).toEqual(true);
expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true);
expect(pdfDocument.numPages).toEqual(3);
// Destroy the loading task once all assertions have run.
await loadingTask.destroy();
});
it("creates pdf doc from URL", function (done) {
const loadingTask = getDocument(basicApiGetDocumentParams);