From cee97fcd15cda22953f1eacf42f82a0039f362b7 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 11 Jan 2023 22:03:36 +0100 Subject: [PATCH] [api-minor] Enabling transferring of data fetched with the `PDFFetchStream` implementation Note how in the API we're transferring the PDF data that's fetched over the network[1]: - https://github.com/mozilla/pdf.js/blob/f28bf23a314e36d9e255037f5716ae1eb8e16fbf/src/display/api.js#L2467-L2480 - https://github.com/mozilla/pdf.js/blob/f28bf23a314e36d9e255037f5716ae1eb8e16fbf/src/display/api.js#L2553-L2564 To support that functionality we have the `PDFDataTransportStream`, `PDFFetchStream`, `PDFNetworkStream`, and `PDFNodeStream` implementations. Here these stream-implementations vary slightly in how they handle `ArrayBuffer`s internally, w.r.t. transferring or copying the data: - In `PDFDataTransportStream` we optionally, after PR 15908, allow transferring of the PDF data as provided externally (used e.g. in the Firefox PDF Viewer). - In `PDFFetchStream` we're currenly always copying the PDF data returned by the Fetch API, which seems unnecessary. As discussed in PR 15908, it'd seem very weird if this sort of browser API didn't allow transferring of the returned data. - In `PDFNetworkStream` we're already, since many years, transferring the PDF data returned by the `XMLHttpRequest` functionality. Note how the `getArrayBuffer` helper function simply returns an `ArrayBuffer` response as-is. - In `PDFNodeStream` we're currently copying the PDF data, however this is unfortunately necessary since Node.js returns data as a `Buffer` object[2]. Given that the `PDFNetworkStream` has been, indirectly, supporting transferring of PDF data for years it would seem really strange if this didn't also apply to the `PDFFetchStream`-implementation. Hence this patch simply enables transferring of PDF data, when accessed using the Fetch API, unconditionally to help reduced main-thread memory usage since the `PDFFetchStream`-implementation is used *by default* in browsers (for the GENERIC build). --- [1] As opposed to PDF data being provided as e.g. a TypedArray when calling `getDocument` in the API. [2] This is a "special" Node.js object, see https://nodejs.org/api/buffer.html#buffer, which doesn't exist in browsers. --- src/display/api.js | 8 +++----- src/display/fetch_stream.js | 18 ++++++++++++++---- src/display/network.js | 3 +-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/display/api.js b/src/display/api.js index 86564b63e..faa4ffe3c 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -192,9 +192,7 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) { * @property {boolean} [transferPdfData] - Determines if we can transfer * TypedArrays used for loading the PDF file, utilized together with: * - The `data`-option, for the `getDocument` function. - * - The `initialData`-option, for the `PDFDataRangeTransport` constructor. - * - The `chunk`-option, for the `PDFDataTransportStream._onReceiveData` - * method. + * - The `PDFDataTransportStream` implementation. * This will help reduce main-thread memory usage, however it will take * ownership of the TypedArrays. The default value is `false`. * @property {boolean} [isEvalSupported] - Determines if we can evaluate strings @@ -2472,7 +2470,7 @@ class WorkerTransport { return; } assert( - isArrayBuffer(value), + value instanceof ArrayBuffer, "GetReader - expected an ArrayBuffer." ); // Enqueue data chunk into sink, and transfer it @@ -2558,7 +2556,7 @@ class WorkerTransport { return; } assert( - isArrayBuffer(value), + value instanceof ArrayBuffer, "GetRangeReader - expected an ArrayBuffer." ); sink.enqueue(new Uint8Array(value), 1, [value]); diff --git a/src/display/fetch_stream.js b/src/display/fetch_stream.js index a2d19f563..6680b067f 100644 --- a/src/display/fetch_stream.js +++ b/src/display/fetch_stream.js @@ -17,6 +17,7 @@ import { AbortException, assert, createPromiseCapability, + warn, } from "../shared/util.js"; import { createResponseStatusError, @@ -54,6 +55,17 @@ function createHeaders(httpHeaders) { return headers; } +function getArrayBuffer(val) { + if (val instanceof Uint8Array) { + return val.buffer; + } + if (val instanceof ArrayBuffer) { + return val; + } + warn(`getArrayBuffer - unexpected data format: ${val}`); + return new Uint8Array(val).buffer; +} + /** @implements {IPDFStream} */ class PDFFetchStream { constructor(source) { @@ -195,8 +207,7 @@ class PDFFetchStreamReader { total: this._contentLength, }); - const buffer = new Uint8Array(value).buffer; - return { value: buffer, done: false }; + return { value: getArrayBuffer(value), done: false }; } cancel(reason) { @@ -254,8 +265,7 @@ class PDFFetchStreamRangeReader { this._loaded += value.byteLength; this.onProgress?.({ loaded: this._loaded }); - const buffer = new Uint8Array(value).buffer; - return { value: buffer, done: false }; + return { value: getArrayBuffer(value), done: false }; } cancel(reason) { diff --git a/src/display/network.js b/src/display/network.js index 59d6bc933..00c895af3 100644 --- a/src/display/network.js +++ b/src/display/network.js @@ -38,8 +38,7 @@ function getArrayBuffer(xhr) { if (typeof data !== "string") { return data; } - const array = stringToBytes(data); - return array.buffer; + return stringToBytes(data).buffer; } class NetworkManager {