diff --git a/src/display/api.js b/src/display/api.js index cc8357c6d..a90e8bcd6 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -139,8 +139,12 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) { * @typedef {Object} DocumentInitParameters * @property {string | URL} [url] - The URL of the PDF. * @property {BinaryData} [data] - Binary PDF data. - * Use typed arrays (Uint8Array) to improve the memory usage. If PDF data is + * Use TypedArrays (Uint8Array) to improve the memory usage. If PDF data is * BASE64-encoded, use `atob()` to convert it to a binary string first. + * + * NOTE: If TypedArrays are used they will generally be transferred to the + * worker-thread. This will help reduce main-thread memory usage, however + * it will take ownership of the TypedArrays. * @property {Object} [httpHeaders] - Basic authentication headers. * @property {boolean} [withCredentials] - Indicates whether or not * cross-site Access-Control requests should be made using credentials such @@ -189,12 +193,6 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) { * @property {number} [maxImageSize] - The maximum allowed image size in total * pixels, i.e. width * height. Images above this value will not be rendered. * Use -1 for no limit, which is also the default value. - * @property {boolean} [transferPdfData] - Determines if we can transfer - * TypedArrays used for loading the PDF file, utilized together with: - * - The `data`-option, for the `getDocument` function. - * - The `PDFDataTransportStream` implementation. - * This will help reduce main-thread memory usage, however it will take - * ownership of the TypedArrays. The default value is `false`. * @property {boolean} [isEvalSupported] - Determines if we can evaluate strings * as JavaScript. Primarily used to improve performance of font rendering, and * when parsing PDF functions. The default value is `true`. @@ -281,20 +279,20 @@ function getDocument(src) { worker = null; for (const key in source) { - const value = source[key]; + const val = source[key]; switch (key) { case "url": if (typeof window !== "undefined") { try { // The full path is required in the 'url' field. - params[key] = new URL(value, window.location).href; + params[key] = new URL(val, window.location).href; continue; } catch (ex) { warn(`Cannot create valid URL: "${ex}".`); } - } else if (typeof value === "string" || value instanceof URL) { - params[key] = value.toString(); // Support Node.js environments. + } else if (typeof val === "string" || val instanceof URL) { + params[key] = val.toString(); // Support Node.js environments. continue; } throw new Error( @@ -302,10 +300,10 @@ function getDocument(src) { "either string or URL-object is expected in the url property." ); case "range": - rangeTransport = value; + rangeTransport = val; continue; case "worker": - worker = value; + worker = val; continue; case "data": // Converting string or array-like data to Uint8Array. @@ -314,21 +312,24 @@ function getDocument(src) { PDFJSDev.test("GENERIC") && isNodeJS && typeof Buffer !== "undefined" && // eslint-disable-line no-undef - value instanceof Buffer // eslint-disable-line no-undef + val instanceof Buffer // eslint-disable-line no-undef ) { - params[key] = new Uint8Array(value); - } else if (value instanceof Uint8Array) { - break; // Use the data as-is when it's already a Uint8Array. - } else if (typeof value === "string") { - params[key] = stringToBytes(value); + params[key] = new Uint8Array(val); } else if ( - typeof value === "object" && - value !== null && - !isNaN(value.length) + val instanceof Uint8Array && + val.byteLength === val.buffer.byteLength ) { - params[key] = new Uint8Array(value); - } else if (isArrayBuffer(value)) { - params[key] = new Uint8Array(value); + // Use the data as-is when it's already a Uint8Array that completely + // "utilizes" its underlying ArrayBuffer, to prevent any possible + // issues when transferring it to the worker-thread. + break; + } else if (typeof val === "string") { + params[key] = stringToBytes(val); + } else if ( + (typeof val === "object" && val !== null && !isNaN(val.length)) || + isArrayBuffer(val) + ) { + params[key] = new Uint8Array(val); } else { throw new Error( "Invalid PDF binary data: either TypedArray, " + @@ -337,7 +338,7 @@ function getDocument(src) { } continue; } - params[key] = value; + params[key] = val; } params.CMapReaderFactory = @@ -345,7 +346,6 @@ function getDocument(src) { params.StandardFontDataFactory = params.StandardFontDataFactory || DefaultStandardFontDataFactory; params.ignoreErrors = params.stopAtErrors !== true; - params.transferPdfData = params.transferPdfData === true; params.fontExtraProperties = params.fontExtraProperties === true; params.pdfBug = params.pdfBug === true; params.enableXfa = params.enableXfa === true; @@ -443,7 +443,6 @@ function getDocument(src) { { length: params.length, initialData: params.initialData, - transferPdfData: params.transferPdfData, progressiveDone: params.progressiveDone, contentDispositionFilename: params.contentDispositionFilename, disableRange: params.disableRange, @@ -518,8 +517,7 @@ async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) { source.contentDispositionFilename = pdfDataRangeTransport.contentDispositionFilename; } - const transfers = - source.transferPdfData && source.data ? [source.data.buffer] : null; + const transfers = source.data ? [source.data.buffer] : null; const workerId = await worker.messageHandler.sendWithPromise( "GetDocRequest", @@ -659,6 +657,10 @@ class PDFDocumentLoadingTask { /** * Abstract class to support range requests file loading. + * + * NOTE: The TypedArrays passed to the constructor and relevant methods below + * will generally be transferred to the worker-thread. This will help reduce + * main-thread memory usage, however it will take ownership of the TypedArrays. */ class PDFDataRangeTransport { /** diff --git a/src/display/transport_stream.js b/src/display/transport_stream.js index 38460d55d..b27dbaf7d 100644 --- a/src/display/transport_stream.js +++ b/src/display/transport_stream.js @@ -18,13 +18,10 @@ import { isPdfFile } from "./display_utils.js"; /** @implements {IPDFStream} */ class PDFDataTransportStream { - #transferPdfData = false; - constructor( { length, initialData, - transferPdfData = false, progressiveDone = false, contentDispositionFilename = null, disableRange = false, @@ -38,14 +35,17 @@ class PDFDataTransportStream { ); this._queuedChunks = []; - this.#transferPdfData = transferPdfData; this._progressiveDone = progressiveDone; this._contentDispositionFilename = contentDispositionFilename; if (initialData?.length > 0) { - const buffer = this.#transferPdfData - ? initialData.buffer - : new Uint8Array(initialData).buffer; + // Prevent any possible issues by only transferring a Uint8Array that + // completely "utilizes" its underlying ArrayBuffer. + const buffer = + initialData instanceof Uint8Array && + initialData.byteLength === initialData.buffer.byteLength + ? initialData.buffer + : new Uint8Array(initialData).buffer; this._queuedChunks.push(buffer); } @@ -77,8 +77,11 @@ class PDFDataTransportStream { } _onReceiveData({ begin, chunk }) { + // Prevent any possible issues by only transferring a Uint8Array that + // completely "utilizes" its underlying ArrayBuffer. const buffer = - this.#transferPdfData && chunk?.length >= 0 + chunk instanceof Uint8Array && + chunk.byteLength === chunk.buffer.byteLength ? chunk.buffer : new Uint8Array(chunk).buffer; diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 53cb12de9..0d373d18f 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -193,44 +193,10 @@ describe("api", function () { expect(data[0] instanceof PDFDocumentProxy).toEqual(true); expect(data[1].loaded / data[1].total).toEqual(1); - // Check that the TypedArray wasn't transferred. - expect(typedArrayPdf.length).toEqual(basicApiFileLength); - - await loadingTask.destroy(); - }); - - it("creates pdf doc from TypedArray, with `transferPdfData` set", async function () { - if (isNodeJS) { - pending("Worker is not supported in Node.js."); + if (!isNodeJS) { + // Check that the TypedArray was transferred. + expect(typedArrayPdf.length).toEqual(0); } - const typedArrayPdf = await DefaultFileReaderFactory.fetch({ - path: TEST_PDFS_PATH + basicApiFileName, - }); - - // Sanity check to make sure that we fetched the entire PDF file. - expect(typedArrayPdf instanceof Uint8Array).toEqual(true); - expect(typedArrayPdf.length).toEqual(basicApiFileLength); - - const loadingTask = getDocument({ - data: typedArrayPdf, - transferPdfData: true, - }); - expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); - - const progressReportedCapability = createPromiseCapability(); - loadingTask.onProgress = function (data) { - progressReportedCapability.resolve(data); - }; - - const data = await Promise.all([ - loadingTask.promise, - progressReportedCapability.promise, - ]); - expect(data[0] instanceof PDFDocumentProxy).toEqual(true); - expect(data[1].loaded / data[1].total).toEqual(1); - - // Check that the TypedArray was transferred. - expect(typedArrayPdf.length).toEqual(0); await loadingTask.destroy(); }); @@ -259,6 +225,11 @@ describe("api", function () { expect(data[0] instanceof PDFDocumentProxy).toEqual(true); expect(data[1].loaded / data[1].total).toEqual(1); + if (!isNodeJS) { + // Check that the ArrayBuffer was transferred. + expect(arrayBufferPdf.byteLength).toEqual(0); + } + await loadingTask.destroy(); }); @@ -3275,16 +3246,22 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) it("should fetch document info and page using ranges", async function () { const initialDataLength = 4000; + const subArrays = []; let fetches = 0; const data = await dataPromise; - const initialData = data.subarray(0, initialDataLength); + const initialData = new Uint8Array(data.subarray(0, initialDataLength)); + subArrays.push(initialData); + const transport = new PDFDataRangeTransport(data.length, initialData); transport.requestDataRange = function (begin, end) { fetches++; waitSome(function () { - transport.onDataProgress(4000); - transport.onDataRange(begin, data.subarray(begin, end)); + const chunk = new Uint8Array(data.subarray(begin, end)); + subArrays.push(chunk); + + transport.onDataProgress(initialDataLength); + transport.onDataRange(begin, chunk); }); }; @@ -3296,65 +3273,40 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) expect(pdfPage.rotate).toEqual(0); expect(fetches).toBeGreaterThan(2); - // Check that the TypedArray wasn't transferred. - expect(initialData.length).toEqual(initialDataLength); - - await loadingTask.destroy(); - }); - - it("should fetch document info and page using ranges, with `transferPdfData` set", async function () { - if (isNodeJS) { - pending("Worker is not supported in Node.js."); + if (!isNodeJS) { + // Check that the TypedArrays were transferred. + for (const array of subArrays) { + expect(array.length).toEqual(0); + } } - const initialDataLength = 4000; - let fetches = 0; - - const data = await dataPromise; - const initialData = new Uint8Array(data.subarray(0, initialDataLength)); - const transport = new PDFDataRangeTransport(data.length, initialData); - transport.requestDataRange = function (begin, end) { - fetches++; - waitSome(function () { - transport.onDataProgress(4000); - transport.onDataRange( - begin, - new Uint8Array(data.subarray(begin, end)) - ); - }); - }; - - const loadingTask = getDocument({ - range: transport, - transferPdfData: true, - }); - const pdfDocument = await loadingTask.promise; - expect(pdfDocument.numPages).toEqual(14); - - const pdfPage = await pdfDocument.getPage(10); - expect(pdfPage.rotate).toEqual(0); - expect(fetches).toBeGreaterThan(2); - - // Check that the TypedArray was transferred. - expect(initialData.length).toEqual(0); await loadingTask.destroy(); }); it("should fetch document info and page using range and streaming", async function () { const initialDataLength = 4000; + const subArrays = []; let fetches = 0; const data = await dataPromise; - const initialData = data.subarray(0, initialDataLength); + const initialData = new Uint8Array(data.subarray(0, initialDataLength)); + subArrays.push(initialData); + const transport = new PDFDataRangeTransport(data.length, initialData); transport.requestDataRange = function (begin, end) { fetches++; if (fetches === 1) { + const chunk = new Uint8Array(data.subarray(initialDataLength)); + subArrays.push(chunk); + // Send rest of the data on first range request. - transport.onDataProgressiveRead(data.subarray(initialDataLength)); + transport.onDataProgressiveRead(chunk); } waitSome(function () { - transport.onDataRange(begin, data.subarray(begin, end)); + const chunk = new Uint8Array(data.subarray(begin, end)); + subArrays.push(chunk); + + transport.onDataRange(begin, chunk); }); }; @@ -3369,6 +3321,14 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) await new Promise(resolve => { waitSome(resolve); }); + + if (!isNodeJS) { + // Check that the TypedArrays were transferred. + for (const array of subArrays) { + expect(array.length).toEqual(0); + } + } + await loadingTask.destroy(); }); @@ -3376,12 +3336,16 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) "should fetch document info and page, without range, " + "using complete initialData", async function () { + const subArrays = []; let fetches = 0; const data = await dataPromise; + const initialData = new Uint8Array(data); + subArrays.push(initialData); + const transport = new PDFDataRangeTransport( data.length, - data, + initialData, /* progressiveDone = */ true ); transport.requestDataRange = function (begin, end) { @@ -3399,6 +3363,13 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) expect(pdfPage.rotate).toEqual(0); expect(fetches).toEqual(0); + if (!isNodeJS) { + // Check that the TypedArrays were transferred. + for (const array of subArrays) { + expect(array.length).toEqual(0); + } + } + await loadingTask.destroy(); } ); diff --git a/web/app_options.js b/web/app_options.js index bc98b00db..d0cafe242 100644 --- a/web/app_options.js +++ b/web/app_options.js @@ -270,11 +270,6 @@ const defaultOptions = { : "../web/standard_fonts/", kind: OptionKind.API, }, - transferPdfData: { - /** @type {boolean} */ - value: typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL"), - kind: OptionKind.API, - }, verbosity: { /** @type {number} */ value: 1,