Merge pull request #15920 from Snuffleupagus/transfer-pdf-data
[api-minor] Enable transferring of TypedArray PDF data by default (PR 15908 follow-up)
This commit is contained in:
		
						commit
						8f3fa18c93
					
				| @ -139,8 +139,12 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) { | ||||
|  * @typedef {Object} DocumentInitParameters | ||||
|  * @property {string | URL} [url] - The URL of the PDF. | ||||
|  * @property {BinaryData} [data] - Binary PDF data. | ||||
|  *   Use typed arrays (Uint8Array) to improve the memory usage. If PDF data is | ||||
|  *   Use TypedArrays (Uint8Array) to improve the memory usage. If PDF data is | ||||
|  *   BASE64-encoded, use `atob()` to convert it to a binary string first. | ||||
|  * | ||||
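|  *   NOTE: If TypedArrays are used they will generally be transferred to the | ||||
|  *   worker-thread. This helps reduce main-thread memory usage, however the | ||||
|  *   worker-thread takes ownership of the TypedArrays, which means that they | ||||
|  *   become detached (zero-length) and unusable on the main-thread afterwards. | ||||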
|  * @property {Object} [httpHeaders] - Basic authentication headers. | ||||
|  * @property {boolean} [withCredentials] - Indicates whether or not | ||||
|  *   cross-site Access-Control requests should be made using credentials such | ||||
| @ -189,12 +193,6 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) { | ||||
|  * @property {number} [maxImageSize] - The maximum allowed image size in total | ||||
|  *   pixels, i.e. width * height. Images above this value will not be rendered. | ||||
|  *   Use -1 for no limit, which is also the default value. | ||||
|  * @property {boolean} [transferPdfData] - Determines if we can transfer | ||||
|  *   TypedArrays used for loading the PDF file, utilized together with: | ||||
|  *    - The `data`-option, for the `getDocument` function. | ||||
|  *    - The `PDFDataTransportStream` implementation. | ||||
|  *   This will help reduce main-thread memory usage, however it will take | ||||
|  *   ownership of the TypedArrays. The default value is `false`. | ||||
|  * @property {boolean} [isEvalSupported] - Determines if we can evaluate strings | ||||
|  *   as JavaScript. Primarily used to improve performance of font rendering, and | ||||
|  *   when parsing PDF functions. The default value is `true`. | ||||
| @ -281,20 +279,20 @@ function getDocument(src) { | ||||
|     worker = null; | ||||
| 
 | ||||
|   for (const key in source) { | ||||
|     const value = source[key]; | ||||
|     const val = source[key]; | ||||
| 
 | ||||
|     switch (key) { | ||||
|       case "url": | ||||
|         if (typeof window !== "undefined") { | ||||
|           try { | ||||
|             // The full path is required in the 'url' field.
 | ||||
|             params[key] = new URL(value, window.location).href; | ||||
|             params[key] = new URL(val, window.location).href; | ||||
|             continue; | ||||
|           } catch (ex) { | ||||
|             warn(`Cannot create valid URL: "${ex}".`); | ||||
|           } | ||||
|         } else if (typeof value === "string" || value instanceof URL) { | ||||
|           params[key] = value.toString(); // Support Node.js environments.
 | ||||
|         } else if (typeof val === "string" || val instanceof URL) { | ||||
|           params[key] = val.toString(); // Support Node.js environments.
 | ||||
|           continue; | ||||
|         } | ||||
|         throw new Error( | ||||
| @ -302,10 +300,10 @@ function getDocument(src) { | ||||
|             "either string or URL-object is expected in the url property." | ||||
|         ); | ||||
|       case "range": | ||||
|         rangeTransport = value; | ||||
|         rangeTransport = val; | ||||
|         continue; | ||||
|       case "worker": | ||||
|         worker = value; | ||||
|         worker = val; | ||||
|         continue; | ||||
|       case "data": | ||||
|         // Converting string or array-like data to Uint8Array.
 | ||||
| @ -314,21 +312,24 @@ function getDocument(src) { | ||||
|           PDFJSDev.test("GENERIC") && | ||||
|           isNodeJS && | ||||
|           typeof Buffer !== "undefined" && // eslint-disable-line no-undef
 | ||||
|           value instanceof Buffer // eslint-disable-line no-undef
 | ||||
|           val instanceof Buffer // eslint-disable-line no-undef
 | ||||
|         ) { | ||||
|           params[key] = new Uint8Array(value); | ||||
|         } else if (value instanceof Uint8Array) { | ||||
|           break; // Use the data as-is when it's already a Uint8Array.
 | ||||
|         } else if (typeof value === "string") { | ||||
|           params[key] = stringToBytes(value); | ||||
|           params[key] = new Uint8Array(val); | ||||
|         } else if ( | ||||
|           typeof value === "object" && | ||||
|           value !== null && | ||||
|           !isNaN(value.length) | ||||
|           val instanceof Uint8Array && | ||||
|           val.byteLength === val.buffer.byteLength | ||||
|         ) { | ||||
|           params[key] = new Uint8Array(value); | ||||
|         } else if (isArrayBuffer(value)) { | ||||
|           params[key] = new Uint8Array(value); | ||||
|           // Use the data as-is when it's already a Uint8Array that completely
 | ||||
|           // "utilizes" its underlying ArrayBuffer, to prevent any possible
 | ||||
|           // issues when transferring it to the worker-thread.
 | ||||
|           break; | ||||
|         } else if (typeof val === "string") { | ||||
|           params[key] = stringToBytes(val); | ||||
|         } else if ( | ||||
|           (typeof val === "object" && val !== null && !isNaN(val.length)) || | ||||
|           isArrayBuffer(val) | ||||
|         ) { | ||||
|           params[key] = new Uint8Array(val); | ||||
|         } else { | ||||
|           throw new Error( | ||||
|             "Invalid PDF binary data: either TypedArray, " + | ||||
| @ -337,7 +338,7 @@ function getDocument(src) { | ||||
|         } | ||||
|         continue; | ||||
|     } | ||||
|     params[key] = value; | ||||
|     params[key] = val; | ||||
|   } | ||||
| 
 | ||||
|   params.CMapReaderFactory = | ||||
| @ -345,7 +346,6 @@ function getDocument(src) { | ||||
|   params.StandardFontDataFactory = | ||||
|     params.StandardFontDataFactory || DefaultStandardFontDataFactory; | ||||
|   params.ignoreErrors = params.stopAtErrors !== true; | ||||
|   params.transferPdfData = params.transferPdfData === true; | ||||
|   params.fontExtraProperties = params.fontExtraProperties === true; | ||||
|   params.pdfBug = params.pdfBug === true; | ||||
|   params.enableXfa = params.enableXfa === true; | ||||
| @ -443,7 +443,6 @@ function getDocument(src) { | ||||
|             { | ||||
|               length: params.length, | ||||
|               initialData: params.initialData, | ||||
|               transferPdfData: params.transferPdfData, | ||||
|               progressiveDone: params.progressiveDone, | ||||
|               contentDispositionFilename: params.contentDispositionFilename, | ||||
|               disableRange: params.disableRange, | ||||
| @ -518,8 +517,7 @@ async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) { | ||||
|     source.contentDispositionFilename = | ||||
|       pdfDataRangeTransport.contentDispositionFilename; | ||||
|   } | ||||
|   const transfers = | ||||
|     source.transferPdfData && source.data ? [source.data.buffer] : null; | ||||
|   const transfers = source.data ? [source.data.buffer] : null; | ||||
| 
 | ||||
|   const workerId = await worker.messageHandler.sendWithPromise( | ||||
|     "GetDocRequest", | ||||
| @ -659,6 +657,10 @@ class PDFDocumentLoadingTask { | ||||
| 
 | ||||
| /** | ||||
|  * Abstract class to support range requests file loading. | ||||
|  * | ||||
|  * NOTE: The TypedArrays passed to the constructor and relevant methods below | ||||
|  * will generally be transferred to the worker-thread. This helps reduce | ||||
|  * main-thread memory usage, however the worker-thread takes ownership of the | ||||
|  * TypedArrays, which means that they become detached (zero-length) and | ||||
|  * unusable on the main-thread afterwards. | ||||
|  */ | ||||
| class PDFDataRangeTransport { | ||||
|   /** | ||||
|  | ||||
| @ -18,13 +18,10 @@ import { isPdfFile } from "./display_utils.js"; | ||||
| 
 | ||||
| /** @implements {IPDFStream} */ | ||||
| class PDFDataTransportStream { | ||||
|   #transferPdfData = false; | ||||
| 
 | ||||
|   constructor( | ||||
|     { | ||||
|       length, | ||||
|       initialData, | ||||
|       transferPdfData = false, | ||||
|       progressiveDone = false, | ||||
|       contentDispositionFilename = null, | ||||
|       disableRange = false, | ||||
| @ -38,14 +35,17 @@ class PDFDataTransportStream { | ||||
|     ); | ||||
| 
 | ||||
|     this._queuedChunks = []; | ||||
|     this.#transferPdfData = transferPdfData; | ||||
|     this._progressiveDone = progressiveDone; | ||||
|     this._contentDispositionFilename = contentDispositionFilename; | ||||
| 
 | ||||
|     if (initialData?.length > 0) { | ||||
|       const buffer = this.#transferPdfData | ||||
|         ? initialData.buffer | ||||
|         : new Uint8Array(initialData).buffer; | ||||
|       // Prevent any possible issues by only transferring a Uint8Array that
 | ||||
|       // completely "utilizes" its underlying ArrayBuffer.
 | ||||
|       const buffer = | ||||
|         initialData instanceof Uint8Array && | ||||
|         initialData.byteLength === initialData.buffer.byteLength | ||||
|           ? initialData.buffer | ||||
|           : new Uint8Array(initialData).buffer; | ||||
|       this._queuedChunks.push(buffer); | ||||
|     } | ||||
| 
 | ||||
| @ -77,8 +77,11 @@ class PDFDataTransportStream { | ||||
|   } | ||||
| 
 | ||||
|   _onReceiveData({ begin, chunk }) { | ||||
|     // Prevent any possible issues by only transferring a Uint8Array that
 | ||||
|     // completely "utilizes" its underlying ArrayBuffer.
 | ||||
|     const buffer = | ||||
|       this.#transferPdfData && chunk?.length >= 0 | ||||
|       chunk instanceof Uint8Array && | ||||
|       chunk.byteLength === chunk.buffer.byteLength | ||||
|         ? chunk.buffer | ||||
|         : new Uint8Array(chunk).buffer; | ||||
| 
 | ||||
|  | ||||
| @ -193,44 +193,10 @@ describe("api", function () { | ||||
|       expect(data[0] instanceof PDFDocumentProxy).toEqual(true); | ||||
|       expect(data[1].loaded / data[1].total).toEqual(1); | ||||
| 
 | ||||
|       // Check that the TypedArray wasn't transferred.
 | ||||
|       expect(typedArrayPdf.length).toEqual(basicApiFileLength); | ||||
| 
 | ||||
|       await loadingTask.destroy(); | ||||
|     }); | ||||
| 
 | ||||
|     it("creates pdf doc from TypedArray, with `transferPdfData` set", async function () { | ||||
|       if (isNodeJS) { | ||||
|         pending("Worker is not supported in Node.js."); | ||||
|       if (!isNodeJS) { | ||||
|         // Check that the TypedArray was transferred.
 | ||||
|         expect(typedArrayPdf.length).toEqual(0); | ||||
|       } | ||||
|       const typedArrayPdf = await DefaultFileReaderFactory.fetch({ | ||||
|         path: TEST_PDFS_PATH + basicApiFileName, | ||||
|       }); | ||||
| 
 | ||||
|       // Sanity check to make sure that we fetched the entire PDF file.
 | ||||
|       expect(typedArrayPdf instanceof Uint8Array).toEqual(true); | ||||
|       expect(typedArrayPdf.length).toEqual(basicApiFileLength); | ||||
| 
 | ||||
|       const loadingTask = getDocument({ | ||||
|         data: typedArrayPdf, | ||||
|         transferPdfData: true, | ||||
|       }); | ||||
|       expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); | ||||
| 
 | ||||
|       const progressReportedCapability = createPromiseCapability(); | ||||
|       loadingTask.onProgress = function (data) { | ||||
|         progressReportedCapability.resolve(data); | ||||
|       }; | ||||
| 
 | ||||
|       const data = await Promise.all([ | ||||
|         loadingTask.promise, | ||||
|         progressReportedCapability.promise, | ||||
|       ]); | ||||
|       expect(data[0] instanceof PDFDocumentProxy).toEqual(true); | ||||
|       expect(data[1].loaded / data[1].total).toEqual(1); | ||||
| 
 | ||||
|       // Check that the TypedArray was transferred.
 | ||||
|       expect(typedArrayPdf.length).toEqual(0); | ||||
| 
 | ||||
|       await loadingTask.destroy(); | ||||
|     }); | ||||
| @ -259,6 +225,11 @@ describe("api", function () { | ||||
|       expect(data[0] instanceof PDFDocumentProxy).toEqual(true); | ||||
|       expect(data[1].loaded / data[1].total).toEqual(1); | ||||
| 
 | ||||
|       if (!isNodeJS) { | ||||
|         // Check that the ArrayBuffer was transferred.
 | ||||
|         expect(arrayBufferPdf.byteLength).toEqual(0); | ||||
|       } | ||||
| 
 | ||||
|       await loadingTask.destroy(); | ||||
|     }); | ||||
| 
 | ||||
| @ -3275,16 +3246,22 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) | ||||
| 
 | ||||
|     it("should fetch document info and page using ranges", async function () { | ||||
|       const initialDataLength = 4000; | ||||
|       const subArrays = []; | ||||
|       let fetches = 0; | ||||
| 
 | ||||
|       const data = await dataPromise; | ||||
|       const initialData = data.subarray(0, initialDataLength); | ||||
|       const initialData = new Uint8Array(data.subarray(0, initialDataLength)); | ||||
|       subArrays.push(initialData); | ||||
| 
 | ||||
|       const transport = new PDFDataRangeTransport(data.length, initialData); | ||||
|       transport.requestDataRange = function (begin, end) { | ||||
|         fetches++; | ||||
|         waitSome(function () { | ||||
|           transport.onDataProgress(4000); | ||||
|           transport.onDataRange(begin, data.subarray(begin, end)); | ||||
|           const chunk = new Uint8Array(data.subarray(begin, end)); | ||||
|           subArrays.push(chunk); | ||||
| 
 | ||||
|           transport.onDataProgress(initialDataLength); | ||||
|           transport.onDataRange(begin, chunk); | ||||
|         }); | ||||
|       }; | ||||
| 
 | ||||
| @ -3296,65 +3273,40 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) | ||||
|       expect(pdfPage.rotate).toEqual(0); | ||||
|       expect(fetches).toBeGreaterThan(2); | ||||
| 
 | ||||
|       // Check that the TypedArray wasn't transferred.
 | ||||
|       expect(initialData.length).toEqual(initialDataLength); | ||||
| 
 | ||||
|       await loadingTask.destroy(); | ||||
|     }); | ||||
| 
 | ||||
|     it("should fetch document info and page using ranges, with `transferPdfData` set", async function () { | ||||
|       if (isNodeJS) { | ||||
|         pending("Worker is not supported in Node.js."); | ||||
|       if (!isNodeJS) { | ||||
|         // Check that the TypedArrays were transferred.
 | ||||
|         for (const array of subArrays) { | ||||
|           expect(array.length).toEqual(0); | ||||
|         } | ||||
|       } | ||||
|       const initialDataLength = 4000; | ||||
|       let fetches = 0; | ||||
| 
 | ||||
|       const data = await dataPromise; | ||||
|       const initialData = new Uint8Array(data.subarray(0, initialDataLength)); | ||||
|       const transport = new PDFDataRangeTransport(data.length, initialData); | ||||
|       transport.requestDataRange = function (begin, end) { | ||||
|         fetches++; | ||||
|         waitSome(function () { | ||||
|           transport.onDataProgress(4000); | ||||
|           transport.onDataRange( | ||||
|             begin, | ||||
|             new Uint8Array(data.subarray(begin, end)) | ||||
|           ); | ||||
|         }); | ||||
|       }; | ||||
| 
 | ||||
|       const loadingTask = getDocument({ | ||||
|         range: transport, | ||||
|         transferPdfData: true, | ||||
|       }); | ||||
|       const pdfDocument = await loadingTask.promise; | ||||
|       expect(pdfDocument.numPages).toEqual(14); | ||||
| 
 | ||||
|       const pdfPage = await pdfDocument.getPage(10); | ||||
|       expect(pdfPage.rotate).toEqual(0); | ||||
|       expect(fetches).toBeGreaterThan(2); | ||||
| 
 | ||||
|       // Check that the TypedArray was transferred.
 | ||||
|       expect(initialData.length).toEqual(0); | ||||
| 
 | ||||
|       await loadingTask.destroy(); | ||||
|     }); | ||||
| 
 | ||||
|     it("should fetch document info and page using range and streaming", async function () { | ||||
|       const initialDataLength = 4000; | ||||
|       const subArrays = []; | ||||
|       let fetches = 0; | ||||
| 
 | ||||
|       const data = await dataPromise; | ||||
|       const initialData = data.subarray(0, initialDataLength); | ||||
|       const initialData = new Uint8Array(data.subarray(0, initialDataLength)); | ||||
|       subArrays.push(initialData); | ||||
| 
 | ||||
|       const transport = new PDFDataRangeTransport(data.length, initialData); | ||||
|       transport.requestDataRange = function (begin, end) { | ||||
|         fetches++; | ||||
|         if (fetches === 1) { | ||||
|           const chunk = new Uint8Array(data.subarray(initialDataLength)); | ||||
|           subArrays.push(chunk); | ||||
| 
 | ||||
|           // Send rest of the data on first range request.
 | ||||
|           transport.onDataProgressiveRead(data.subarray(initialDataLength)); | ||||
|           transport.onDataProgressiveRead(chunk); | ||||
|         } | ||||
|         waitSome(function () { | ||||
|           transport.onDataRange(begin, data.subarray(begin, end)); | ||||
|           const chunk = new Uint8Array(data.subarray(begin, end)); | ||||
|           subArrays.push(chunk); | ||||
| 
 | ||||
|           transport.onDataRange(begin, chunk); | ||||
|         }); | ||||
|       }; | ||||
| 
 | ||||
| @ -3369,6 +3321,14 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) | ||||
|       await new Promise(resolve => { | ||||
|         waitSome(resolve); | ||||
|       }); | ||||
| 
 | ||||
|       if (!isNodeJS) { | ||||
|         // Check that the TypedArrays were transferred.
 | ||||
|         for (const array of subArrays) { | ||||
|           expect(array.length).toEqual(0); | ||||
|         } | ||||
|       } | ||||
| 
 | ||||
|       await loadingTask.destroy(); | ||||
|     }); | ||||
| 
 | ||||
| @ -3376,12 +3336,16 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) | ||||
|       "should fetch document info and page, without range, " + | ||||
|         "using complete initialData", | ||||
|       async function () { | ||||
|         const subArrays = []; | ||||
|         let fetches = 0; | ||||
| 
 | ||||
|         const data = await dataPromise; | ||||
|         const initialData = new Uint8Array(data); | ||||
|         subArrays.push(initialData); | ||||
| 
 | ||||
|         const transport = new PDFDataRangeTransport( | ||||
|           data.length, | ||||
|           data, | ||||
|           initialData, | ||||
|           /* progressiveDone = */ true | ||||
|         ); | ||||
|         transport.requestDataRange = function (begin, end) { | ||||
| @ -3399,6 +3363,13 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) | ||||
|         expect(pdfPage.rotate).toEqual(0); | ||||
|         expect(fetches).toEqual(0); | ||||
| 
 | ||||
|         if (!isNodeJS) { | ||||
|           // Check that the TypedArrays were transferred.
 | ||||
|           for (const array of subArrays) { | ||||
|             expect(array.length).toEqual(0); | ||||
|           } | ||||
|         } | ||||
| 
 | ||||
|         await loadingTask.destroy(); | ||||
|       } | ||||
|     ); | ||||
|  | ||||
| @ -270,11 +270,6 @@ const defaultOptions = { | ||||
|         : "../web/standard_fonts/", | ||||
|     kind: OptionKind.API, | ||||
|   }, | ||||
|   transferPdfData: { | ||||
|     /** @type {boolean} */ | ||||
|     value: typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL"), | ||||
|     kind: OptionKind.API, | ||||
|   }, | ||||
|   verbosity: { | ||||
|     /** @type {number} */ | ||||
|     value: 1, | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user