Merge pull request #15920 from Snuffleupagus/transfer-pdf-data
[api-minor] Enable transferring of TypedArray PDF data by default (PR 15908 follow-up)
This commit is contained in:
commit
8f3fa18c93
@ -139,8 +139,12 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) {
|
||||
* @typedef {Object} DocumentInitParameters
|
||||
* @property {string | URL} [url] - The URL of the PDF.
|
||||
* @property {BinaryData} [data] - Binary PDF data.
|
||||
* Use typed arrays (Uint8Array) to improve the memory usage. If PDF data is
|
||||
* Use TypedArrays (Uint8Array) to improve the memory usage. If PDF data is
|
||||
* BASE64-encoded, use `atob()` to convert it to a binary string first.
|
||||
*
|
||||
* NOTE: If TypedArrays are used they will generally be transferred to the
|
||||
* worker-thread. This will help reduce main-thread memory usage, however
|
||||
* it will take ownership of the TypedArrays.
|
||||
* @property {Object} [httpHeaders] - Basic authentication headers.
|
||||
* @property {boolean} [withCredentials] - Indicates whether or not
|
||||
* cross-site Access-Control requests should be made using credentials such
|
||||
@ -189,12 +193,6 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) {
|
||||
* @property {number} [maxImageSize] - The maximum allowed image size in total
|
||||
* pixels, i.e. width * height. Images above this value will not be rendered.
|
||||
* Use -1 for no limit, which is also the default value.
|
||||
* @property {boolean} [transferPdfData] - Determines if we can transfer
|
||||
* TypedArrays used for loading the PDF file, utilized together with:
|
||||
* - The `data`-option, for the `getDocument` function.
|
||||
* - The `PDFDataTransportStream` implementation.
|
||||
* This will help reduce main-thread memory usage, however it will take
|
||||
* ownership of the TypedArrays. The default value is `false`.
|
||||
* @property {boolean} [isEvalSupported] - Determines if we can evaluate strings
|
||||
* as JavaScript. Primarily used to improve performance of font rendering, and
|
||||
* when parsing PDF functions. The default value is `true`.
|
||||
@ -281,20 +279,20 @@ function getDocument(src) {
|
||||
worker = null;
|
||||
|
||||
for (const key in source) {
|
||||
const value = source[key];
|
||||
const val = source[key];
|
||||
|
||||
switch (key) {
|
||||
case "url":
|
||||
if (typeof window !== "undefined") {
|
||||
try {
|
||||
// The full path is required in the 'url' field.
|
||||
params[key] = new URL(value, window.location).href;
|
||||
params[key] = new URL(val, window.location).href;
|
||||
continue;
|
||||
} catch (ex) {
|
||||
warn(`Cannot create valid URL: "${ex}".`);
|
||||
}
|
||||
} else if (typeof value === "string" || value instanceof URL) {
|
||||
params[key] = value.toString(); // Support Node.js environments.
|
||||
} else if (typeof val === "string" || val instanceof URL) {
|
||||
params[key] = val.toString(); // Support Node.js environments.
|
||||
continue;
|
||||
}
|
||||
throw new Error(
|
||||
@ -302,10 +300,10 @@ function getDocument(src) {
|
||||
"either string or URL-object is expected in the url property."
|
||||
);
|
||||
case "range":
|
||||
rangeTransport = value;
|
||||
rangeTransport = val;
|
||||
continue;
|
||||
case "worker":
|
||||
worker = value;
|
||||
worker = val;
|
||||
continue;
|
||||
case "data":
|
||||
// Converting string or array-like data to Uint8Array.
|
||||
@ -314,21 +312,24 @@ function getDocument(src) {
|
||||
PDFJSDev.test("GENERIC") &&
|
||||
isNodeJS &&
|
||||
typeof Buffer !== "undefined" && // eslint-disable-line no-undef
|
||||
value instanceof Buffer // eslint-disable-line no-undef
|
||||
val instanceof Buffer // eslint-disable-line no-undef
|
||||
) {
|
||||
params[key] = new Uint8Array(value);
|
||||
} else if (value instanceof Uint8Array) {
|
||||
break; // Use the data as-is when it's already a Uint8Array.
|
||||
} else if (typeof value === "string") {
|
||||
params[key] = stringToBytes(value);
|
||||
params[key] = new Uint8Array(val);
|
||||
} else if (
|
||||
typeof value === "object" &&
|
||||
value !== null &&
|
||||
!isNaN(value.length)
|
||||
val instanceof Uint8Array &&
|
||||
val.byteLength === val.buffer.byteLength
|
||||
) {
|
||||
params[key] = new Uint8Array(value);
|
||||
} else if (isArrayBuffer(value)) {
|
||||
params[key] = new Uint8Array(value);
|
||||
// Use the data as-is when it's already a Uint8Array that completely
|
||||
// "utilizes" its underlying ArrayBuffer, to prevent any possible
|
||||
// issues when transferring it to the worker-thread.
|
||||
break;
|
||||
} else if (typeof val === "string") {
|
||||
params[key] = stringToBytes(val);
|
||||
} else if (
|
||||
(typeof val === "object" && val !== null && !isNaN(val.length)) ||
|
||||
isArrayBuffer(val)
|
||||
) {
|
||||
params[key] = new Uint8Array(val);
|
||||
} else {
|
||||
throw new Error(
|
||||
"Invalid PDF binary data: either TypedArray, " +
|
||||
@ -337,7 +338,7 @@ function getDocument(src) {
|
||||
}
|
||||
continue;
|
||||
}
|
||||
params[key] = value;
|
||||
params[key] = val;
|
||||
}
|
||||
|
||||
params.CMapReaderFactory =
|
||||
@ -345,7 +346,6 @@ function getDocument(src) {
|
||||
params.StandardFontDataFactory =
|
||||
params.StandardFontDataFactory || DefaultStandardFontDataFactory;
|
||||
params.ignoreErrors = params.stopAtErrors !== true;
|
||||
params.transferPdfData = params.transferPdfData === true;
|
||||
params.fontExtraProperties = params.fontExtraProperties === true;
|
||||
params.pdfBug = params.pdfBug === true;
|
||||
params.enableXfa = params.enableXfa === true;
|
||||
@ -443,7 +443,6 @@ function getDocument(src) {
|
||||
{
|
||||
length: params.length,
|
||||
initialData: params.initialData,
|
||||
transferPdfData: params.transferPdfData,
|
||||
progressiveDone: params.progressiveDone,
|
||||
contentDispositionFilename: params.contentDispositionFilename,
|
||||
disableRange: params.disableRange,
|
||||
@ -518,8 +517,7 @@ async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) {
|
||||
source.contentDispositionFilename =
|
||||
pdfDataRangeTransport.contentDispositionFilename;
|
||||
}
|
||||
const transfers =
|
||||
source.transferPdfData && source.data ? [source.data.buffer] : null;
|
||||
const transfers = source.data ? [source.data.buffer] : null;
|
||||
|
||||
const workerId = await worker.messageHandler.sendWithPromise(
|
||||
"GetDocRequest",
|
||||
@ -659,6 +657,10 @@ class PDFDocumentLoadingTask {
|
||||
|
||||
/**
|
||||
* Abstract class to support range requests file loading.
|
||||
*
|
||||
* NOTE: The TypedArrays passed to the constructor and relevant methods below
|
||||
* will generally be transferred to the worker-thread. This will help reduce
|
||||
* main-thread memory usage, however it will take ownership of the TypedArrays.
|
||||
*/
|
||||
class PDFDataRangeTransport {
|
||||
/**
|
||||
|
@ -18,13 +18,10 @@ import { isPdfFile } from "./display_utils.js";
|
||||
|
||||
/** @implements {IPDFStream} */
|
||||
class PDFDataTransportStream {
|
||||
#transferPdfData = false;
|
||||
|
||||
constructor(
|
||||
{
|
||||
length,
|
||||
initialData,
|
||||
transferPdfData = false,
|
||||
progressiveDone = false,
|
||||
contentDispositionFilename = null,
|
||||
disableRange = false,
|
||||
@ -38,14 +35,17 @@ class PDFDataTransportStream {
|
||||
);
|
||||
|
||||
this._queuedChunks = [];
|
||||
this.#transferPdfData = transferPdfData;
|
||||
this._progressiveDone = progressiveDone;
|
||||
this._contentDispositionFilename = contentDispositionFilename;
|
||||
|
||||
if (initialData?.length > 0) {
|
||||
const buffer = this.#transferPdfData
|
||||
? initialData.buffer
|
||||
: new Uint8Array(initialData).buffer;
|
||||
// Prevent any possible issues by only transferring a Uint8Array that
|
||||
// completely "utilizes" its underlying ArrayBuffer.
|
||||
const buffer =
|
||||
initialData instanceof Uint8Array &&
|
||||
initialData.byteLength === initialData.buffer.byteLength
|
||||
? initialData.buffer
|
||||
: new Uint8Array(initialData).buffer;
|
||||
this._queuedChunks.push(buffer);
|
||||
}
|
||||
|
||||
@ -77,8 +77,11 @@ class PDFDataTransportStream {
|
||||
}
|
||||
|
||||
_onReceiveData({ begin, chunk }) {
|
||||
// Prevent any possible issues by only transferring a Uint8Array that
|
||||
// completely "utilizes" its underlying ArrayBuffer.
|
||||
const buffer =
|
||||
this.#transferPdfData && chunk?.length >= 0
|
||||
chunk instanceof Uint8Array &&
|
||||
chunk.byteLength === chunk.buffer.byteLength
|
||||
? chunk.buffer
|
||||
: new Uint8Array(chunk).buffer;
|
||||
|
||||
|
@ -193,44 +193,10 @@ describe("api", function () {
|
||||
expect(data[0] instanceof PDFDocumentProxy).toEqual(true);
|
||||
expect(data[1].loaded / data[1].total).toEqual(1);
|
||||
|
||||
// Check that the TypedArray wasn't transferred.
|
||||
expect(typedArrayPdf.length).toEqual(basicApiFileLength);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("creates pdf doc from TypedArray, with `transferPdfData` set", async function () {
|
||||
if (isNodeJS) {
|
||||
pending("Worker is not supported in Node.js.");
|
||||
if (!isNodeJS) {
|
||||
// Check that the TypedArray was transferred.
|
||||
expect(typedArrayPdf.length).toEqual(0);
|
||||
}
|
||||
const typedArrayPdf = await DefaultFileReaderFactory.fetch({
|
||||
path: TEST_PDFS_PATH + basicApiFileName,
|
||||
});
|
||||
|
||||
// Sanity check to make sure that we fetched the entire PDF file.
|
||||
expect(typedArrayPdf instanceof Uint8Array).toEqual(true);
|
||||
expect(typedArrayPdf.length).toEqual(basicApiFileLength);
|
||||
|
||||
const loadingTask = getDocument({
|
||||
data: typedArrayPdf,
|
||||
transferPdfData: true,
|
||||
});
|
||||
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||
|
||||
const progressReportedCapability = createPromiseCapability();
|
||||
loadingTask.onProgress = function (data) {
|
||||
progressReportedCapability.resolve(data);
|
||||
};
|
||||
|
||||
const data = await Promise.all([
|
||||
loadingTask.promise,
|
||||
progressReportedCapability.promise,
|
||||
]);
|
||||
expect(data[0] instanceof PDFDocumentProxy).toEqual(true);
|
||||
expect(data[1].loaded / data[1].total).toEqual(1);
|
||||
|
||||
// Check that the TypedArray was transferred.
|
||||
expect(typedArrayPdf.length).toEqual(0);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
@ -259,6 +225,11 @@ describe("api", function () {
|
||||
expect(data[0] instanceof PDFDocumentProxy).toEqual(true);
|
||||
expect(data[1].loaded / data[1].total).toEqual(1);
|
||||
|
||||
if (!isNodeJS) {
|
||||
// Check that the ArrayBuffer was transferred.
|
||||
expect(arrayBufferPdf.byteLength).toEqual(0);
|
||||
}
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
@ -3275,16 +3246,22 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
|
||||
it("should fetch document info and page using ranges", async function () {
|
||||
const initialDataLength = 4000;
|
||||
const subArrays = [];
|
||||
let fetches = 0;
|
||||
|
||||
const data = await dataPromise;
|
||||
const initialData = data.subarray(0, initialDataLength);
|
||||
const initialData = new Uint8Array(data.subarray(0, initialDataLength));
|
||||
subArrays.push(initialData);
|
||||
|
||||
const transport = new PDFDataRangeTransport(data.length, initialData);
|
||||
transport.requestDataRange = function (begin, end) {
|
||||
fetches++;
|
||||
waitSome(function () {
|
||||
transport.onDataProgress(4000);
|
||||
transport.onDataRange(begin, data.subarray(begin, end));
|
||||
const chunk = new Uint8Array(data.subarray(begin, end));
|
||||
subArrays.push(chunk);
|
||||
|
||||
transport.onDataProgress(initialDataLength);
|
||||
transport.onDataRange(begin, chunk);
|
||||
});
|
||||
};
|
||||
|
||||
@ -3296,65 +3273,40 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
expect(pdfPage.rotate).toEqual(0);
|
||||
expect(fetches).toBeGreaterThan(2);
|
||||
|
||||
// Check that the TypedArray wasn't transferred.
|
||||
expect(initialData.length).toEqual(initialDataLength);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("should fetch document info and page using ranges, with `transferPdfData` set", async function () {
|
||||
if (isNodeJS) {
|
||||
pending("Worker is not supported in Node.js.");
|
||||
if (!isNodeJS) {
|
||||
// Check that the TypedArrays were transferred.
|
||||
for (const array of subArrays) {
|
||||
expect(array.length).toEqual(0);
|
||||
}
|
||||
}
|
||||
const initialDataLength = 4000;
|
||||
let fetches = 0;
|
||||
|
||||
const data = await dataPromise;
|
||||
const initialData = new Uint8Array(data.subarray(0, initialDataLength));
|
||||
const transport = new PDFDataRangeTransport(data.length, initialData);
|
||||
transport.requestDataRange = function (begin, end) {
|
||||
fetches++;
|
||||
waitSome(function () {
|
||||
transport.onDataProgress(4000);
|
||||
transport.onDataRange(
|
||||
begin,
|
||||
new Uint8Array(data.subarray(begin, end))
|
||||
);
|
||||
});
|
||||
};
|
||||
|
||||
const loadingTask = getDocument({
|
||||
range: transport,
|
||||
transferPdfData: true,
|
||||
});
|
||||
const pdfDocument = await loadingTask.promise;
|
||||
expect(pdfDocument.numPages).toEqual(14);
|
||||
|
||||
const pdfPage = await pdfDocument.getPage(10);
|
||||
expect(pdfPage.rotate).toEqual(0);
|
||||
expect(fetches).toBeGreaterThan(2);
|
||||
|
||||
// Check that the TypedArray was transferred.
|
||||
expect(initialData.length).toEqual(0);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("should fetch document info and page using range and streaming", async function () {
|
||||
const initialDataLength = 4000;
|
||||
const subArrays = [];
|
||||
let fetches = 0;
|
||||
|
||||
const data = await dataPromise;
|
||||
const initialData = data.subarray(0, initialDataLength);
|
||||
const initialData = new Uint8Array(data.subarray(0, initialDataLength));
|
||||
subArrays.push(initialData);
|
||||
|
||||
const transport = new PDFDataRangeTransport(data.length, initialData);
|
||||
transport.requestDataRange = function (begin, end) {
|
||||
fetches++;
|
||||
if (fetches === 1) {
|
||||
const chunk = new Uint8Array(data.subarray(initialDataLength));
|
||||
subArrays.push(chunk);
|
||||
|
||||
// Send rest of the data on first range request.
|
||||
transport.onDataProgressiveRead(data.subarray(initialDataLength));
|
||||
transport.onDataProgressiveRead(chunk);
|
||||
}
|
||||
waitSome(function () {
|
||||
transport.onDataRange(begin, data.subarray(begin, end));
|
||||
const chunk = new Uint8Array(data.subarray(begin, end));
|
||||
subArrays.push(chunk);
|
||||
|
||||
transport.onDataRange(begin, chunk);
|
||||
});
|
||||
};
|
||||
|
||||
@ -3369,6 +3321,14 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
await new Promise(resolve => {
|
||||
waitSome(resolve);
|
||||
});
|
||||
|
||||
if (!isNodeJS) {
|
||||
// Check that the TypedArrays were transferred.
|
||||
for (const array of subArrays) {
|
||||
expect(array.length).toEqual(0);
|
||||
}
|
||||
}
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
@ -3376,12 +3336,16 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
"should fetch document info and page, without range, " +
|
||||
"using complete initialData",
|
||||
async function () {
|
||||
const subArrays = [];
|
||||
let fetches = 0;
|
||||
|
||||
const data = await dataPromise;
|
||||
const initialData = new Uint8Array(data);
|
||||
subArrays.push(initialData);
|
||||
|
||||
const transport = new PDFDataRangeTransport(
|
||||
data.length,
|
||||
data,
|
||||
initialData,
|
||||
/* progressiveDone = */ true
|
||||
);
|
||||
transport.requestDataRange = function (begin, end) {
|
||||
@ -3399,6 +3363,13 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
expect(pdfPage.rotate).toEqual(0);
|
||||
expect(fetches).toEqual(0);
|
||||
|
||||
if (!isNodeJS) {
|
||||
// Check that the TypedArrays were transferred.
|
||||
for (const array of subArrays) {
|
||||
expect(array.length).toEqual(0);
|
||||
}
|
||||
}
|
||||
|
||||
await loadingTask.destroy();
|
||||
}
|
||||
);
|
||||
|
@ -270,11 +270,6 @@ const defaultOptions = {
|
||||
: "../web/standard_fonts/",
|
||||
kind: OptionKind.API,
|
||||
},
|
||||
transferPdfData: {
|
||||
/** @type {boolean} */
|
||||
value: typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL"),
|
||||
kind: OptionKind.API,
|
||||
},
|
||||
verbosity: {
|
||||
/** @type {number} */
|
||||
value: 1,
|
||||
|
Loading…
Reference in New Issue
Block a user