[api-minor] Enable transferring of TypedArray PDF data by default (PR 15908 follow-up)
This patch removes the recently introduced `transferPdfData` API-option, and simply enables transferring of TypedArray data *by default* instead of copying it. This will help reduce main-thread memory usage, however it will take ownership of the TypedArrays. Currently this only applies to the following cases: - TypedArrays passed to the `getDocument`-function in the API, in order to open PDF documents from binary data. - TypedArrays passed to a `PDFDataRangeTransport`-instance, used to support custom PDF document fetching/loading (see e.g. the Firefox PDF Viewer). *PLEASE NOTE:* To avoid being affected by this, please simply *copy* any TypedArray data before passing it to either of the functions/methods mentioned above. Now that we transfer TypedArray data that we previously only copied, we need to be more careful with input validation. Given how the `{IPDFStreamReader, IPDFStreamRangeReader}.read` methods will always return ArrayBuffer data, which is then transferred to the worker-thread[1], the actual TypedArray data passed to the API thus need to have the same exact size as its underlying ArrayBuffer to prevent issues. Hence we'll check for this and only allow transferring of *safe* TypedArray data, and fallback to simply copying the data just as before. This obviously shouldn't be an issue in the Firefox PDF Viewer, but for the general PDF.js library we need to be more careful here. --- [1] Seee09ad99973/src/display/api.js (L2492-L2506)
respectivelye09ad99973/src/display/api.js (L2578-L2590)
This commit is contained in:
parent
99cfab18c1
commit
397f943ca3
@ -139,8 +139,12 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) {
|
||||
* @typedef {Object} DocumentInitParameters
|
||||
* @property {string | URL} [url] - The URL of the PDF.
|
||||
* @property {BinaryData} [data] - Binary PDF data.
|
||||
* Use typed arrays (Uint8Array) to improve the memory usage. If PDF data is
|
||||
* Use TypedArrays (Uint8Array) to improve the memory usage. If PDF data is
|
||||
* BASE64-encoded, use `atob()` to convert it to a binary string first.
|
||||
*
|
||||
* NOTE: If TypedArrays are used they will generally be transferred to the
|
||||
* worker-thread. This will help reduce main-thread memory usage, however
|
||||
* it will take ownership of the TypedArrays.
|
||||
* @property {Object} [httpHeaders] - Basic authentication headers.
|
||||
* @property {boolean} [withCredentials] - Indicates whether or not
|
||||
* cross-site Access-Control requests should be made using credentials such
|
||||
@ -189,12 +193,6 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) {
|
||||
* @property {number} [maxImageSize] - The maximum allowed image size in total
|
||||
* pixels, i.e. width * height. Images above this value will not be rendered.
|
||||
* Use -1 for no limit, which is also the default value.
|
||||
* @property {boolean} [transferPdfData] - Determines if we can transfer
|
||||
* TypedArrays used for loading the PDF file, utilized together with:
|
||||
* - The `data`-option, for the `getDocument` function.
|
||||
* - The `PDFDataTransportStream` implementation.
|
||||
* This will help reduce main-thread memory usage, however it will take
|
||||
* ownership of the TypedArrays. The default value is `false`.
|
||||
* @property {boolean} [isEvalSupported] - Determines if we can evaluate strings
|
||||
* as JavaScript. Primarily used to improve performance of font rendering, and
|
||||
* when parsing PDF functions. The default value is `true`.
|
||||
@ -317,8 +315,14 @@ function getDocument(src) {
|
||||
val instanceof Buffer // eslint-disable-line no-undef
|
||||
) {
|
||||
params[key] = new Uint8Array(val);
|
||||
} else if (val instanceof Uint8Array) {
|
||||
break; // Use the data as-is when it's already a Uint8Array.
|
||||
} else if (
|
||||
val instanceof Uint8Array &&
|
||||
val.byteLength === val.buffer.byteLength
|
||||
) {
|
||||
// Use the data as-is when it's already a Uint8Array that completely
|
||||
// "utilizes" its underlying ArrayBuffer, to prevent any possible
|
||||
// issues when transferring it to the worker-thread.
|
||||
break;
|
||||
} else if (typeof val === "string") {
|
||||
params[key] = stringToBytes(val);
|
||||
} else if (
|
||||
@ -342,7 +346,6 @@ function getDocument(src) {
|
||||
params.StandardFontDataFactory =
|
||||
params.StandardFontDataFactory || DefaultStandardFontDataFactory;
|
||||
params.ignoreErrors = params.stopAtErrors !== true;
|
||||
params.transferPdfData = params.transferPdfData === true;
|
||||
params.fontExtraProperties = params.fontExtraProperties === true;
|
||||
params.pdfBug = params.pdfBug === true;
|
||||
params.enableXfa = params.enableXfa === true;
|
||||
@ -440,7 +443,6 @@ function getDocument(src) {
|
||||
{
|
||||
length: params.length,
|
||||
initialData: params.initialData,
|
||||
transferPdfData: params.transferPdfData,
|
||||
progressiveDone: params.progressiveDone,
|
||||
contentDispositionFilename: params.contentDispositionFilename,
|
||||
disableRange: params.disableRange,
|
||||
@ -515,8 +517,7 @@ async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) {
|
||||
source.contentDispositionFilename =
|
||||
pdfDataRangeTransport.contentDispositionFilename;
|
||||
}
|
||||
const transfers =
|
||||
source.transferPdfData && source.data ? [source.data.buffer] : null;
|
||||
const transfers = source.data ? [source.data.buffer] : null;
|
||||
|
||||
const workerId = await worker.messageHandler.sendWithPromise(
|
||||
"GetDocRequest",
|
||||
@ -656,6 +657,10 @@ class PDFDocumentLoadingTask {
|
||||
|
||||
/**
|
||||
* Abstract class to support range requests file loading.
|
||||
*
|
||||
* NOTE: The TypedArrays passed to the constructor and relevant methods below
|
||||
* will generally be transferred to the worker-thread. This will help reduce
|
||||
* main-thread memory usage, however it will take ownership of the TypedArrays.
|
||||
*/
|
||||
class PDFDataRangeTransport {
|
||||
/**
|
||||
|
@ -18,13 +18,10 @@ import { isPdfFile } from "./display_utils.js";
|
||||
|
||||
/** @implements {IPDFStream} */
|
||||
class PDFDataTransportStream {
|
||||
#transferPdfData = false;
|
||||
|
||||
constructor(
|
||||
{
|
||||
length,
|
||||
initialData,
|
||||
transferPdfData = false,
|
||||
progressiveDone = false,
|
||||
contentDispositionFilename = null,
|
||||
disableRange = false,
|
||||
@ -38,14 +35,17 @@ class PDFDataTransportStream {
|
||||
);
|
||||
|
||||
this._queuedChunks = [];
|
||||
this.#transferPdfData = transferPdfData;
|
||||
this._progressiveDone = progressiveDone;
|
||||
this._contentDispositionFilename = contentDispositionFilename;
|
||||
|
||||
if (initialData?.length > 0) {
|
||||
const buffer = this.#transferPdfData
|
||||
? initialData.buffer
|
||||
: new Uint8Array(initialData).buffer;
|
||||
// Prevent any possible issues by only transferring a Uint8Array that
|
||||
// completely "utilizes" its underlying ArrayBuffer.
|
||||
const buffer =
|
||||
initialData instanceof Uint8Array &&
|
||||
initialData.byteLength === initialData.buffer.byteLength
|
||||
? initialData.buffer
|
||||
: new Uint8Array(initialData).buffer;
|
||||
this._queuedChunks.push(buffer);
|
||||
}
|
||||
|
||||
@ -77,8 +77,11 @@ class PDFDataTransportStream {
|
||||
}
|
||||
|
||||
_onReceiveData({ begin, chunk }) {
|
||||
// Prevent any possible issues by only transferring a Uint8Array that
|
||||
// completely "utilizes" its underlying ArrayBuffer.
|
||||
const buffer =
|
||||
this.#transferPdfData && chunk?.length >= 0
|
||||
chunk instanceof Uint8Array &&
|
||||
chunk.byteLength === chunk.buffer.byteLength
|
||||
? chunk.buffer
|
||||
: new Uint8Array(chunk).buffer;
|
||||
|
||||
|
@ -193,44 +193,10 @@ describe("api", function () {
|
||||
expect(data[0] instanceof PDFDocumentProxy).toEqual(true);
|
||||
expect(data[1].loaded / data[1].total).toEqual(1);
|
||||
|
||||
// Check that the TypedArray wasn't transferred.
|
||||
expect(typedArrayPdf.length).toEqual(basicApiFileLength);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("creates pdf doc from TypedArray, with `transferPdfData` set", async function () {
|
||||
if (isNodeJS) {
|
||||
pending("Worker is not supported in Node.js.");
|
||||
if (!isNodeJS) {
|
||||
// Check that the TypedArray was transferred.
|
||||
expect(typedArrayPdf.length).toEqual(0);
|
||||
}
|
||||
const typedArrayPdf = await DefaultFileReaderFactory.fetch({
|
||||
path: TEST_PDFS_PATH + basicApiFileName,
|
||||
});
|
||||
|
||||
// Sanity check to make sure that we fetched the entire PDF file.
|
||||
expect(typedArrayPdf instanceof Uint8Array).toEqual(true);
|
||||
expect(typedArrayPdf.length).toEqual(basicApiFileLength);
|
||||
|
||||
const loadingTask = getDocument({
|
||||
data: typedArrayPdf,
|
||||
transferPdfData: true,
|
||||
});
|
||||
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||
|
||||
const progressReportedCapability = createPromiseCapability();
|
||||
loadingTask.onProgress = function (data) {
|
||||
progressReportedCapability.resolve(data);
|
||||
};
|
||||
|
||||
const data = await Promise.all([
|
||||
loadingTask.promise,
|
||||
progressReportedCapability.promise,
|
||||
]);
|
||||
expect(data[0] instanceof PDFDocumentProxy).toEqual(true);
|
||||
expect(data[1].loaded / data[1].total).toEqual(1);
|
||||
|
||||
// Check that the TypedArray was transferred.
|
||||
expect(typedArrayPdf.length).toEqual(0);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
@ -259,6 +225,11 @@ describe("api", function () {
|
||||
expect(data[0] instanceof PDFDocumentProxy).toEqual(true);
|
||||
expect(data[1].loaded / data[1].total).toEqual(1);
|
||||
|
||||
if (!isNodeJS) {
|
||||
// Check that the ArrayBuffer was transferred.
|
||||
expect(arrayBufferPdf.byteLength).toEqual(0);
|
||||
}
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
@ -3275,16 +3246,22 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
|
||||
it("should fetch document info and page using ranges", async function () {
|
||||
const initialDataLength = 4000;
|
||||
const subArrays = [];
|
||||
let fetches = 0;
|
||||
|
||||
const data = await dataPromise;
|
||||
const initialData = data.subarray(0, initialDataLength);
|
||||
const initialData = new Uint8Array(data.subarray(0, initialDataLength));
|
||||
subArrays.push(initialData);
|
||||
|
||||
const transport = new PDFDataRangeTransport(data.length, initialData);
|
||||
transport.requestDataRange = function (begin, end) {
|
||||
fetches++;
|
||||
waitSome(function () {
|
||||
transport.onDataProgress(4000);
|
||||
transport.onDataRange(begin, data.subarray(begin, end));
|
||||
const chunk = new Uint8Array(data.subarray(begin, end));
|
||||
subArrays.push(chunk);
|
||||
|
||||
transport.onDataProgress(initialDataLength);
|
||||
transport.onDataRange(begin, chunk);
|
||||
});
|
||||
};
|
||||
|
||||
@ -3296,65 +3273,40 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
expect(pdfPage.rotate).toEqual(0);
|
||||
expect(fetches).toBeGreaterThan(2);
|
||||
|
||||
// Check that the TypedArray wasn't transferred.
|
||||
expect(initialData.length).toEqual(initialDataLength);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("should fetch document info and page using ranges, with `transferPdfData` set", async function () {
|
||||
if (isNodeJS) {
|
||||
pending("Worker is not supported in Node.js.");
|
||||
if (!isNodeJS) {
|
||||
// Check that the TypedArrays were transferred.
|
||||
for (const array of subArrays) {
|
||||
expect(array.length).toEqual(0);
|
||||
}
|
||||
}
|
||||
const initialDataLength = 4000;
|
||||
let fetches = 0;
|
||||
|
||||
const data = await dataPromise;
|
||||
const initialData = new Uint8Array(data.subarray(0, initialDataLength));
|
||||
const transport = new PDFDataRangeTransport(data.length, initialData);
|
||||
transport.requestDataRange = function (begin, end) {
|
||||
fetches++;
|
||||
waitSome(function () {
|
||||
transport.onDataProgress(4000);
|
||||
transport.onDataRange(
|
||||
begin,
|
||||
new Uint8Array(data.subarray(begin, end))
|
||||
);
|
||||
});
|
||||
};
|
||||
|
||||
const loadingTask = getDocument({
|
||||
range: transport,
|
||||
transferPdfData: true,
|
||||
});
|
||||
const pdfDocument = await loadingTask.promise;
|
||||
expect(pdfDocument.numPages).toEqual(14);
|
||||
|
||||
const pdfPage = await pdfDocument.getPage(10);
|
||||
expect(pdfPage.rotate).toEqual(0);
|
||||
expect(fetches).toBeGreaterThan(2);
|
||||
|
||||
// Check that the TypedArray was transferred.
|
||||
expect(initialData.length).toEqual(0);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("should fetch document info and page using range and streaming", async function () {
|
||||
const initialDataLength = 4000;
|
||||
const subArrays = [];
|
||||
let fetches = 0;
|
||||
|
||||
const data = await dataPromise;
|
||||
const initialData = data.subarray(0, initialDataLength);
|
||||
const initialData = new Uint8Array(data.subarray(0, initialDataLength));
|
||||
subArrays.push(initialData);
|
||||
|
||||
const transport = new PDFDataRangeTransport(data.length, initialData);
|
||||
transport.requestDataRange = function (begin, end) {
|
||||
fetches++;
|
||||
if (fetches === 1) {
|
||||
const chunk = new Uint8Array(data.subarray(initialDataLength));
|
||||
subArrays.push(chunk);
|
||||
|
||||
// Send rest of the data on first range request.
|
||||
transport.onDataProgressiveRead(data.subarray(initialDataLength));
|
||||
transport.onDataProgressiveRead(chunk);
|
||||
}
|
||||
waitSome(function () {
|
||||
transport.onDataRange(begin, data.subarray(begin, end));
|
||||
const chunk = new Uint8Array(data.subarray(begin, end));
|
||||
subArrays.push(chunk);
|
||||
|
||||
transport.onDataRange(begin, chunk);
|
||||
});
|
||||
};
|
||||
|
||||
@ -3369,6 +3321,14 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
await new Promise(resolve => {
|
||||
waitSome(resolve);
|
||||
});
|
||||
|
||||
if (!isNodeJS) {
|
||||
// Check that the TypedArrays were transferred.
|
||||
for (const array of subArrays) {
|
||||
expect(array.length).toEqual(0);
|
||||
}
|
||||
}
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
@ -3376,12 +3336,16 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
"should fetch document info and page, without range, " +
|
||||
"using complete initialData",
|
||||
async function () {
|
||||
const subArrays = [];
|
||||
let fetches = 0;
|
||||
|
||||
const data = await dataPromise;
|
||||
const initialData = new Uint8Array(data);
|
||||
subArrays.push(initialData);
|
||||
|
||||
const transport = new PDFDataRangeTransport(
|
||||
data.length,
|
||||
data,
|
||||
initialData,
|
||||
/* progressiveDone = */ true
|
||||
);
|
||||
transport.requestDataRange = function (begin, end) {
|
||||
@ -3399,6 +3363,13 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
expect(pdfPage.rotate).toEqual(0);
|
||||
expect(fetches).toEqual(0);
|
||||
|
||||
if (!isNodeJS) {
|
||||
// Check that the TypedArrays were transferred.
|
||||
for (const array of subArrays) {
|
||||
expect(array.length).toEqual(0);
|
||||
}
|
||||
}
|
||||
|
||||
await loadingTask.destroy();
|
||||
}
|
||||
);
|
||||
|
@ -270,11 +270,6 @@ const defaultOptions = {
|
||||
: "../web/standard_fonts/",
|
||||
kind: OptionKind.API,
|
||||
},
|
||||
transferPdfData: {
|
||||
/** @type {boolean} */
|
||||
value: typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL"),
|
||||
kind: OptionKind.API,
|
||||
},
|
||||
verbosity: {
|
||||
/** @type {number} */
|
||||
value: 1,
|
||||
|
Loading…
Reference in New Issue
Block a user