/* Copyright 2012 Mozilla Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @module pdfjsLib */ import { AbortException, AnnotationMode, assert, createPromiseCapability, getVerbosityLevel, info, InvalidPDFException, isArrayBuffer, MissingPDFException, PasswordException, RenderingIntentFlag, setVerbosityLevel, shadow, stringToBytes, UnexpectedResponseException, UnknownErrorException, unreachable, warn, } from "../shared/util.js"; import { AnnotationStorage, PrintAnnotationStorage, } from "./annotation_storage.js"; import { deprecated, DOMCanvasFactory, DOMCMapReaderFactory, DOMStandardFontDataFactory, isDataScheme, loadScript, PageViewport, RenderingCancelledException, StatTimer, } from "./display_utils.js"; import { FontFaceObject, FontLoader } from "./font_loader.js"; import { CanvasGraphics } from "./canvas.js"; import { GlobalWorkerOptions } from "./worker_options.js"; import { isNodeJS } from "../shared/is_node.js"; import { MessageHandler } from "../shared/message_handler.js"; import { Metadata } from "./metadata.js"; import { OptionalContentConfig } from "./optional_content_config.js"; import { PDFDataTransportStream } from "./transport_stream.js"; import { XfaText } from "./xfa_text.js"; const DEFAULT_RANGE_CHUNK_SIZE = 65536; // 2^16 = 65536 const RENDERING_CANCELLED_TIMEOUT = 100; // ms let DefaultCanvasFactory = DOMCanvasFactory; let DefaultCMapReaderFactory = DOMCMapReaderFactory; let DefaultStandardFontDataFactory = DOMStandardFontDataFactory; if 
/**
 * The currently registered {IPDFStream} factory. It is only ever assigned
 * by `setPDFNetworkStreamFactory`, and thus remains `undefined` until that
 * function has been called.
 *
 * @type {IPDFStreamFactory}
 * @private
 */
let createPDFNetworkStream;

/**
 * Registers the function used to instantiate an {IPDFStream}, i.e. the
 * alternative PDF data transport. Any previously registered factory is
 * replaced.
 *
 * @param {IPDFStreamFactory} pdfNetworkStreamFactory - The factory function
 *   that takes document initialization parameters (including a "url") and
 *   returns a promise which is resolved with an instance of {IPDFStream}.
 * @ignore
 */
function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) {
  createPDFNetworkStream = pdfNetworkStreamFactory;
}
The default is `false`. * @property {string} [password] - For decrypting password-protected PDFs. * @property {TypedArray} [initialData] - A typed array with the first portion * or all of the pdf data. Used by the extension since some data is already * loaded before the switch to range requests. * @property {number} [length] - The PDF file length. It's used for progress * reports and range requests operations. * @property {PDFDataRangeTransport} [range] - Allows for using a custom range * transport implementation. * @property {number} [rangeChunkSize] - Specify maximum number of bytes fetched * per range request. The default value is {@link DEFAULT_RANGE_CHUNK_SIZE}. * @property {PDFWorker} [worker] - The worker that will be used for loading and * parsing the PDF data. * @property {number} [verbosity] - Controls the logging level; the constants * from {@link VerbosityLevel} should be used. * @property {string} [docBaseUrl] - The base URL of the document, used when * attempting to recover valid absolute URLs for annotations, and outline * items, that (incorrectly) only specify relative URLs. * @property {string} [cMapUrl] - The URL where the predefined Adobe CMaps are * located. Include the trailing slash. * @property {boolean} [cMapPacked] - Specifies if the Adobe CMaps are binary * packed or not. * @property {Object} [CMapReaderFactory] - The factory that will be used when * reading built-in CMap files. Providing a custom factory is useful for * environments without Fetch API or `XMLHttpRequest` support, such as * Node.js. The default value is {DOMCMapReaderFactory}. * @property {boolean} [useSystemFonts] - When `true`, fonts that aren't * embedded in the PDF document will fallback to a system font. * The default value is `true` in web environments and `false` in Node.js; * unless `disableFontFace === true` in which case this defaults to `false` * regardless of the environment (to prevent completely broken fonts). 
* @property {string} [standardFontDataUrl] - The URL where the standard font * files are located. Include the trailing slash. * @property {Object} [StandardFontDataFactory] - The factory that will be used * when reading the standard font files. Providing a custom factory is useful * for environments without Fetch API or `XMLHttpRequest` support, such as * Node.js. The default value is {DOMStandardFontDataFactory}. * @property {boolean} [useWorkerFetch] - Enable using the Fetch API in the * worker-thread when reading CMap and standard font files. When `true`, * the `CMapReaderFactory` and `StandardFontDataFactory` options are ignored. * The default value is `true` in web environments and `false` in Node.js. * @property {boolean} [stopAtErrors] - Reject certain promises, e.g. * `getOperatorList`, `getTextContent`, and `RenderTask`, when the associated * PDF data cannot be successfully parsed, instead of attempting to recover * whatever possible of the data. The default value is `false`. * @property {number} [maxImageSize] - The maximum allowed image size in total * pixels, i.e. width * height. Images above this value will not be rendered. * Use -1 for no limit, which is also the default value. * @property {boolean} [isEvalSupported] - Determines if we can evaluate strings * as JavaScript. Primarily used to improve performance of font rendering, and * when parsing PDF functions. The default value is `true`. * @property {boolean} [disableFontFace] - By default fonts are converted to * OpenType fonts and loaded via the Font Loading API or `@font-face` rules. * If disabled, fonts will be rendered using a built-in font renderer that * constructs the glyphs with primitive path commands. * The default value is `false` in web environments and `true` in Node.js. * @property {boolean} [fontExtraProperties] - Include additional properties, * which are unused during rendering of PDF documents, when exporting the * parsed font data from the worker-thread. 
This may be useful for debugging * purposes (and backwards compatibility), but note that it will lead to * increased memory usage. The default value is `false`. * @property {boolean} [enableXfa] - Render Xfa forms if any. * The default value is `false`. * @property {HTMLDocument} [ownerDocument] - Specify an explicit document * context to create elements with and to load resources, such as fonts, * into. Defaults to the current document. * @property {boolean} [disableRange] - Disable range request loading of PDF * files. When enabled, and if the server supports partial content requests, * then the PDF will be fetched in chunks. The default value is `false`. * @property {boolean} [disableStream] - Disable streaming of PDF file data. * By default PDF.js attempts to load PDF files in chunks. The default value * is `false`. * @property {boolean} [disableAutoFetch] - Disable pre-fetching of PDF file * data. When range requests are enabled PDF.js will automatically keep * fetching more data even if it isn't needed to display the current page. * The default value is `false`. * * NOTE: It is also necessary to disable streaming, see above, in order for * disabling of pre-fetching to work correctly. * @property {boolean} [pdfBug] - Enables special hooks for debugging PDF.js * (see `web/debugger.js`). The default value is `false`. */ /** * This is the main entry point for loading a PDF and interacting with it. * * NOTE: If a URL is used to fetch the PDF data a standard Fetch API call (or * XHR as fallback) is used, which means it must follow same origin rules, * e.g. no cross-domain requests without CORS. * * @param {string|URL|TypedArray|PDFDataRangeTransport|DocumentInitParameters} * src - Can be a URL where a PDF file is located, a typed array (Uint8Array) * already populated with data, or a parameter object. 
/**
 * This is the main entry point for loading a PDF and interacting with it.
 *
 * NOTE: If a URL is used to fetch the PDF data a standard Fetch API call (or
 * XHR as fallback) is used, which means it must follow same origin rules,
 * e.g. no cross-domain requests without CORS.
 *
 * @param {string|URL|TypedArray|PDFDataRangeTransport|DocumentInitParameters}
 *   src - Can be a URL where a PDF file is located, a typed array (Uint8Array)
 *   already populated with data, or a parameter object.
 * @returns {PDFDocumentLoadingTask}
 * @throws {Error} Synchronously, when `src` is not one of the supported types
 *   (this includes `null`), when a parameter object has none of the `url`,
 *   `data` or `range` properties, or when the `url`/`data` properties hold
 *   values that cannot be converted.
 */
function getDocument(src) {
  const task = new PDFDocumentLoadingTask();

  // Normalize the different kinds of `src` into a parameter object.
  let source;
  if (typeof src === "string" || src instanceof URL) {
    source = { url: src };
  } else if (isArrayBuffer(src)) {
    source = { data: src };
  } else if (src instanceof PDFDataRangeTransport) {
    source = { range: src };
  } else {
    // NOTE: `typeof null === "object"`, hence `null` has to be rejected
    // explicitly; otherwise the property accesses below would fail with an
    // unhelpful TypeError rather than the intended error message.
    if (typeof src !== "object" || src === null) {
      throw new Error(
        "Invalid parameter in getDocument, " +
          "need either string, URL, Uint8Array, or parameter object."
      );
    }
    if (!src.url && !src.data && !src.range) {
      throw new Error(
        "Invalid parameter object: need either .data, .range or .url"
      );
    }
    source = src;
  }
  // Copy the options onto a prototype-less object, validating/converting the
  // ones that need it. `range` and `worker` are kept out of `params`.
  const params = Object.create(null);
  let rangeTransport = null,
    worker = null;

  for (const key in source) {
    const value = source[key];

    switch (key) {
      case "url":
        if (typeof window !== "undefined") {
          try {
            // The full path is required in the 'url' field.
            params[key] = new URL(value, window.location).href;
            continue;
          } catch (ex) {
            // Falls through to the `throw` below, after logging the reason.
            warn(`Cannot create valid URL: "${ex}".`);
          }
        } else if (typeof value === "string" || value instanceof URL) {
          params[key] = value.toString(); // Support Node.js environments.
          continue;
        }
        throw new Error(
          "Invalid PDF url data: " +
            "either string or URL-object is expected in the url property."
        );
      case "range":
        rangeTransport = value;
        continue;
      case "worker":
        worker = value;
        continue;
      case "data":
        // Converting string or array-like data to Uint8Array.
        if (
          typeof PDFJSDev !== "undefined" &&
          PDFJSDev.test("GENERIC") &&
          isNodeJS &&
          typeof Buffer !== "undefined" && // eslint-disable-line no-undef
          value instanceof Buffer // eslint-disable-line no-undef
        ) {
          params[key] = new Uint8Array(value);
        } else if (value instanceof Uint8Array) {
          break; // Use the data as-is when it's already a Uint8Array.
        } else if (typeof value === "string") {
          params[key] = stringToBytes(value);
        } else if (
          typeof value === "object" &&
          value !== null &&
          !isNaN(value.length)
        ) {
          params[key] = new Uint8Array(value);
        } else if (isArrayBuffer(value)) {
          params[key] = new Uint8Array(value);
        } else {
          throw new Error(
            "Invalid PDF binary data: either typed array, " +
              "string, or array-like object is expected in the data property."
          );
        }
        continue;
    }
    // Reached for every key not handled above, and via the `break` for
    // `data` that already is a Uint8Array.
    params[key] = value;
  }

  // Apply defaults, and coerce the options into their expected types.
  params.CMapReaderFactory =
    params.CMapReaderFactory || DefaultCMapReaderFactory;
  params.StandardFontDataFactory =
    params.StandardFontDataFactory || DefaultStandardFontDataFactory;
  params.ignoreErrors = params.stopAtErrors !== true;
  params.fontExtraProperties = params.fontExtraProperties === true;
  params.pdfBug = params.pdfBug === true;
  params.enableXfa = params.enableXfa === true;

  if (!Number.isInteger(params.rangeChunkSize) || params.rangeChunkSize < 1) {
    params.rangeChunkSize = DEFAULT_RANGE_CHUNK_SIZE;
  }
  if (
    typeof params.docBaseUrl !== "string" ||
    isDataScheme(params.docBaseUrl)
  ) {
    // Ignore "data:"-URLs, since they can't be used to recover valid absolute
    // URLs anyway. We want to avoid sending them to the worker-thread, since
    // they contain the *entire* PDF document and can thus be arbitrarily long.
    params.docBaseUrl = null;
  }
  if (!Number.isInteger(params.maxImageSize) || params.maxImageSize < -1) {
    params.maxImageSize = -1;
  }
  if (typeof params.cMapUrl !== "string") {
    params.cMapUrl = null;
  }
  if (typeof params.standardFontDataUrl !== "string") {
    params.standardFontDataUrl = null;
  }
  if (typeof params.useWorkerFetch !== "boolean") {
    // Only default to worker-thread fetching when no custom factory is used.
    params.useWorkerFetch =
      params.CMapReaderFactory === DOMCMapReaderFactory &&
      params.StandardFontDataFactory === DOMStandardFontDataFactory;
  }
  if (typeof params.isEvalSupported !== "boolean") {
    params.isEvalSupported = true;
  }
  if (typeof params.disableFontFace !== "boolean") {
    params.disableFontFace = isNodeJS;
  }
  if (typeof params.useSystemFonts !== "boolean") {
    params.useSystemFonts = !isNodeJS && !params.disableFontFace;
  }
  if (
    typeof params.ownerDocument !== "object" ||
    params.ownerDocument === null
  ) {
    params.ownerDocument = globalThis.document;
  }

  if (typeof params.disableRange !== "boolean") {
    params.disableRange = false;
  }
  if (typeof params.disableStream !== "boolean") {
    params.disableStream = false;
  }
  if (typeof params.disableAutoFetch !== "boolean") {
    params.disableAutoFetch = false;
  }

  // Set the main-thread verbosity level.
  setVerbosityLevel(params.verbosity);

  if (!worker) {
    const workerParams = {
      verbosity: params.verbosity,
      port: GlobalWorkerOptions.workerPort,
    };
    // Worker was not provided -- creating and owning our own. If message port
    // is specified in global worker options, using it.
    worker = workerParams.port
      ? PDFWorker.fromPort(workerParams)
      : new PDFWorker(workerParams);
    // Only a worker created here is stored on the task.
    task._worker = worker;
  }
  const docId = task.docId;
  worker.promise
    .then(function () {
      if (task.destroyed) {
        throw new Error("Loading aborted");
      }

      const workerIdPromise = _fetchDocument(
        worker,
        params,
        rangeTransport,
        docId
      );
      // Created inside a Promise executor, such that a synchronous exception
      // from the stream constructor/factory becomes a rejection.
      const networkStreamPromise = new Promise(function (resolve) {
        let networkStream;
        if (rangeTransport) {
          networkStream = new PDFDataTransportStream(
            {
              length: params.length,
              initialData: params.initialData,
              progressiveDone: params.progressiveDone,
              contentDispositionFilename: params.contentDispositionFilename,
              disableRange: params.disableRange,
              disableStream: params.disableStream,
            },
            rangeTransport
          );
        } else if (!params.data) {
          networkStream = createPDFNetworkStream({
            url: params.url,
            length: params.length,
            httpHeaders: params.httpHeaders,
            withCredentials: params.withCredentials,
            rangeChunkSize: params.rangeChunkSize,
            disableRange: params.disableRange,
            disableStream: params.disableStream,
          });
        }
        // NOTE: `networkStream` stays `undefined` when `data` was provided.
        resolve(networkStream);
      });

      return Promise.all([workerIdPromise, networkStreamPromise]).then(
        function ([workerId, networkStream]) {
          if (task.destroyed) {
            throw new Error("Loading aborted");
          }

          const messageHandler = new MessageHandler(
            docId,
            workerId,
            worker.port
          );
          const transport = new WorkerTransport(
            messageHandler,
            task,
            networkStream,
            params
          );
          task._transport = transport;
          messageHandler.send("Ready", null);
        }
      );
    })
    .catch(task._capability.reject);

  return task;
}
/**
 * Asks the worker to start fetching/parsing the specified PDF document/data,
 * by sending it a "GetDocRequest" message.
 *
 * NOTE: The `source` object is modified: the properties of the range
 * transport (when provided) are copied onto it, and its `data` property is
 * set to `null` once the request has been answered.
 *
 * @param {PDFWorker} worker
 * @param {Object} source
 * @param {PDFDataRangeTransport} pdfDataRangeTransport
 * @param {string} docId - Unique document ID, used in `MessageHandler`.
 * @returns {Promise} A promise that is resolved when the worker ID of
 *   the `MessageHandler` is known.
 * @throws {Error} When the worker is destroyed, either before the request is
 *   sent or by the time the response has arrived.
 * @private
 */
async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) {
  const ensureWorkerAlive = () => {
    if (worker.destroyed) {
      throw new Error("Worker was destroyed");
    }
  };
  ensureWorkerAlive();

  if (pdfDataRangeTransport) {
    // Mirror the transport state onto `source`, in a fixed order.
    const transportProps = [
      "length",
      "initialData",
      "progressiveDone",
      "contentDispositionFilename",
    ];
    for (const prop of transportProps) {
      source[prop] = pdfDataRangeTransport[prop];
    }
  }

  const request = {
    docId,
    apiVersion:
      typeof PDFJSDev !== "undefined" && !PDFJSDev.test("TESTING")
        ? PDFJSDev.eval("BUNDLE_VERSION")
        : null,
    // Only send the required properties, and *not* the entire object.
    source: {
      data: source.data,
      url: source.url,
      password: source.password,
      disableAutoFetch: source.disableAutoFetch,
      rangeChunkSize: source.rangeChunkSize,
      length: source.length,
    },
    maxImageSize: source.maxImageSize,
    disableFontFace: source.disableFontFace,
    docBaseUrl: source.docBaseUrl,
    ignoreErrors: source.ignoreErrors,
    isEvalSupported: source.isEvalSupported,
    fontExtraProperties: source.fontExtraProperties,
    enableXfa: source.enableXfa,
    useSystemFonts: source.useSystemFonts,
    // The URLs are only forwarded when worker-thread fetching is enabled.
    cMapUrl: source.useWorkerFetch ? source.cMapUrl : null,
    standardFontDataUrl: source.useWorkerFetch
      ? source.standardFontDataUrl
      : null,
  };
  const workerId = await worker.messageHandler.sendWithPromise(
    "GetDocRequest",
    request
  );

  // Release the TypedArray data, when it exists, since it's no longer needed
  // on the main-thread *after* it's been sent to the worker-thread.
  if (source.data) {
    source.data = null;
  }

  // The worker may have been destroyed while the request was pending.
  ensureWorkerAlive();
  return workerId;
}
/**
 * The loading task controls the operations required to load a PDF document
 * (such as network requests) and provides a way to listen for completion,
 * after which individual pages can be rendered.
 */
class PDFDocumentLoadingTask {
  // Counter shared by all instances, used to build the `docId` values.
  static #docId = 0;

  constructor() {
    this._capability = createPromiseCapability();
    this._transport = null;
    this._worker = null;

    const serialNumber = PDFDocumentLoadingTask.#docId++;
    /**
     * Unique identifier for the document loading task.
     * @type {string}
     */
    this.docId = `d${serialNumber}`;

    /**
     * Whether the loading task is destroyed or not.
     * @type {boolean}
     */
    this.destroyed = false;

    /**
     * Callback to request a password if a wrong or no password was provided.
     * The callback receives two parameters: a function that should be called
     * with the new password, and a reason (see {@link PasswordResponses}).
     * @type {function}
     */
    this.onPassword = null;

    /**
     * Callback to be able to monitor the loading progress of the PDF file
     * (necessary to implement e.g. a loading bar).
     * The callback receives an {@link OnProgressParameters} argument.
     * @type {function}
     */
    this.onProgress = null;

    /**
     * Callback for when an unsupported feature is used in the PDF document.
     * The callback receives an {@link UNSUPPORTED_FEATURES} argument.
     * @type {function}
     */
    this.onUnsupportedFeature = null;
  }

  /**
   * Promise for document loading task completion.
   * @type {Promise}
   */
  get promise() {
    const { promise } = this._capability;
    return promise;
  }

  /**
   * Abort all network requests and destroy the worker.
   * @returns {Promise} A promise that is resolved when destruction is
   *   completed.
   */
  async destroy() {
    this.destroyed = true;

    const transport = this._transport;
    // Await unconditionally, i.e. also when no transport exists, such that
    // the remaining clean-up always runs asynchronously.
    await (transport === null || transport === undefined
      ? undefined
      : transport.destroy());
    this._transport = null;

    const worker = this._worker;
    if (!worker) {
      return;
    }
    worker.destroy();
    this._worker = null;
  }
}
/**
 * Abstract class to support range requests file loading.
 *
 * Listeners are registered through the `add*Listener` methods and invoked by
 * the matching `onData*` methods. Range data is dispatched immediately,
 * whereas progress/progressive notifications are deferred until
 * `transportReady` has been called.
 */
class PDFDataRangeTransport {
  /**
   * @param {number} length
   * @param {Uint8Array} initialData
   * @param {boolean} [progressiveDone]
   * @param {string} [contentDispositionFilename]
   */
  constructor(
    length,
    initialData,
    progressiveDone = false,
    contentDispositionFilename = null
  ) {
    this.length = length;
    this.initialData = initialData;
    this.progressiveDone = progressiveDone;
    this.contentDispositionFilename = contentDispositionFilename;

    this._rangeListeners = [];
    this._progressListeners = [];
    this._progressiveReadListeners = [];
    this._progressiveDoneListeners = [];
    this._readyCapability = createPromiseCapability();
  }

  /** @param {function} listener - Invoked with `(begin, chunk)`. */
  addRangeListener(listener) {
    this._rangeListeners.push(listener);
  }

  /** @param {function} listener - Invoked with `(loaded, total)`. */
  addProgressListener(listener) {
    this._progressListeners.push(listener);
  }

  /** @param {function} listener - Invoked with `(chunk)`. */
  addProgressiveReadListener(listener) {
    this._progressiveReadListeners.push(listener);
  }

  /** @param {function} listener - Invoked without arguments. */
  addProgressiveDoneListener(listener) {
    this._progressiveDoneListeners.push(listener);
  }

  /**
   * Synchronously forwards a data range to all range listeners.
   */
  onDataRange(begin, chunk) {
    const listeners = this._rangeListeners;
    for (let i = 0; i < listeners.length; i++) {
      // Call through a local, to avoid passing the Array as `this`.
      const notify = listeners[i];
      notify(begin, chunk);
    }
  }

  /**
   * Forwards a progress update to all progress listeners, once the transport
   * has been marked as ready.
   */
  onDataProgress(loaded, total) {
    const notifyAll = () => {
      const listeners = this._progressListeners;
      for (let i = 0; i < listeners.length; i++) {
        const notify = listeners[i];
        notify(loaded, total);
      }
    };
    this._readyCapability.promise.then(notifyAll);
  }

  /**
   * Forwards progressively read data to all progressive-read listeners, once
   * the transport has been marked as ready.
   */
  onDataProgressiveRead(chunk) {
    const notifyAll = () => {
      const listeners = this._progressiveReadListeners;
      for (let i = 0; i < listeners.length; i++) {
        const notify = listeners[i];
        notify(chunk);
      }
    };
    this._readyCapability.promise.then(notifyAll);
  }

  /**
   * Notifies all progressive-done listeners, once the transport has been
   * marked as ready.
   */
  onDataProgressiveDone() {
    const notifyAll = () => {
      const listeners = this._progressiveDoneListeners;
      for (let i = 0; i < listeners.length; i++) {
        const notify = listeners[i];
        notify();
      }
    };
    this._readyCapability.promise.then(notifyAll);
  }

  /**
   * Marks the transport as ready, which releases any deferred notifications.
   */
  transportReady() {
    this._readyCapability.resolve();
  }

  /**
   * Abstract method, which must be overridden by the implementation.
   */
  requestDataRange(begin, end) {
    unreachable("Abstract method PDFDataRangeTransport.requestDataRange");
  }

  /**
   * No-op by default.
   */
  abort() {}
}
/**
 * Proxy to a `PDFDocument` in the worker thread.
 *
 * Apart from the data found in `pdfInfo`, everything is delegated to the
 * transport instance passed to the constructor.
 */
class PDFDocumentProxy {
  constructor(pdfInfo, transport) {
    this._pdfInfo = pdfInfo;
    this._transport = transport;

    if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) {
      // Deprecated API-surface, installed as (non-enumerable) own properties.
      Object.defineProperties(this, {
        fingerprint: {
          get() {
            deprecated(
              "`PDFDocumentProxy.fingerprint`, " +
                "please use `PDFDocumentProxy.fingerprints` instead."
            );
            return this.fingerprints[0];
          },
        },
        getStats: {
          value: async () => {
            deprecated(
              "`PDFDocumentProxy.getStats`, " +
                "please use the `PDFDocumentProxy.stats`-getter instead."
            );
            return this.stats || { streamTypes: {}, fontTypes: {} };
          },
        },
      });
    }
  }

  /**
   * @type {AnnotationStorage} Storage for annotation data in forms.
   */
  get annotationStorage() {
    return this._transport.annotationStorage;
  }

  /**
   * @type {number} Total number of pages in the PDF file.
   */
  get numPages() {
    return this._pdfInfo.numPages;
  }

  /**
   * @type {Array} A (not guaranteed to be) unique ID to identify the PDF
   *   document.
   *   NOTE: The first element will always be defined for all PDF documents,
   *   whereas the second element is only defined for *modified* PDF documents.
   */
  get fingerprints() {
    return this._pdfInfo.fingerprints;
  }

  /**
   * @typedef {Object} PDFDocumentStats
   * @property {Object} streamTypes - Used stream types in the document (an
   *   item is set to true if specific stream ID was used in the document).
   * @property {Object} fontTypes - Used font types in the document (an item
   *   is set to true if specific font ID was used in the document).
   */

  /**
   * @type {PDFDocumentStats | null} The current statistics about document
   *   structures, or `null` when no statistics exists.
   */
  get stats() {
    return this._transport.stats;
  }

  /**
   * @type {boolean} True if only XFA form.
   */
  get isPureXfa() {
    return Boolean(this._transport._htmlForXfa);
  }

  /**
   * NOTE: This is (mostly) intended to support printing of XFA forms.
   *
   * @type {Object | null} An object representing a HTML tree structure
   *   to render the XFA, or `null` when no XFA form exists.
   */
  get allXfaHtml() {
    return this._transport._htmlForXfa;
  }

  /**
   * @param {number} pageNumber - The page number to get. The first page is 1.
   * @returns {Promise<PDFPageProxy>} A promise that is resolved with
   *   a {@link PDFPageProxy} object.
   */
  getPage(pageNumber) {
    return this._transport.getPage(pageNumber);
  }

  /**
   * @param {RefProxy} ref - The page reference.
   * @returns {Promise<number>} A promise that is resolved with the page
   *   index, starting from zero, that is associated with the reference.
   */
  getPageIndex(ref) {
    return this._transport.getPageIndex(ref);
  }

  /**
   * @returns {Promise<Object>} A promise that is resolved with a mapping
   *   from named destinations to references.
   *
   * This can be slow for large documents. Use `getDestination` instead.
   */
  getDestinations() {
    return this._transport.getDestinations();
  }

  /**
   * @param {string} id - The named destination to get.
   * @returns {Promise} A promise that is resolved with all information of
   *   the given named destination, or `null` when the named destination is
   *   not present in the PDF file.
   */
  getDestination(id) {
    return this._transport.getDestination(id);
  }

  /**
   * @returns {Promise<Array | null>} A promise that is resolved with an
   *   {Array} containing the page labels that correspond to the page
   *   indexes, or `null` when no page labels are present in the PDF file.
   */
  getPageLabels() {
    return this._transport.getPageLabels();
  }

  /**
   * @returns {Promise<string>} A promise that is resolved with a {string}
   *   containing the page layout name.
   */
  getPageLayout() {
    return this._transport.getPageLayout();
  }

  /**
   * @returns {Promise<string>} A promise that is resolved with a {string}
   *   containing the page mode name.
   */
  getPageMode() {
    return this._transport.getPageMode();
  }

  /**
   * @returns {Promise<Object | null>} A promise that is resolved with an
   *   {Object} containing the viewer preferences, or `null` when no viewer
   *   preferences are present in the PDF file.
   */
  getViewerPreferences() {
    return this._transport.getViewerPreferences();
  }

  /**
   * @returns {Promise} A promise that is resolved with an {Array} containing
   *   the destination, or `null` when no open action is present in the PDF.
   */
  getOpenAction() {
    return this._transport.getOpenAction();
  }

  /**
   * @returns {Promise} A promise that is resolved with a lookup table for
   *   mapping named attachments to their content.
   */
  getAttachments() {
    return this._transport.getAttachments();
  }

  /**
   * @returns {Promise<Array<string> | null>} A promise that is resolved with
   *   an {Array} of all the JavaScript strings in the name tree, or `null`
   *   if no JavaScript exists.
   */
  getJavaScript() {
    return this._transport.getJavaScript();
  }

  /**
   * NOTE: This delegates to the `getDocJSActions` method of the transport.
   *
   * @returns {Promise<Object | null>} A promise that is resolved with an
   *   {Object} with the JavaScript actions:
   *     - from the name tree (like getJavaScript);
   *     - from A or AA entries in the catalog dictionary.
   *   , or `null` if no JavaScript exists.
   */
  getJSActions() {
    return this._transport.getDocJSActions();
  }

  /**
   * @typedef {Object} OutlineNode
   * @property {string} title
   * @property {boolean} bold
   * @property {boolean} italic
   * @property {Uint8ClampedArray} color - The color in RGB format to use for
   *   display purposes.
   * @property {string | Array | null} dest
   * @property {string | null} url
   * @property {string | undefined} unsafeUrl
   * @property {boolean | undefined} newWindow
   * @property {number | undefined} count
   * @property {Array} items
   */

  /**
   * @returns {Promise<Array>} A promise that is resolved with an {Array}
   *   that is a tree outline (if it has one) of the PDF file.
   */
  getOutline() {
    return this._transport.getOutline();
  }

  /**
   * @returns {Promise<OptionalContentConfig>} A promise that is resolved with
   *   an {@link OptionalContentConfig} that contains all the optional content
   *   groups (assuming that the document has any).
   */
  getOptionalContentConfig() {
    return this._transport.getOptionalContentConfig();
  }

  /**
   * @returns {Promise<Array | null>} A promise that is resolved with an
   *   {Array} that contains the permission flags for the PDF document, or
   *   `null` when no permissions are present in the PDF file.
   */
  getPermissions() {
    return this._transport.getPermissions();
  }

  /**
   * @returns {Promise<{ info: Object, metadata: Metadata }>} A promise that is
   *   resolved with an {Object} that has `info` and `metadata` properties.
   *   `info` is an {Object} filled with anything available in the information
   *   dictionary and similarly `metadata` is a {Metadata} object with
   *   information from the metadata section of the PDF.
   */
  getMetadata() {
    return this._transport.getMetadata();
  }

  /**
   * @typedef {Object} MarkInfo
   * Properties correspond to Table 321 of the PDF 32000-1:2008 spec.
   * @property {boolean} Marked
   * @property {boolean} UserProperties
   * @property {boolean} Suspects
   */

  /**
   * @returns {Promise<MarkInfo | null>} A promise that is resolved with
   *   a {MarkInfo} object that contains the MarkInfo flags for the PDF
   *   document, or `null` when no MarkInfo values are present in the PDF file.
   */
  getMarkInfo() {
    return this._transport.getMarkInfo();
  }

  /**
   * @returns {Promise<Uint8Array>} A promise that is resolved with a
   *   {Uint8Array} that has the raw data from the PDF.
   */
  getData() {
    return this._transport.getData();
  }

  /**
   * @returns {Promise<{ length: number }>} A promise that is resolved when the
   *   document's data is loaded. It is resolved with an {Object} that contains
   *   the `length` property that indicates size of the PDF data in bytes.
   */
  getDownloadInfo() {
    const { downloadInfoCapability } = this._transport;
    return downloadInfoCapability.promise;
  }

  /**
   * Cleans up resources allocated by the document on both the main and worker
   * threads.
   *
   * NOTE: Do not, under any circumstances, call this method when rendering is
   * currently ongoing since that may lead to rendering errors.
   *
   * @param {boolean} [keepLoadedFonts] - Let fonts remain attached to the DOM.
   *   NOTE: This will increase persistent memory usage, hence don't use this
   *   option unless absolutely necessary. The default value is `false`.
   * @returns {Promise} A promise that is resolved when clean-up has finished.
   */
  cleanup(keepLoadedFonts = false) {
    // Fonts are always kept for documents that are XFA-only.
    const keepFonts = keepLoadedFonts || this.isPureXfa;
    return this._transport.startCleanup(keepFonts);
  }

  /**
   * Destroys the current document instance and terminates the worker.
   */
  destroy() {
    return this.loadingTask.destroy();
  }

  /**
   * @type {DocumentInitParameters} A subset of the current
   *   {DocumentInitParameters}, which are needed in the viewer.
   */
  get loadingParams() {
    return this._transport.loadingParams;
  }

  /**
   * @type {PDFDocumentLoadingTask} The loadingTask for the current document.
   */
  get loadingTask() {
    return this._transport.loadingTask;
  }

  /**
   * @returns {Promise<Uint8Array>} A promise that is resolved with a
   *   {Uint8Array} containing the full data of the saved document.
   */
  saveDocument() {
    const transport = this._transport;

    if (
      (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) &&
      transport.annotationStorage.size <= 0
    ) {
      deprecated(
        "saveDocument called while `annotationStorage` is empty, " +
          "please use the getData-method instead."
      );
    }
    return transport.saveDocument();
  }

  /**
   * @returns {Promise<Object | null>} A promise that is resolved with an
   *   {Object} containing /AcroForm field data for the JS sandbox, or `null`
   *   when no field data is present in the PDF file.
   */
  getFieldObjects() {
    return this._transport.getFieldObjects();
  }

  /**
   * @returns {Promise<boolean>} A promise that is resolved with `true`
   *   if some /AcroForm fields have JavaScript actions.
   */
  hasJSActions() {
    return this._transport.hasJSActions();
  }

  /**
   * @returns {Promise<Array | null>} A promise that is resolved with an
   *   {Array} containing IDs of annotations that have a calculation action,
   *   or `null` when no such annotations are present in the PDF file.
   */
  getCalculationOrderIds() {
    return this._transport.getCalculationOrderIds();
  }
}
* * @typedef {Object} GetViewportParameters * @property {number} scale - The desired scale of the viewport. * @property {number} [rotation] - The desired rotation, in degrees, of * the viewport. If omitted it defaults to the page rotation. * @property {number} [offsetX] - The horizontal, i.e. x-axis, offset. * The default value is `0`. * @property {number} [offsetY] - The vertical, i.e. y-axis, offset. * The default value is `0`. * @property {boolean} [dontFlip] - If true, the y-axis will not be * flipped. The default value is `false`. */ /** * Page getTextContent parameters. * * @typedef {Object} getTextContentParameters * @property {boolean} disableCombineTextItems - Do not attempt to combine * same line {@link TextItem}'s. The default value is `false`. * @property {boolean} [includeMarkedContent] - When true include marked * content items in the items array of TextContent. The default is `false`. */ /** * Page text content. * * @typedef {Object} TextContent * @property {Array} items - Array of * {@link TextItem} and {@link TextMarkedContent} objects. TextMarkedContent * items are included when includeMarkedContent is true. * @property {Object} styles - {@link TextStyle} objects, * indexed by font name. */ /** * Page text content part. * * @typedef {Object} TextItem * @property {string} str - Text content. * @property {string} dir - Text direction: 'ttb', 'ltr' or 'rtl'. * @property {Array} transform - Transformation matrix. * @property {number} width - Width in device space. * @property {number} height - Height in device space. * @property {string} fontName - Font name used by PDF.js for converted font. * @property {boolean} hasEOL - Indicating if the text content is followed by a * line-break. */ /** * Page text marked content part. * * @typedef {Object} TextMarkedContent * @property {string} type - Either 'beginMarkedContent', * 'beginMarkedContentProps', or 'endMarkedContent'. * @property {string} id - The marked content identifier. 
Only used for type
 *   'beginMarkedContentProps'.
 */

/**
 * Text style.
 *
 * @typedef {Object} TextStyle
 * @property {number} ascent - Font ascent.
 * @property {number} descent - Font descent.
 * @property {boolean} vertical - Whether or not the text is in vertical mode.
 * @property {string} fontFamily - The possible font family.
 */

/**
 * Page annotation parameters.
 *
 * @typedef {Object} GetAnnotationsParameters
 * @property {string} [intent] - Determines the annotations that are fetched,
 *   can be 'display' (viewable annotations), 'print' (printable annotations),
 *   or 'any' (all annotations). The default value is 'display'.
 */

/**
 * Page render parameters.
 *
 * @typedef {Object} RenderParameters
 * @property {Object} canvasContext - A 2D context of a DOM Canvas object.
 * @property {PageViewport} viewport - Rendering viewport obtained by calling
 *   the `PDFPageProxy.getViewport` method.
 * @property {string} [intent] - Rendering intent, can be 'display', 'print',
 *   or 'any'. The default value is 'display'.
 * @property {number} [annotationMode] - Controls which annotations are
 *   rendered onto the canvas, for annotations with appearance-data; the values
 *   from {@link AnnotationMode} should be used. The following values are
 *   supported:
 *    - `AnnotationMode.DISABLE`, which disables all annotations.
 *    - `AnnotationMode.ENABLE`, which includes all possible annotations (thus
 *      it also depends on the `intent`-option, see above).
 *    - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain
 *      interactive form elements (those will be rendered in the display layer).
 *    - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations
 *      (as above) but where interactive form elements are updated with data
 *      from the {@link AnnotationStorage}-instance; useful e.g. for printing.
 *   The default value is `AnnotationMode.ENABLE`.
 * @property {Array} [transform] - Additional transform, applied just
 *   before the viewport transform.
* @property {Object} [imageLayer] - An object that has `beginLayout`,
 *   `endLayout` and `appendImage` functions.
 * @property {Object} [canvasFactory] - The factory instance that will be used
 *   when creating canvases. By default a new instance of the default canvas
 *   factory ({@link DOMCanvasFactory}, or its Node.js counterpart) is created.
 * @property {Object | string} [background] - Background to use for the canvas.
 *   Any valid `canvas.fillStyle` can be used: a `DOMString` parsed as a CSS
 *   value, a `CanvasGradient` object (a linear or radial gradient) or
 *   a `CanvasPattern` object (a repetitive image). The default value is
 *   'rgb(255,255,255)'.
 *
 *   NOTE: This option may be partially, or completely, ignored when the
 *   `pageColors`-option is used.
 * @property {Object} [pageColors] - Overwrites background and foreground colors
 *   with user-defined ones in order to improve readability in high contrast
 *   mode.
 * @property {Promise<OptionalContentConfig>} [optionalContentConfigPromise] -
 *   A promise that should resolve with an {@link OptionalContentConfig}
 *   created from `PDFDocumentProxy.getOptionalContentConfig`. If `null`,
 *   the configuration will be fetched automatically with the default visibility
 *   states set.
 * @property {Map} [annotationCanvasMap] - Maps annotation ids to the
 *   canvases used to render them.
 * @property {PrintAnnotationStorage} [printAnnotationStorage]
 */

/**
 * Page getOperatorList parameters.
 *
 * @typedef {Object} GetOperatorListParameters
 * @property {string} [intent] - Rendering intent, can be 'display', 'print',
 *   or 'any'. The default value is 'display'.
 * @property {number} [annotationMode] - Controls which annotations are
 *   included in the operatorList, for annotations with appearance-data; the
 *   values from {@link AnnotationMode} should be used. The following values
 *   are supported:
 *    - `AnnotationMode.DISABLE`, which disables all annotations.
 *    - `AnnotationMode.ENABLE`, which includes all possible annotations (thus
 *      it also depends on the `intent`-option, see above).
*  - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain
 *      interactive form elements (those will be rendered in the display layer).
 *    - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations
 *      (as above) but where interactive form elements are updated with data
 *      from the {@link AnnotationStorage}-instance; useful e.g. for printing.
 *   The default value is `AnnotationMode.ENABLE`.
 * @property {PrintAnnotationStorage} [printAnnotationStorage]
 */

/**
 * Structure tree node. The root node will have a role "Root".
 *
 * @typedef {Object} StructTreeNode
 * @property {Array<StructTreeNode | StructTreeContent>} children - Array of
 *   {@link StructTreeNode} and {@link StructTreeContent} objects.
 * @property {string} role - The element's role, already mapped if a role map
 *   exists in the PDF.
 */

/**
 * Structure tree content.
 *
 * @typedef {Object} StructTreeContent
 * @property {string} type - Either "content" for page and stream structure
 *   elements or "object" for object references.
 * @property {string} id - Unique id that will map to the text layer.
 */

/**
 * PDF page operator list.
 *
 * @typedef {Object} PDFOperatorList
 * @property {Array} fnArray - Array containing the operator functions.
 * @property {Array} argsArray - Array containing the arguments of the
 *   functions.
 */

/**
 * Proxy to a `PDFPage` in the worker thread.
 */
class PDFPageProxy {
  /**
   * @param {number} pageIndex - Zero-based index of the page; the public
   *   `pageNumber` is derived from it as `pageIndex + 1`.
   * @param {Object} pageInfo - Page data, stored as-is; the `rotate`, `ref`,
   *   `userUnit` and `view` getters read from it.
   * @param {Object} transport - Object used for all worker requests made by
   *   this page; its `commonObjs` are shared with the page.
   * @param {Object} ownerDocument - Stored as-is and handed to the default
   *   canvas factory when `render` has to create one.
   * @param {boolean} [pdfBug] - When true, a {@link StatTimer} is created so
   *   that `stats` is non-null. The default value is `false`.
   */
  constructor(pageIndex, pageInfo, transport, ownerDocument, pdfBug = false) {
    this._pageIndex = pageIndex;
    this._pageInfo = pageInfo;
    this._ownerDocument = ownerDocument;
    this._transport = transport;
    // Stats are only collected when `pdfBug` is set; otherwise `null`.
    this._stats = pdfBug ? new StatTimer() : null;
    this._pdfBug = pdfBug;
    /** @type {PDFObjects} */
    this.commonObjs = transport.commonObjs;
    // Page-local objects, as opposed to the shared `commonObjs` above.
    this.objs = new PDFObjects();

    // Closed and emptied during clean-up/destruction.
    this._bitmaps = new Set();

    this.cleanupAfterRender = false;
    this.pendingCleanup = false;
    // Both maps are keyed by the `cacheKey` of a rendering intent.
    this._intentStates = new Map();
    this._annotationPromises = new Map();
    this.destroyed = false;
  }

  /**
   * @type {number} Page number of the page. First page is 1.
*/ get pageNumber() { return this._pageIndex + 1; } /** * @type {number} The number of degrees the page is rotated clockwise. */ get rotate() { return this._pageInfo.rotate; } /** * @type {RefProxy | null} The reference that points to this page. */ get ref() { return this._pageInfo.ref; } /** * @type {number} The default size of units in 1/72nds of an inch. */ get userUnit() { return this._pageInfo.userUnit; } /** * @type {Array} An array of the visible portion of the PDF page in * user space units [x1, y1, x2, y2]. */ get view() { return this._pageInfo.view; } /** * @param {GetViewportParameters} params - Viewport parameters. * @returns {PageViewport} Contains 'width' and 'height' properties * along with transforms required for rendering. */ getViewport({ scale, rotation = this.rotate, offsetX = 0, offsetY = 0, dontFlip = false, } = {}) { return new PageViewport({ viewBox: this.view, scale, rotation, offsetX, offsetY, dontFlip, }); } /** * @param {GetAnnotationsParameters} params - Annotation parameters. * @returns {Promise>} A promise that is resolved with an * {Array} of the annotation objects. */ getAnnotations({ intent = "display" } = {}) { const intentArgs = this._transport.getRenderingIntent(intent); let promise = this._annotationPromises.get(intentArgs.cacheKey); if (!promise) { promise = this._transport.getAnnotations( this._pageIndex, intentArgs.renderingIntent ); this._annotationPromises.set(intentArgs.cacheKey, promise); if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) { promise = promise.then(annotations => { for (const annotation of annotations) { if (annotation.titleObj !== undefined) { Object.defineProperty(annotation, "title", { get() { deprecated( "`title`-property on annotation, please use `titleObj` instead." 
); return annotation.titleObj.str; }, }); } if (annotation.contentsObj !== undefined) { Object.defineProperty(annotation, "contents", { get() { deprecated( "`contents`-property on annotation, please use `contentsObj` instead." ); return annotation.contentsObj.str; }, }); } } return annotations; }); } } return promise; } /** * @returns {Promise} A promise that is resolved with an * {Object} with JS actions. */ getJSActions() { return (this._jsActionsPromise ||= this._transport.getPageJSActions( this._pageIndex )); } /** * @returns {Promise} A promise that is resolved with * an {Object} with a fake DOM object (a tree structure where elements * are {Object} with a name, attributes (class, style, ...), value and * children, very similar to a HTML DOM tree), or `null` if no XFA exists. */ async getXfa() { return this._transport._htmlForXfa?.children[this._pageIndex] || null; } /** * Begins the process of rendering a page to the desired context. * * @param {RenderParameters} params - Page render parameters. * @returns {RenderTask} An object that contains a promise that is * resolved when the page finishes rendering. */ render({ canvasContext, viewport, intent = "display", annotationMode = AnnotationMode.ENABLE, transform = null, imageLayer = null, canvasFactory = null, background = null, optionalContentConfigPromise = null, annotationCanvasMap = null, pageColors = null, printAnnotationStorage = null, }) { if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC")) { if (arguments[0]?.renderInteractiveForms !== undefined) { deprecated( "render no longer accepts the `renderInteractiveForms`-option, " + "please use the `annotationMode`-option instead." 
); if ( arguments[0].renderInteractiveForms === true && annotationMode === AnnotationMode.ENABLE ) { annotationMode = AnnotationMode.ENABLE_FORMS; } } if (arguments[0]?.includeAnnotationStorage !== undefined) { deprecated( "render no longer accepts the `includeAnnotationStorage`-option, " + "please use the `annotationMode`-option instead." ); if ( arguments[0].includeAnnotationStorage === true && annotationMode === AnnotationMode.ENABLE ) { annotationMode = AnnotationMode.ENABLE_STORAGE; } } } if (this._stats) { this._stats.time("Overall"); } const intentArgs = this._transport.getRenderingIntent( intent, annotationMode, printAnnotationStorage ); // If there was a pending destroy, cancel it so no cleanup happens during // this call to render. this.pendingCleanup = false; if (!optionalContentConfigPromise) { optionalContentConfigPromise = this._transport.getOptionalContentConfig(); } let intentState = this._intentStates.get(intentArgs.cacheKey); if (!intentState) { intentState = Object.create(null); this._intentStates.set(intentArgs.cacheKey, intentState); } // Ensure that a pending `streamReader` cancel timeout is always aborted. if (intentState.streamReaderCancelTimeout) { clearTimeout(intentState.streamReaderCancelTimeout); intentState.streamReaderCancelTimeout = null; } const canvasFactoryInstance = canvasFactory || new DefaultCanvasFactory({ ownerDocument: this._ownerDocument }); const intentPrint = !!( intentArgs.renderingIntent & RenderingIntentFlag.PRINT ); // If there's no displayReadyCapability yet, then the operatorList // was never requested before. Make the request and create the promise. 
if (!intentState.displayReadyCapability) { intentState.displayReadyCapability = createPromiseCapability(); intentState.operatorList = { fnArray: [], argsArray: [], lastChunk: false, separateAnnots: null, }; if (this._stats) { this._stats.time("Page Request"); } this._pumpOperatorList(intentArgs); } const complete = error => { intentState.renderTasks.delete(internalRenderTask); // Attempt to reduce memory usage during *printing*, by always running // cleanup once rendering has finished (regardless of cleanupAfterRender). if (this.cleanupAfterRender || intentPrint) { this.pendingCleanup = true; } this._tryCleanup(); if (error) { internalRenderTask.capability.reject(error); this._abortOperatorList({ intentState, reason: error instanceof Error ? error : new Error(error), }); } else { internalRenderTask.capability.resolve(); } if (this._stats) { this._stats.timeEnd("Rendering"); this._stats.timeEnd("Overall"); } }; const internalRenderTask = new InternalRenderTask({ callback: complete, // Only include the required properties, and *not* the entire object. params: { canvasContext, viewport, transform, imageLayer, background, }, objs: this.objs, commonObjs: this.commonObjs, annotationCanvasMap, operatorList: intentState.operatorList, pageIndex: this._pageIndex, canvasFactory: canvasFactoryInstance, useRequestAnimationFrame: !intentPrint, pdfBug: this._pdfBug, pageColors, }); (intentState.renderTasks ||= new Set()).add(internalRenderTask); const renderTask = internalRenderTask.task; Promise.all([ intentState.displayReadyCapability.promise, optionalContentConfigPromise, ]) .then(([transparency, optionalContentConfig]) => { if (this.pendingCleanup) { complete(); return; } if (this._stats) { this._stats.time("Rendering"); } internalRenderTask.initializeGraphics({ transparency, optionalContentConfig, }); internalRenderTask.operatorListChanged(); }) .catch(complete); return renderTask; } /** * @param {GetOperatorListParameters} params - Page getOperatorList * parameters. 
* @returns {Promise} A promise resolved with an * {@link PDFOperatorList} object that represents the page's operator list. */ getOperatorList({ intent = "display", annotationMode = AnnotationMode.ENABLE, printAnnotationStorage = null, } = {}) { function operatorListChanged() { if (intentState.operatorList.lastChunk) { intentState.opListReadCapability.resolve(intentState.operatorList); intentState.renderTasks.delete(opListTask); } } const intentArgs = this._transport.getRenderingIntent( intent, annotationMode, printAnnotationStorage, /* isOpList = */ true ); let intentState = this._intentStates.get(intentArgs.cacheKey); if (!intentState) { intentState = Object.create(null); this._intentStates.set(intentArgs.cacheKey, intentState); } let opListTask; if (!intentState.opListReadCapability) { opListTask = Object.create(null); opListTask.operatorListChanged = operatorListChanged; intentState.opListReadCapability = createPromiseCapability(); (intentState.renderTasks ||= new Set()).add(opListTask); intentState.operatorList = { fnArray: [], argsArray: [], lastChunk: false, separateAnnots: null, }; if (this._stats) { this._stats.time("Page Request"); } this._pumpOperatorList(intentArgs); } return intentState.opListReadCapability.promise; } /** * NOTE: All occurrences of whitespace will be replaced by * standard spaces (0x20). * * @param {getTextContentParameters} params - getTextContent parameters. * @returns {ReadableStream} Stream for reading text content chunks. 
*/ streamTextContent({ disableCombineTextItems = false, includeMarkedContent = false, } = {}) { const TEXT_CONTENT_CHUNK_SIZE = 100; return this._transport.messageHandler.sendWithStream( "GetTextContent", { pageIndex: this._pageIndex, combineTextItems: disableCombineTextItems !== true, includeMarkedContent: includeMarkedContent === true, }, { highWaterMark: TEXT_CONTENT_CHUNK_SIZE, size(textContent) { return textContent.items.length; }, } ); } /** * NOTE: All occurrences of whitespace will be replaced by * standard spaces (0x20). * * @param {getTextContentParameters} params - getTextContent parameters. * @returns {Promise} A promise that is resolved with a * {@link TextContent} object that represents the page's text content. */ getTextContent(params = {}) { if (this._transport._htmlForXfa) { // TODO: We need to revisit this once the XFA foreground patch lands and // only do this for non-foreground XFA. return this.getXfa().then(xfa => { return XfaText.textContent(xfa); }); } const readableStream = this.streamTextContent(params); return new Promise(function (resolve, reject) { function pump() { reader.read().then(function ({ value, done }) { if (done) { resolve(textContent); return; } Object.assign(textContent.styles, value.styles); textContent.items.push(...value.items); pump(); }, reject); } const reader = readableStream.getReader(); const textContent = { items: [], styles: Object.create(null), }; pump(); }); } /** * @returns {Promise} A promise that is resolved with a * {@link StructTreeNode} object that represents the page's structure tree, * or `null` when no structure tree is present for the current page. */ getStructTree() { return (this._structTreePromise ||= this._transport.getStructTree( this._pageIndex )); } /** * Destroys the page object. 
* @private */ _destroy() { this.destroyed = true; const waitOn = []; for (const intentState of this._intentStates.values()) { this._abortOperatorList({ intentState, reason: new Error("Page was destroyed."), force: true, }); if (intentState.opListReadCapability) { // Avoid errors below, since the renderTasks are just stubs. continue; } for (const internalRenderTask of intentState.renderTasks) { waitOn.push(internalRenderTask.completed); internalRenderTask.cancel(); } } this.objs.clear(); for (const bitmap of this._bitmaps) { bitmap.close(); } this._bitmaps.clear(); this._annotationPromises.clear(); this._jsActionsPromise = null; this._structTreePromise = null; this.pendingCleanup = false; return Promise.all(waitOn); } /** * Cleans up resources allocated by the page. * * @param {boolean} [resetStats] - Reset page stats, if enabled. * The default value is `false`. * @returns {boolean} Indicates if clean-up was successfully run. */ cleanup(resetStats = false) { this.pendingCleanup = true; return this._tryCleanup(resetStats); } /** * Attempts to clean up if rendering is in a state where that's possible. * @private */ _tryCleanup(resetStats = false) { if (!this.pendingCleanup) { return false; } for (const { renderTasks, operatorList } of this._intentStates.values()) { if (renderTasks.size > 0 || !operatorList.lastChunk) { return false; } } this._intentStates.clear(); this.objs.clear(); this._annotationPromises.clear(); this._jsActionsPromise = null; this._structTreePromise = null; if (resetStats && this._stats) { this._stats = new StatTimer(); } for (const bitmap of this._bitmaps) { bitmap.close(); } this._bitmaps.clear(); this.pendingCleanup = false; return true; } /** * @private */ _startRenderPage(transparency, cacheKey) { const intentState = this._intentStates.get(cacheKey); if (!intentState) { return; // Rendering was cancelled. 
} if (this._stats) { this._stats.timeEnd("Page Request"); } // TODO Refactor RenderPageRequest to separate rendering // and operator list logic if (intentState.displayReadyCapability) { intentState.displayReadyCapability.resolve(transparency); } } /** * @private */ _renderPageChunk(operatorListChunk, intentState) { // Add the new chunk to the current operator list. for (let i = 0, ii = operatorListChunk.length; i < ii; i++) { intentState.operatorList.fnArray.push(operatorListChunk.fnArray[i]); intentState.operatorList.argsArray.push(operatorListChunk.argsArray[i]); } intentState.operatorList.lastChunk = operatorListChunk.lastChunk; intentState.operatorList.separateAnnots = operatorListChunk.separateAnnots; // Notify all the rendering tasks there are more operators to be consumed. for (const internalRenderTask of intentState.renderTasks) { internalRenderTask.operatorListChanged(); } if (operatorListChunk.lastChunk) { this._tryCleanup(); } } /** * @private */ _pumpOperatorList({ renderingIntent, cacheKey, annotationStorageMap }) { if ( typeof PDFJSDev === "undefined" || PDFJSDev.test("!PRODUCTION || TESTING") ) { assert( Number.isInteger(renderingIntent) && renderingIntent > 0, '_pumpOperatorList: Expected valid "renderingIntent" argument.' ); } const readableStream = this._transport.messageHandler.sendWithStream( "GetOperatorList", { pageIndex: this._pageIndex, intent: renderingIntent, cacheKey, annotationStorage: annotationStorageMap, } ); const reader = readableStream.getReader(); const intentState = this._intentStates.get(cacheKey); intentState.streamReader = reader; const pump = () => { reader.read().then( ({ value, done }) => { if (done) { intentState.streamReader = null; return; } if (this._transport.destroyed) { return; // Ignore any pending requests if the worker was terminated. 
} this._renderPageChunk(value, intentState); pump(); }, reason => { intentState.streamReader = null; if (this._transport.destroyed) { return; // Ignore any pending requests if the worker was terminated. } if (intentState.operatorList) { // Mark operator list as complete. intentState.operatorList.lastChunk = true; for (const internalRenderTask of intentState.renderTasks) { internalRenderTask.operatorListChanged(); } this._tryCleanup(); } if (intentState.displayReadyCapability) { intentState.displayReadyCapability.reject(reason); } else if (intentState.opListReadCapability) { intentState.opListReadCapability.reject(reason); } else { throw reason; } } ); }; pump(); } /** * @private */ _abortOperatorList({ intentState, reason, force = false }) { if ( typeof PDFJSDev === "undefined" || PDFJSDev.test("!PRODUCTION || TESTING") ) { assert( reason instanceof Error, '_abortOperatorList: Expected valid "reason" argument.' ); } if (!intentState.streamReader) { return; } if (!force) { // Ensure that an Error occurring in *only* one `InternalRenderTask`, e.g. // multiple render() calls on the same canvas, won't break all rendering. if (intentState.renderTasks.size > 0) { return; } // Don't immediately abort parsing on the worker-thread when rendering is // cancelled, since that will unnecessarily delay re-rendering when (for // partially parsed pages) e.g. zooming/rotation occurs in the viewer. if (reason instanceof RenderingCancelledException) { intentState.streamReaderCancelTimeout = setTimeout(() => { this._abortOperatorList({ intentState, reason, force: true }); intentState.streamReaderCancelTimeout = null; }, RENDERING_CANCELLED_TIMEOUT); return; } } intentState.streamReader .cancel(new AbortException(reason.message)) .catch(() => { // Avoid "Uncaught promise" messages in the console. }); intentState.streamReader = null; if (this._transport.destroyed) { return; // Ignore any pending requests if the worker was terminated. 
} // Remove the current `intentState`, since a cancelled `getOperatorList` // call on the worker-thread cannot be re-started... for (const [curCacheKey, curIntentState] of this._intentStates) { if (curIntentState === intentState) { this._intentStates.delete(curCacheKey); break; } } // ... and force clean-up to ensure that any old state is always removed. this.cleanup(); } /** * @type {Object} Returns page stats, if enabled; returns `null` otherwise. */ get stats() { return this._stats; } } class LoopbackPort { constructor() { this._listeners = []; this._deferred = Promise.resolve(); } postMessage(obj, transfers) { const event = { data: structuredClone(obj, transfers), }; this._deferred.then(() => { for (const listener of this._listeners) { listener.call(this, event); } }); } addEventListener(name, listener) { this._listeners.push(listener); } removeEventListener(name, listener) { const i = this._listeners.indexOf(listener); this._listeners.splice(i, 1); } terminate() { this._listeners.length = 0; } } /** * @typedef {Object} PDFWorkerParameters * @property {string} [name] - The name of the worker. * @property {Object} [port] - The `workerPort` object. * @property {number} [verbosity] - Controls the logging level; * the constants from {@link VerbosityLevel} should be used. */ const PDFWorkerUtil = { isWorkerDisabled: false, fallbackWorkerSrc: null, fakeWorkerId: 0, }; if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) { // eslint-disable-next-line no-undef if (isNodeJS && typeof __non_webpack_require__ === "function") { // Workers aren't supported in Node.js, force-disabling them there. PDFWorkerUtil.isWorkerDisabled = true; PDFWorkerUtil.fallbackWorkerSrc = PDFJSDev.test("LIB") ? 
"../pdf.worker.js" : "./pdf.worker.js"; } else if (typeof document === "object") { const pdfjsFilePath = document?.currentScript?.src; if (pdfjsFilePath) { PDFWorkerUtil.fallbackWorkerSrc = pdfjsFilePath.replace( /(\.(?:min\.)?js)(\?.*)?$/i, ".worker$1$2" ); } } // Check if URLs have the same origin. For non-HTTP based URLs, returns false. PDFWorkerUtil.isSameOrigin = function (baseUrl, otherUrl) { let base; try { base = new URL(baseUrl); if (!base.origin || base.origin === "null") { return false; // non-HTTP url } } catch (e) { return false; } const other = new URL(otherUrl, base); return base.origin === other.origin; }; PDFWorkerUtil.createCDNWrapper = function (url) { // We will rely on blob URL's property to specify origin. // We want this function to fail in case if createObjectURL or Blob do not // exist or fail for some reason -- our Worker creation will fail anyway. const wrapper = `importScripts("${url}");`; return URL.createObjectURL(new Blob([wrapper])); }; } /** * PDF.js web worker abstraction that controls the instantiation of PDF * documents. Message handlers are used to pass information from the main * thread to the worker thread and vice versa. If the creation of a web * worker is not possible, a "fake" worker will be used instead. * * @param {PDFWorkerParameters} params - The worker initialization parameters. */ class PDFWorker { static #workerPorts = new WeakMap(); constructor({ name = null, port = null, verbosity = getVerbosityLevel(), } = {}) { if (port && PDFWorker.#workerPorts.has(port)) { throw new Error("Cannot use more than one PDFWorker per port."); } this.name = name; this.destroyed = false; this.verbosity = verbosity; this._readyCapability = createPromiseCapability(); this._port = null; this._webWorker = null; this._messageHandler = null; if (port) { PDFWorker.#workerPorts.set(port, this); this._initializeFromPort(port); return; } this._initialize(); } /** * Promise for worker initialization completion. 
* @type {Promise} */ get promise() { return this._readyCapability.promise; } /** * The current `workerPort`, when it exists. * @type {Worker} */ get port() { return this._port; } /** * The current MessageHandler-instance. * @type {MessageHandler} */ get messageHandler() { return this._messageHandler; } _initializeFromPort(port) { this._port = port; this._messageHandler = new MessageHandler("main", "worker", port); this._messageHandler.on("ready", function () { // Ignoring "ready" event -- MessageHandler should already be initialized // and ready to accept messages. }); this._readyCapability.resolve(); } _initialize() { // If worker support isn't disabled explicit and the browser has worker // support, create a new web worker and test if it/the browser fulfills // all requirements to run parts of pdf.js in a web worker. // Right now, the requirement is, that an Uint8Array is still an // Uint8Array as it arrives on the worker. (Chrome added this with v.15.) if ( typeof Worker !== "undefined" && !PDFWorkerUtil.isWorkerDisabled && !PDFWorker._mainThreadWorkerMessageHandler ) { let workerSrc = PDFWorker.workerSrc; try { // Wraps workerSrc path into blob URL, if the former does not belong // to the same origin. if ( typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC") && !PDFWorkerUtil.isSameOrigin(window.location.href, workerSrc) ) { workerSrc = PDFWorkerUtil.createCDNWrapper( new URL(workerSrc, window.location).href ); } // Some versions of FF can't create a worker on localhost, see: // https://bugzilla.mozilla.org/show_bug.cgi?id=683280 const worker = new Worker(workerSrc); const messageHandler = new MessageHandler("main", "worker", worker); const terminateEarly = () => { worker.removeEventListener("error", onWorkerError); messageHandler.destroy(); worker.terminate(); if (this.destroyed) { this._readyCapability.reject(new Error("Worker was destroyed")); } else { // Fall back to fake worker if the termination is caused by an // error (e.g. 
NetworkError / SecurityError). this._setupFakeWorker(); } }; const onWorkerError = () => { if (!this._webWorker) { // Worker failed to initialize due to an error. Clean up and fall // back to the fake worker. terminateEarly(); } }; worker.addEventListener("error", onWorkerError); messageHandler.on("test", data => { worker.removeEventListener("error", onWorkerError); if (this.destroyed) { terminateEarly(); return; // worker was destroyed } if (data) { this._messageHandler = messageHandler; this._port = worker; this._webWorker = worker; this._readyCapability.resolve(); // Send global setting, e.g. verbosity level. messageHandler.send("configure", { verbosity: this.verbosity, }); } else { this._setupFakeWorker(); messageHandler.destroy(); worker.terminate(); } }); messageHandler.on("ready", data => { worker.removeEventListener("error", onWorkerError); if (this.destroyed) { terminateEarly(); return; // worker was destroyed } try { sendTest(); } catch (e) { // We need fallback to a faked worker. this._setupFakeWorker(); } }); const sendTest = () => { const testObj = new Uint8Array(); // Ensure that we can use `postMessage` transfers. messageHandler.send("test", testObj, [testObj.buffer]); }; // It might take time for the worker to initialize. We will try to send // the "test" message immediately, and once the "ready" message arrives. // The worker shall process only the first received "test" message. sendTest(); return; } catch (e) { info("The worker has been disabled."); } } // Either workers are disabled, not supported or have thrown an exception. // Thus, we fallback to a faked worker. 
this._setupFakeWorker(); } _setupFakeWorker() { if (!PDFWorkerUtil.isWorkerDisabled) { warn("Setting up fake worker."); PDFWorkerUtil.isWorkerDisabled = true; } PDFWorker._setupFakeWorkerGlobal .then(WorkerMessageHandler => { if (this.destroyed) { this._readyCapability.reject(new Error("Worker was destroyed")); return; } const port = new LoopbackPort(); this._port = port; // All fake workers use the same port, making id unique. const id = `fake${PDFWorkerUtil.fakeWorkerId++}`; // If the main thread is our worker, setup the handling for the // messages -- the main thread sends to it self. const workerHandler = new MessageHandler(id + "_worker", id, port); WorkerMessageHandler.setup(workerHandler, port); const messageHandler = new MessageHandler(id, id + "_worker", port); this._messageHandler = messageHandler; this._readyCapability.resolve(); // Send global setting, e.g. verbosity level. messageHandler.send("configure", { verbosity: this.verbosity, }); }) .catch(reason => { this._readyCapability.reject( new Error(`Setting up fake worker failed: "${reason.message}".`) ); }); } /** * Destroys the worker instance. */ destroy() { this.destroyed = true; if (this._webWorker) { // We need to terminate only web worker created resource. this._webWorker.terminate(); this._webWorker = null; } PDFWorker.#workerPorts.delete(this._port); this._port = null; if (this._messageHandler) { this._messageHandler.destroy(); this._messageHandler = null; } } /** * @param {PDFWorkerParameters} params - The worker initialization parameters. */ static fromPort(params) { if (!params?.port) { throw new Error("PDFWorker.fromPort - invalid method signature."); } if (this.#workerPorts.has(params.port)) { return this.#workerPorts.get(params.port); } return new PDFWorker(params); } /** * The current `workerSrc`, when it exists. 
* @type {string} */ static get workerSrc() { if (GlobalWorkerOptions.workerSrc) { return GlobalWorkerOptions.workerSrc; } if (PDFWorkerUtil.fallbackWorkerSrc !== null) { if (!isNodeJS) { deprecated('No "GlobalWorkerOptions.workerSrc" specified.'); } return PDFWorkerUtil.fallbackWorkerSrc; } throw new Error('No "GlobalWorkerOptions.workerSrc" specified.'); } static get _mainThreadWorkerMessageHandler() { try { return globalThis.pdfjsWorker?.WorkerMessageHandler || null; } catch (ex) { return null; } } // Loads worker code into the main-thread. static get _setupFakeWorkerGlobal() { const loader = async () => { const mainWorkerMessageHandler = this._mainThreadWorkerMessageHandler; if (mainWorkerMessageHandler) { // The worker was already loaded using e.g. a `