/* Copyright 2012 Mozilla Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @module pdfjsLib */ import { AbortException, AnnotationMode, assert, createPromiseCapability, getVerbosityLevel, info, InvalidPDFException, isArrayBuffer, MAX_IMAGE_SIZE_TO_CACHE, MissingPDFException, PasswordException, RenderingIntentFlag, setVerbosityLevel, shadow, stringToBytes, UnexpectedResponseException, UnknownErrorException, unreachable, warn, } from "../shared/util.js"; import { AnnotationStorage, PrintAnnotationStorage, } from "./annotation_storage.js"; import { deprecated, DOMCanvasFactory, DOMCMapReaderFactory, DOMFilterFactory, DOMStandardFontDataFactory, isDataScheme, isValidFetchUrl, loadScript, PageViewport, RenderingCancelledException, StatTimer, } from "./display_utils.js"; import { FontFaceObject, FontLoader } from "./font_loader.js"; import { CanvasGraphics } from "./canvas.js"; import { GlobalWorkerOptions } from "./worker_options.js"; import { isNodeJS } from "../shared/is_node.js"; import { MessageHandler } from "../shared/message_handler.js"; import { Metadata } from "./metadata.js"; import { OptionalContentConfig } from "./optional_content_config.js"; import { PDFDataTransportStream } from "./transport_stream.js"; import { XfaText } from "./xfa_text.js"; const DEFAULT_RANGE_CHUNK_SIZE = 65536; // 2^16 = 65536 const RENDERING_CANCELLED_TIMEOUT = 100; // ms const DELAYED_CLEANUP_TIMEOUT = 5000; // ms let DefaultCanvasFactory = DOMCanvasFactory; let DefaultCMapReaderFactory = DOMCMapReaderFactory; let DefaultFilterFactory = DOMFilterFactory; let DefaultStandardFontDataFactory = DOMStandardFontDataFactory; if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC") && isNodeJS) { const { NodeCanvasFactory, NodeCMapReaderFactory, NodeFilterFactory, NodeStandardFontDataFactory, } = require("./node_utils.js"); DefaultCanvasFactory = NodeCanvasFactory; DefaultCMapReaderFactory = NodeCMapReaderFactory; DefaultFilterFactory = NodeFilterFactory; DefaultStandardFontDataFactory = NodeStandardFontDataFactory; } let createPDFNetworkStream; if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) { const streamsPromise = Promise.all([ import("./network.js"), import("./fetch_stream.js"), ]); createPDFNetworkStream = async params => { const [{ PDFNetworkStream }, { PDFFetchStream }] = await streamsPromise; return isValidFetchUrl(params.url) ? new PDFFetchStream(params) : new PDFNetworkStream(params); }; } else if (PDFJSDev.test("GENERIC || CHROME")) { if (PDFJSDev.test("GENERIC") && isNodeJS) { const { PDFNodeStream } = require("./node_stream.js"); createPDFNetworkStream = params => { return new PDFNodeStream(params); }; } else { const { PDFNetworkStream } = require("./network.js"); const { PDFFetchStream } = require("./fetch_stream.js"); createPDFNetworkStream = params => { return isValidFetchUrl(params.url) ? new PDFFetchStream(params) : new PDFNetworkStream(params); }; } } /** * @typedef { Int8Array | Uint8Array | Uint8ClampedArray | * Int16Array | Uint16Array | * Int32Array | Uint32Array | Float32Array | * Float64Array * } TypedArray */ /** * @typedef { TypedArray | ArrayBuffer | Array<number> | string } BinaryData */ /** * @typedef {Object} RefProxy * @property {number} num * @property {number} gen */ /** * Document initialization / loading parameters object. * * @typedef {Object} DocumentInitParameters * @property {string | URL} [url] - The URL of the PDF. * @property {BinaryData} [data] - Binary PDF data. * Use TypedArrays (Uint8Array) to improve the memory usage. If PDF data is * BASE64-encoded, use `atob()` to convert it to a binary string first. * * NOTE: If TypedArrays are used they will generally be transferred to the * worker-thread. This will help reduce main-thread memory usage, however * it will take ownership of the TypedArrays. * @property {Object} [httpHeaders] - Basic authentication headers. * @property {boolean} [withCredentials] - Indicates whether or not * cross-site Access-Control requests should be made using credentials such * as cookies or authorization headers. The default is `false`. * @property {string} [password] - For decrypting password-protected PDFs. * @property {number} [length] - The PDF file length. It's used for progress * reports and range requests operations. * @property {PDFDataRangeTransport} [range] - Allows for using a custom range * transport implementation. * @property {number} [rangeChunkSize] - Specify maximum number of bytes fetched * per range request. The default value is {@link DEFAULT_RANGE_CHUNK_SIZE}. * @property {PDFWorker} [worker] - The worker that will be used for loading and * parsing the PDF data. * @property {number} [verbosity] - Controls the logging level; the constants * from {@link VerbosityLevel} should be used. * @property {string} [docBaseUrl] - The base URL of the document, used when * attempting to recover valid absolute URLs for annotations, and outline * items, that (incorrectly) only specify relative URLs. * @property {string} [cMapUrl] - The URL where the predefined Adobe CMaps are * located. Include the trailing slash. * @property {boolean} [cMapPacked] - Specifies if the Adobe CMaps are binary * packed or not. The default value is `true`. * @property {Object} [CMapReaderFactory] - The factory that will be used when * reading built-in CMap files. Providing a custom factory is useful for * environments without Fetch API or `XMLHttpRequest` support, such as * Node.js. The default value is {DOMCMapReaderFactory}. * @property {boolean} [useSystemFonts] - When `true`, fonts that aren't * embedded in the PDF document will fallback to a system font. * The default value is `true` in web environments and `false` in Node.js; * unless `disableFontFace === true` in which case this defaults to `false` * regardless of the environment (to prevent completely broken fonts). * @property {string} [standardFontDataUrl] - The URL where the standard font * files are located. Include the trailing slash. * @property {Object} [StandardFontDataFactory] - The factory that will be used * when reading the standard font files. Providing a custom factory is useful * for environments without Fetch API or `XMLHttpRequest` support, such as * Node.js. The default value is {DOMStandardFontDataFactory}. * @property {boolean} [useWorkerFetch] - Enable using the Fetch API in the * worker-thread when reading CMap and standard font files. When `true`, * the `CMapReaderFactory` and `StandardFontDataFactory` options are ignored. * The default value is `true` in web environments and `false` in Node.js. * @property {boolean} [stopAtErrors] - Reject certain promises, e.g. * `getOperatorList`, `getTextContent`, and `RenderTask`, when the associated * PDF data cannot be successfully parsed, instead of attempting to recover * whatever possible of the data. The default value is `false`. * @property {number} [maxImageSize] - The maximum allowed image size in total * pixels, i.e. width * height. Images above this value will not be rendered. * Use -1 for no limit, which is also the default value. * @property {boolean} [isEvalSupported] - Determines if we can evaluate strings * as JavaScript. Primarily used to improve performance of font rendering, and * when parsing PDF functions. The default value is `true`. * @property {boolean} [isOffscreenCanvasSupported] - Determines if we can use * `OffscreenCanvas` in the worker. Primarily used to improve performance of * image conversion/rendering. * The default value is `true` in web environments and `false` in Node.js. * @property {boolean} [canvasMaxAreaInBytes] - The integer value is used to * know when an image must be resized (uses `OffscreenCanvas` in the worker). * If it's -1 then a possibly slow algorithm is used to guess the max value. * @property {boolean} [disableFontFace] - By default fonts are converted to * OpenType fonts and loaded via the Font Loading API or `@font-face` rules. * If disabled, fonts will be rendered using a built-in font renderer that * constructs the glyphs with primitive path commands. * The default value is `false` in web environments and `true` in Node.js. * @property {boolean} [fontExtraProperties] - Include additional properties, * which are unused during rendering of PDF documents, when exporting the * parsed font data from the worker-thread. This may be useful for debugging * purposes (and backwards compatibility), but note that it will lead to * increased memory usage. The default value is `false`. * @property {boolean} [enableXfa] - Render Xfa forms if any. * The default value is `false`. * @property {HTMLDocument} [ownerDocument] - Specify an explicit document * context to create elements with and to load resources, such as fonts, * into. Defaults to the current document. * @property {boolean} [disableRange] - Disable range request loading of PDF * files. When enabled, and if the server supports partial content requests, * then the PDF will be fetched in chunks. The default value is `false`. * @property {boolean} [disableStream] - Disable streaming of PDF file data. * By default PDF.js attempts to load PDF files in chunks. The default value * is `false`. * @property {boolean} [disableAutoFetch] - Disable pre-fetching of PDF file * data. When range requests are enabled PDF.js will automatically keep * fetching more data even if it isn't needed to display the current page. * The default value is `false`. * * NOTE: It is also necessary to disable streaming, see above, in order for * disabling of pre-fetching to work correctly. * @property {boolean} [pdfBug] - Enables special hooks for debugging PDF.js * (see `web/debugger.js`). The default value is `false`. * @property {Object} [canvasFactory] - The factory instance that will be used * when creating canvases. The default value is {new DOMCanvasFactory()}. * @property {Object} [filterFactory] - A factory instance that will be used * to create SVG filters when rendering some images on the main canvas. */ /** * This is the main entry point for loading a PDF and interacting with it. * * NOTE: If a URL is used to fetch the PDF data a standard Fetch API call (or * XHR as fallback) is used, which means it must follow same origin rules, * e.g. no cross-domain requests without CORS. * * @param {string | URL | TypedArray | ArrayBuffer | DocumentInitParameters} * src - Can be a URL where a PDF file is located, a typed array (Uint8Array) * already populated with data, or a parameter object. * @returns {PDFDocumentLoadingTask} */ function getDocument(src) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) { if (typeof src === "string" || src instanceof URL) { src = { url: src }; } else if (isArrayBuffer(src)) { src = { data: src }; } } if (typeof src !== "object") { throw new Error("Invalid parameter in getDocument, need parameter object."); } if (!src.url && !src.data && !src.range) { throw new Error( "Invalid parameter object: need either .data, .range or .url" ); } const task = new PDFDocumentLoadingTask(); const { docId } = task; const url = src.url ? getUrlProp(src.url) : null; const data = src.data ? getDataProp(src.data) : null; const httpHeaders = src.httpHeaders || null; const withCredentials = src.withCredentials === true; const password = src.password ?? null; const rangeTransport = src.range instanceof PDFDataRangeTransport ? src.range : null; const rangeChunkSize = Number.isInteger(src.rangeChunkSize) && src.rangeChunkSize > 0 ? src.rangeChunkSize : DEFAULT_RANGE_CHUNK_SIZE; let worker = src.worker instanceof PDFWorker ? src.worker : null; const verbosity = src.verbosity; // Ignore "data:"-URLs, since they can't be used to recover valid absolute // URLs anyway. We want to avoid sending them to the worker-thread, since // they contain the *entire* PDF document and can thus be arbitrarily long. const docBaseUrl = typeof src.docBaseUrl === "string" && !isDataScheme(src.docBaseUrl) ? src.docBaseUrl : null; const cMapUrl = typeof src.cMapUrl === "string" ? src.cMapUrl : null; const cMapPacked = src.cMapPacked !== false; const CMapReaderFactory = src.CMapReaderFactory || DefaultCMapReaderFactory; const standardFontDataUrl = typeof src.standardFontDataUrl === "string" ? src.standardFontDataUrl : null; const StandardFontDataFactory = src.StandardFontDataFactory || DefaultStandardFontDataFactory; const ignoreErrors = src.stopAtErrors !== true; const maxImageSize = Number.isInteger(src.maxImageSize) && src.maxImageSize > -1 ? src.maxImageSize : -1; const isEvalSupported = src.isEvalSupported !== false; const isOffscreenCanvasSupported = typeof src.isOffscreenCanvasSupported === "boolean" ? src.isOffscreenCanvasSupported : !isNodeJS; const canvasMaxAreaInBytes = Number.isInteger(src.canvasMaxAreaInBytes) ? src.canvasMaxAreaInBytes : -1; const disableFontFace = typeof src.disableFontFace === "boolean" ? src.disableFontFace : isNodeJS; const fontExtraProperties = src.fontExtraProperties === true; const enableXfa = src.enableXfa === true; const ownerDocument = src.ownerDocument || globalThis.document; const disableRange = src.disableRange === true; const disableStream = src.disableStream === true; const disableAutoFetch = src.disableAutoFetch === true; const pdfBug = src.pdfBug === true; // Parameters whose default values depend on other parameters. const length = rangeTransport ? rangeTransport.length : src.length ?? NaN; const useSystemFonts = typeof src.useSystemFonts === "boolean" ? src.useSystemFonts : !isNodeJS && !disableFontFace; const useWorkerFetch = typeof src.useWorkerFetch === "boolean" ? src.useWorkerFetch : (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) || (CMapReaderFactory === DOMCMapReaderFactory && StandardFontDataFactory === DOMStandardFontDataFactory && isValidFetchUrl(cMapUrl, document.baseURI) && isValidFetchUrl(standardFontDataUrl, document.baseURI)); const canvasFactory = src.canvasFactory || new DefaultCanvasFactory({ ownerDocument }); const filterFactory = src.filterFactory || new DefaultFilterFactory({ docId, ownerDocument }); // Parameters only intended for development/testing purposes. const styleElement = typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING") ? src.styleElement : null; // Set the main-thread verbosity level. setVerbosityLevel(verbosity); // Ensure that the various factories can be initialized, when necessary, // since the user may provide *custom* ones. const transportFactory = { canvasFactory, filterFactory, }; if (!useWorkerFetch) { transportFactory.cMapReaderFactory = new CMapReaderFactory({ baseUrl: cMapUrl, isCompressed: cMapPacked, }); transportFactory.standardFontDataFactory = new StandardFontDataFactory({ baseUrl: standardFontDataUrl, }); } if (!worker) { const workerParams = { verbosity, port: GlobalWorkerOptions.workerPort, }; // Worker was not provided -- creating and owning our own. If message port // is specified in global worker options, using it. worker = workerParams.port ? PDFWorker.fromPort(workerParams) : new PDFWorker(workerParams); task._worker = worker; } const fetchDocParams = { docId, apiVersion: typeof PDFJSDev !== "undefined" && !PDFJSDev.test("TESTING") ? PDFJSDev.eval("BUNDLE_VERSION") : null, data, password, disableAutoFetch, rangeChunkSize, length, docBaseUrl, enableXfa, evaluatorOptions: { maxImageSize, disableFontFace, ignoreErrors, isEvalSupported, isOffscreenCanvasSupported, canvasMaxAreaInBytes, fontExtraProperties, useSystemFonts, cMapUrl: useWorkerFetch ? cMapUrl : null, standardFontDataUrl: useWorkerFetch ? standardFontDataUrl : null, }, }; const transportParams = { ignoreErrors, isEvalSupported, disableFontFace, fontExtraProperties, enableXfa, ownerDocument, disableAutoFetch, pdfBug, styleElement, }; worker.promise .then(function () { if (task.destroyed) { throw new Error("Loading aborted"); } const workerIdPromise = _fetchDocument(worker, fetchDocParams); const networkStreamPromise = new Promise(function (resolve) { let networkStream; if (rangeTransport) { networkStream = new PDFDataTransportStream( { length, initialData: rangeTransport.initialData, progressiveDone: rangeTransport.progressiveDone, contentDispositionFilename: rangeTransport.contentDispositionFilename, disableRange, disableStream, }, rangeTransport ); } else if (!data) { if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { throw new Error("Not implemented: createPDFNetworkStream"); } networkStream = createPDFNetworkStream({ url, length, httpHeaders, withCredentials, rangeChunkSize, disableRange, disableStream, }); } resolve(networkStream); }); return Promise.all([workerIdPromise, networkStreamPromise]).then( function ([workerId, networkStream]) { if (task.destroyed) { throw new Error("Loading aborted"); } const messageHandler = new MessageHandler( docId, workerId, worker.port ); const transport = new WorkerTransport( messageHandler, task, networkStream, transportParams, transportFactory ); task._transport = transport; messageHandler.send("Ready", null); } ); }) .catch(task._capability.reject); return task; } /** * Starts fetching of specified PDF document/data. * * @param {PDFWorker} worker * @param {Object} source * @returns {Promise<string>} A promise that is resolved when the worker ID of * the `MessageHandler` is known. * @private */ async function _fetchDocument(worker, source) { if (worker.destroyed) { throw new Error("Worker was destroyed"); } const workerId = await worker.messageHandler.sendWithPromise( "GetDocRequest", source, source.data ? [source.data.buffer] : null ); if (worker.destroyed) { throw new Error("Worker was destroyed"); } return workerId; } function getUrlProp(val) { if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { return null; // The 'url' is unused with `PDFDataRangeTransport`. } if (val instanceof URL) { return val.href; } try { // The full path is required in the 'url' field. return new URL(val, window.location).href; } catch (ex) { if ( typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC") && isNodeJS && typeof val === "string" ) { return val; // Use the url as-is in Node.js environments. } } throw new Error( "Invalid PDF url data: " + "either string or URL-object is expected in the url property." ); } function getDataProp(val) { // Converting string or array-like data to Uint8Array. if ( typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC") && isNodeJS && typeof Buffer !== "undefined" && // eslint-disable-line no-undef val instanceof Buffer // eslint-disable-line no-undef ) { deprecated( "Please provide binary data as `Uint8Array`, rather than `Buffer`." ); return new Uint8Array(val); } if (val instanceof Uint8Array && val.byteLength === val.buffer.byteLength) { // Use the data as-is when it's already a Uint8Array that completely // "utilizes" its underlying ArrayBuffer, to prevent any possible // issues when transferring it to the worker-thread. return val; } if (typeof val === "string") { return stringToBytes(val); } if ((typeof val === "object" && !isNaN(val?.length)) || isArrayBuffer(val)) { return new Uint8Array(val); } throw new Error( "Invalid PDF binary data: either TypedArray, " + "string, or array-like object is expected in the data property." ); } /** * @typedef {Object} OnProgressParameters * @property {number} loaded - Currently loaded number of bytes. * @property {number} total - Total number of bytes in the PDF file. */ /** * The loading task controls the operations required to load a PDF document * (such as network requests) and provides a way to listen for completion, * after which individual pages can be rendered. */ class PDFDocumentLoadingTask { static #docId = 0; constructor() { this._capability = createPromiseCapability(); this._transport = null; this._worker = null; /** * Unique identifier for the document loading task. * @type {string} */ this.docId = `d${PDFDocumentLoadingTask.#docId++}`; /** * Whether the loading task is destroyed or not. * @type {boolean} */ this.destroyed = false; /** * Callback to request a password if a wrong or no password was provided. * The callback receives two parameters: a function that should be called * with the new password, and a reason (see {@link PasswordResponses}). * @type {function} */ this.onPassword = null; /** * Callback to be able to monitor the loading progress of the PDF file * (necessary to implement e.g. a loading bar). * The callback receives an {@link OnProgressParameters} argument. * @type {function} */ this.onProgress = null; } /** * Promise for document loading task completion. * @type {Promise<PDFDocumentProxy>} */ get promise() { return this._capability.promise; } /** * Abort all network requests and destroy the worker. * @returns {Promise<void>} A promise that is resolved when destruction is * completed. */ async destroy() { this.destroyed = true; await this._transport?.destroy(); this._transport = null; if (this._worker) { this._worker.destroy(); this._worker = null; } } } /** * Abstract class to support range requests file loading. * * NOTE: The TypedArrays passed to the constructor and relevant methods below * will generally be transferred to the worker-thread. This will help reduce * main-thread memory usage, however it will take ownership of the TypedArrays. */ class PDFDataRangeTransport { /** * @param {number} length * @param {Uint8Array|null} initialData * @param {boolean} [progressiveDone] * @param {string} [contentDispositionFilename] */ constructor( length, initialData, progressiveDone = false, contentDispositionFilename = null ) { this.length = length; this.initialData = initialData; this.progressiveDone = progressiveDone; this.contentDispositionFilename = contentDispositionFilename; this._rangeListeners = []; this._progressListeners = []; this._progressiveReadListeners = []; this._progressiveDoneListeners = []; this._readyCapability = createPromiseCapability(); } /** * @param {function} listener */ addRangeListener(listener) { this._rangeListeners.push(listener); } /** * @param {function} listener */ addProgressListener(listener) { this._progressListeners.push(listener); } /** * @param {function} listener */ addProgressiveReadListener(listener) { this._progressiveReadListeners.push(listener); } /** * @param {function} listener */ addProgressiveDoneListener(listener) { this._progressiveDoneListeners.push(listener); } /** * @param {number} begin * @param {Uint8Array|null} chunk */ onDataRange(begin, chunk) { for (const listener of this._rangeListeners) { listener(begin, chunk); } } /** * @param {number} loaded * @param {number|undefined} total */ onDataProgress(loaded, total) { this._readyCapability.promise.then(() => { for (const listener of this._progressListeners) { listener(loaded, total); } }); } /** * @param {Uint8Array|null} chunk */ onDataProgressiveRead(chunk) { this._readyCapability.promise.then(() => { for (const listener of this._progressiveReadListeners) { listener(chunk); } }); } onDataProgressiveDone() { this._readyCapability.promise.then(() => { for (const listener of this._progressiveDoneListeners) { listener(); } }); } transportReady() { this._readyCapability.resolve(); } /** * @param {number} begin * @param {number} end */ requestDataRange(begin, end) { unreachable("Abstract method PDFDataRangeTransport.requestDataRange"); } abort() {} } /** * Proxy to a `PDFDocument` in the worker thread. */ class PDFDocumentProxy { constructor(pdfInfo, transport) { this._pdfInfo = pdfInfo; this._transport = transport; if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { // For testing purposes. Object.defineProperty(this, "getXFADatasets", { value: () => { return this._transport.getXFADatasets(); }, }); } } /** * @type {AnnotationStorage} Storage for annotation data in forms. */ get annotationStorage() { return this._transport.annotationStorage; } /** * @type {Object} The filter factory instance. */ get filterFactory() { return this._transport.filterFactory; } /** * @type {number} Total number of pages in the PDF file. */ get numPages() { return this._pdfInfo.numPages; } /** * @type {Array<string, string|null>} A (not guaranteed to be) unique ID to * identify the PDF document. * NOTE: The first element will always be defined for all PDF documents, * whereas the second element is only defined for *modified* PDF documents. */ get fingerprints() { return this._pdfInfo.fingerprints; } /** * @type {boolean} True if only XFA form. */ get isPureXfa() { return shadow(this, "isPureXfa", !!this._transport._htmlForXfa); } /** * NOTE: This is (mostly) intended to support printing of XFA forms. * * @type {Object | null} An object representing a HTML tree structure * to render the XFA, or `null` when no XFA form exists. */ get allXfaHtml() { return this._transport._htmlForXfa; } /** * @param {number} pageNumber - The page number to get. The first page is 1. * @returns {Promise<PDFPageProxy>} A promise that is resolved with * a {@link PDFPageProxy} object. */ getPage(pageNumber) { return this._transport.getPage(pageNumber); } /** * @param {RefProxy} ref - The page reference. * @returns {Promise<number>} A promise that is resolved with the page index, * starting from zero, that is associated with the reference. */ getPageIndex(ref) { return this._transport.getPageIndex(ref); } /** * @returns {Promise<Object<string, Array<any>>>} A promise that is resolved * with a mapping from named destinations to references. * * This can be slow for large documents. Use `getDestination` instead. */ getDestinations() { return this._transport.getDestinations(); } /** * @param {string} id - The named destination to get. * @returns {Promise<Array<any> | null>} A promise that is resolved with all * information of the given named destination, or `null` when the named * destination is not present in the PDF file. */ getDestination(id) { return this._transport.getDestination(id); } /** * @returns {Promise<Array<string> | null>} A promise that is resolved with * an {Array} containing the page labels that correspond to the page * indexes, or `null` when no page labels are present in the PDF file. */ getPageLabels() { return this._transport.getPageLabels(); } /** * @returns {Promise<string>} A promise that is resolved with a {string} * containing the page layout name. */ getPageLayout() { return this._transport.getPageLayout(); } /** * @returns {Promise<string>} A promise that is resolved with a {string} * containing the page mode name. */ getPageMode() { return this._transport.getPageMode(); } /** * @returns {Promise<Object | null>} A promise that is resolved with an * {Object} containing the viewer preferences, or `null` when no viewer * preferences are present in the PDF file. */ getViewerPreferences() { return this._transport.getViewerPreferences(); } /** * @returns {Promise<any | null>} A promise that is resolved with an {Array} * containing the destination, or `null` when no open action is present * in the PDF. */ getOpenAction() { return this._transport.getOpenAction(); } /** * @returns {Promise<any>} A promise that is resolved with a lookup table * for mapping named attachments to their content. */ getAttachments() { return this._transport.getAttachments(); } /** * @returns {Promise<Array<string> | null>} A promise that is resolved with * an {Array} of all the JavaScript strings in the name tree, or `null` * if no JavaScript exists. */ getJavaScript() { return this._transport.getJavaScript(); } /** * @returns {Promise<Object | null>} A promise that is resolved with * an {Object} with the JavaScript actions: * - from the name tree (like getJavaScript); * - from A or AA entries in the catalog dictionary. * , or `null` if no JavaScript exists. */ getJSActions() { return this._transport.getDocJSActions(); } /** * @typedef {Object} OutlineNode * @property {string} title * @property {boolean} bold * @property {boolean} italic * @property {Uint8ClampedArray} color - The color in RGB format to use for * display purposes. * @property {string | Array<any> | null} dest * @property {string | null} url * @property {string | undefined} unsafeUrl * @property {boolean | undefined} newWindow * @property {number | undefined} count * @property {Array<OutlineNode>} items */ /** * @returns {Promise<Array<OutlineNode>>} A promise that is resolved with an * {Array} that is a tree outline (if it has one) of the PDF file. */ getOutline() { return this._transport.getOutline(); } /** * @returns {Promise<OptionalContentConfig>} A promise that is resolved with * an {@link OptionalContentConfig} that contains all the optional content * groups (assuming that the document has any). */ getOptionalContentConfig() { return this._transport.getOptionalContentConfig(); } /** * @returns {Promise<Array<number> | null>} A promise that is resolved with * an {Array} that contains the permission flags for the PDF document, or * `null` when no permissions are present in the PDF file. */ getPermissions() { return this._transport.getPermissions(); } /** * @returns {Promise<{ info: Object, metadata: Metadata }>} A promise that is * resolved with an {Object} that has `info` and `metadata` properties. * `info` is an {Object} filled with anything available in the information * dictionary and similarly `metadata` is a {Metadata} object with * information from the metadata section of the PDF. */ getMetadata() { return this._transport.getMetadata(); } /** * @typedef {Object} MarkInfo * Properties correspond to Table 321 of the PDF 32000-1:2008 spec. * @property {boolean} Marked * @property {boolean} UserProperties * @property {boolean} Suspects */ /** * @returns {Promise<MarkInfo | null>} A promise that is resolved with * a {MarkInfo} object that contains the MarkInfo flags for the PDF * document, or `null` when no MarkInfo values are present in the PDF file. */ getMarkInfo() { return this._transport.getMarkInfo(); } /** * @returns {Promise<Uint8Array>} A promise that is resolved with a * {Uint8Array} containing the raw data of the PDF document. */ getData() { return this._transport.getData(); } /** * @returns {Promise<Uint8Array>} A promise that is resolved with a * {Uint8Array} containing the full data of the saved document. */ saveDocument() { return this._transport.saveDocument(); } /** * @returns {Promise<{ length: number }>} A promise that is resolved when the * document's data is loaded. It is resolved with an {Object} that contains * the `length` property that indicates size of the PDF data in bytes. */ getDownloadInfo() { return this._transport.downloadInfoCapability.promise; } /** * Cleans up resources allocated by the document on both the main and worker * threads. * * NOTE: Do not, under any circumstances, call this method when rendering is * currently ongoing since that may lead to rendering errors. * * @param {boolean} [keepLoadedFonts] - Let fonts remain attached to the DOM. * NOTE: This will increase persistent memory usage, hence don't use this * option unless absolutely necessary. The default value is `false`. * @returns {Promise} A promise that is resolved when clean-up has finished. */ cleanup(keepLoadedFonts = false) { return this._transport.startCleanup(keepLoadedFonts || this.isPureXfa); } /** * Destroys the current document instance and terminates the worker. */ destroy() { return this.loadingTask.destroy(); } /** * @type {DocumentInitParameters} A subset of the current * {DocumentInitParameters}, which are needed in the viewer. */ get loadingParams() { return this._transport.loadingParams; } /** * @type {PDFDocumentLoadingTask} The loadingTask for the current document. */ get loadingTask() { return this._transport.loadingTask; } /** * @returns {Promise<Object<string, Array<Object>> | null>} A promise that is * resolved with an {Object} containing /AcroForm field data for the JS * sandbox, or `null` when no field data is present in the PDF file. */ getFieldObjects() { return this._transport.getFieldObjects(); } /** * @returns {Promise<boolean>} A promise that is resolved with `true` * if some /AcroForm fields have JavaScript actions. */ hasJSActions() { return this._transport.hasJSActions(); } /** * @returns {Promise<Array<string> | null>} A promise that is resolved with an * {Array<string>} containing IDs of annotations that have a calculation * action, or `null` when no such annotations are present in the PDF file. */ getCalculationOrderIds() { return this._transport.getCalculationOrderIds(); } } /** * Page getViewport parameters. * * @typedef {Object} GetViewportParameters * @property {number} scale - The desired scale of the viewport. * @property {number} [rotation] - The desired rotation, in degrees, of * the viewport. If omitted it defaults to the page rotation. * @property {number} [offsetX] - The horizontal, i.e. x-axis, offset. * The default value is `0`. * @property {number} [offsetY] - The vertical, i.e. y-axis, offset. * The default value is `0`. * @property {boolean} [dontFlip] - If true, the y-axis will not be * flipped. The default value is `false`. */ /** * Page getTextContent parameters. * * @typedef {Object} getTextContentParameters * @property {boolean} disableCombineTextItems - Do not attempt to combine * same line {@link TextItem}'s. The default value is `false`. * @property {boolean} [includeMarkedContent] - When true include marked * content items in the items array of TextContent. The default is `false`. */ /** * Page text content. * * @typedef {Object} TextContent * @property {Array<TextItem | TextMarkedContent>} items - Array of * {@link TextItem} and {@link TextMarkedContent} objects. TextMarkedContent * items are included when includeMarkedContent is true. * @property {Object<string, TextStyle>} styles - {@link TextStyle} objects, * indexed by font name. */ /** * Page text content part. * * @typedef {Object} TextItem * @property {string} str - Text content. * @property {string} dir - Text direction: 'ttb', 'ltr' or 'rtl'. * @property {Array<any>} transform - Transformation matrix. * @property {number} width - Width in device space. * @property {number} height - Height in device space. * @property {string} fontName - Font name used by PDF.js for converted font. * @property {boolean} hasEOL - Indicating if the text content is followed by a * line-break. */ /** * Page text marked content part. * * @typedef {Object} TextMarkedContent * @property {string} type - Either 'beginMarkedContent', * 'beginMarkedContentProps', or 'endMarkedContent'. * @property {string} id - The marked content identifier. Only used for type * 'beginMarkedContentProps'. */ /** * Text style. * * @typedef {Object} TextStyle * @property {number} ascent - Font ascent. * @property {number} descent - Font descent. * @property {boolean} vertical - Whether or not the text is in vertical mode. * @property {string} fontFamily - The possible font family. */ /** * Page annotation parameters. * * @typedef {Object} GetAnnotationsParameters * @property {string} [intent] - Determines the annotations that are fetched, * can be 'display' (viewable annotations), 'print' (printable annotations), * or 'any' (all annotations). The default value is 'display'. */ /** * Page render parameters. * * @typedef {Object} RenderParameters * @property {Object} canvasContext - A 2D context of a DOM Canvas object. * @property {PageViewport} viewport - Rendering viewport obtained by calling * the `PDFPageProxy.getViewport` method. * @property {string} [intent] - Rendering intent, can be 'display', 'print', * or 'any'. The default value is 'display'. * @property {number} [annotationMode] Controls which annotations are rendered * onto the canvas, for annotations with appearance-data; the values from * {@link AnnotationMode} should be used. The following values are supported: * - `AnnotationMode.DISABLE`, which disables all annotations. * - `AnnotationMode.ENABLE`, which includes all possible annotations (thus * it also depends on the `intent`-option, see above). * - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain * interactive form elements (those will be rendered in the display layer). * - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations * (as above) but where interactive form elements are updated with data * from the {@link AnnotationStorage}-instance; useful e.g. for printing. * The default value is `AnnotationMode.ENABLE`. * @property {Array<any>} [transform] - Additional transform, applied just * before viewport transform. * @property {Object} [canvasFactory] - The factory instance that will be used * when creating canvases. The default value is {new DOMCanvasFactory()}. * @property {Object | string} [background] - Background to use for the canvas. * Any valid `canvas.fillStyle` can be used: a `DOMString` parsed as CSS * <color> value, a `CanvasGradient` object (a linear or radial gradient) or * a `CanvasPattern` object (a repetitive image). The default value is * 'rgb(255,255,255)'. * * NOTE: This option may be partially, or completely, ignored when the * `pageColors`-option is used. * @property {Object} [pageColors] - Overwrites background and foreground colors * with user defined ones in order to improve readability in high contrast * mode. * @property {Promise<OptionalContentConfig>} [optionalContentConfigPromise] - * A promise that should resolve with an {@link OptionalContentConfig} * created from `PDFDocumentProxy.getOptionalContentConfig`. If `null`, * the configuration will be fetched automatically with the default visibility * states set. * @property {Map<string, HTMLCanvasElement>} [annotationCanvasMap] - Map some * annotation ids with canvases used to render them. * @property {PrintAnnotationStorage} [printAnnotationStorage] */ /** * Page getOperatorList parameters. * * @typedef {Object} GetOperatorListParameters * @property {string} [intent] - Rendering intent, can be 'display', 'print', * or 'any'. The default value is 'display'. * @property {number} [annotationMode] Controls which annotations are included * in the operatorList, for annotations with appearance-data; the values from * {@link AnnotationMode} should be used. The following values are supported: * - `AnnotationMode.DISABLE`, which disables all annotations. * - `AnnotationMode.ENABLE`, which includes all possible annotations (thus * it also depends on the `intent`-option, see above). * - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain * interactive form elements (those will be rendered in the display layer). * - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations * (as above) but where interactive form elements are updated with data * from the {@link AnnotationStorage}-instance; useful e.g. for printing. * The default value is `AnnotationMode.ENABLE`. * @property {PrintAnnotationStorage} [printAnnotationStorage] */ /** * Structure tree node. The root node will have a role "Root". * * @typedef {Object} StructTreeNode * @property {Array<StructTreeNode | StructTreeContent>} children - Array of * {@link StructTreeNode} and {@link StructTreeContent} objects. * @property {string} role - element's role, already mapped if a role map exists * in the PDF. */ /** * Structure tree content. * * @typedef {Object} StructTreeContent * @property {string} type - either "content" for page and stream structure * elements or "object" for object references. * @property {string} id - unique id that will map to the text layer. */ /** * PDF page operator list. * * @typedef {Object} PDFOperatorList * @property {Array<number>} fnArray - Array containing the operator functions. * @property {Array<any>} argsArray - Array containing the arguments of the * functions. */ /** * Proxy to a `PDFPage` in the worker thread. */ class PDFPageProxy { #delayedCleanupTimeout = null; #pendingCleanup = false; constructor(pageIndex, pageInfo, transport, pdfBug = false) { this._pageIndex = pageIndex; this._pageInfo = pageInfo; this._transport = transport; this._stats = pdfBug ? new StatTimer() : null; this._pdfBug = pdfBug; /** @type {PDFObjects} */ this.commonObjs = transport.commonObjs; this.objs = new PDFObjects(); this._maybeCleanupAfterRender = false; this._intentStates = new Map(); this.destroyed = false; } /** * @type {number} Page number of the page. First page is 1. */ get pageNumber() { return this._pageIndex + 1; } /** * @type {number} The number of degrees the page is rotated clockwise. */ get rotate() { return this._pageInfo.rotate; } /** * @type {RefProxy | null} The reference that points to this page. */ get ref() { return this._pageInfo.ref; } /** * @type {number} The default size of units in 1/72nds of an inch. */ get userUnit() { return this._pageInfo.userUnit; } /** * @type {Array<number>} An array of the visible portion of the PDF page in * user space units [x1, y1, x2, y2]. */ get view() { return this._pageInfo.view; } /** * @param {GetViewportParameters} params - Viewport parameters. * @returns {PageViewport} Contains 'width' and 'height' properties * along with transforms required for rendering. */ getViewport({ scale, rotation = this.rotate, offsetX = 0, offsetY = 0, dontFlip = false, } = {}) { return new PageViewport({ viewBox: this.view, scale, rotation, offsetX, offsetY, dontFlip, }); } /** * @param {GetAnnotationsParameters} params - Annotation parameters. * @returns {Promise<Array<any>>} A promise that is resolved with an * {Array} of the annotation objects. */ getAnnotations({ intent = "display" } = {}) { const intentArgs = this._transport.getRenderingIntent(intent); return this._transport.getAnnotations( this._pageIndex, intentArgs.renderingIntent ); } /** * @returns {Promise<Object>} A promise that is resolved with an * {Object} with JS actions. */ getJSActions() { return this._transport.getPageJSActions(this._pageIndex); } /** * @type {boolean} True if only XFA form. */ get isPureXfa() { return shadow(this, "isPureXfa", !!this._transport._htmlForXfa); } /** * @returns {Promise<Object | null>} A promise that is resolved with * an {Object} with a fake DOM object (a tree structure where elements * are {Object} with a name, attributes (class, style, ...), value and * children, very similar to a HTML DOM tree), or `null` if no XFA exists. */ async getXfa() { return this._transport._htmlForXfa?.children[this._pageIndex] || null; } /** * Begins the process of rendering a page to the desired context. * * @param {RenderParameters} params - Page render parameters. * @returns {RenderTask} An object that contains a promise that is * resolved when the page finishes rendering. */ render({ canvasContext, viewport, intent = "display", annotationMode = AnnotationMode.ENABLE, transform = null, canvasFactory = null, background = null, optionalContentConfigPromise = null, annotationCanvasMap = null, pageColors = null, printAnnotationStorage = null, }) { if ( (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) && canvasFactory ) { deprecated( "render no longer accepts the `canvasFactory`-option, " + "please pass it to the `getDocument`-function instead." ); } this._stats?.time("Overall"); const intentArgs = this._transport.getRenderingIntent( intent, annotationMode, printAnnotationStorage ); // If there was a pending destroy, cancel it so no cleanup happens during // this call to render... this.#pendingCleanup = false; // ... and ensure that a delayed cleanup is always aborted. this.#abortDelayedCleanup(); if (!optionalContentConfigPromise) { optionalContentConfigPromise = this._transport.getOptionalContentConfig(); } let intentState = this._intentStates.get(intentArgs.cacheKey); if (!intentState) { intentState = Object.create(null); this._intentStates.set(intentArgs.cacheKey, intentState); } // Ensure that a pending `streamReader` cancel timeout is always aborted. if (intentState.streamReaderCancelTimeout) { clearTimeout(intentState.streamReaderCancelTimeout); intentState.streamReaderCancelTimeout = null; } const intentPrint = !!( intentArgs.renderingIntent & RenderingIntentFlag.PRINT ); // If there's no displayReadyCapability yet, then the operatorList // was never requested before. Make the request and create the promise. if (!intentState.displayReadyCapability) { intentState.displayReadyCapability = createPromiseCapability(); intentState.operatorList = { fnArray: [], argsArray: [], lastChunk: false, separateAnnots: null, }; this._stats?.time("Page Request"); this._pumpOperatorList(intentArgs); } const complete = error => { intentState.renderTasks.delete(internalRenderTask); // Attempt to reduce memory usage during *printing*, by always running // cleanup immediately once rendering has finished. if (this._maybeCleanupAfterRender || intentPrint) { this.#pendingCleanup = true; } this.#tryCleanup(/* delayed = */ !intentPrint); if (error) { internalRenderTask.capability.reject(error); this._abortOperatorList({ intentState, reason: error instanceof Error ? error : new Error(error), }); } else { internalRenderTask.capability.resolve(); } this._stats?.timeEnd("Rendering"); this._stats?.timeEnd("Overall"); }; const internalRenderTask = new InternalRenderTask({ callback: complete, // Only include the required properties, and *not* the entire object. params: { canvasContext, viewport, transform, background, }, objs: this.objs, commonObjs: this.commonObjs, annotationCanvasMap, operatorList: intentState.operatorList, pageIndex: this._pageIndex, canvasFactory: canvasFactory || this._transport.canvasFactory, filterFactory: this._transport.filterFactory, useRequestAnimationFrame: !intentPrint, pdfBug: this._pdfBug, pageColors, }); (intentState.renderTasks ||= new Set()).add(internalRenderTask); const renderTask = internalRenderTask.task; Promise.all([ intentState.displayReadyCapability.promise, optionalContentConfigPromise, ]) .then( ([ { transparency, isOffscreenCanvasSupported }, optionalContentConfig, ]) => { if (this.#pendingCleanup) { complete(); return; } this._stats?.time("Rendering"); internalRenderTask.initializeGraphics({ transparency, isOffscreenCanvasSupported, optionalContentConfig, }); internalRenderTask.operatorListChanged(); } ) .catch(complete); return renderTask; } /** * @param {GetOperatorListParameters} params - Page getOperatorList * parameters. * @returns {Promise<PDFOperatorList>} A promise resolved with an * {@link PDFOperatorList} object that represents the page's operator list. */ getOperatorList({ intent = "display", annotationMode = AnnotationMode.ENABLE, printAnnotationStorage = null, } = {}) { function operatorListChanged() { if (intentState.operatorList.lastChunk) { intentState.opListReadCapability.resolve(intentState.operatorList); intentState.renderTasks.delete(opListTask); } } const intentArgs = this._transport.getRenderingIntent( intent, annotationMode, printAnnotationStorage, /* isOpList = */ true ); let intentState = this._intentStates.get(intentArgs.cacheKey); if (!intentState) { intentState = Object.create(null); this._intentStates.set(intentArgs.cacheKey, intentState); } let opListTask; if (!intentState.opListReadCapability) { opListTask = Object.create(null); opListTask.operatorListChanged = operatorListChanged; intentState.opListReadCapability = createPromiseCapability(); (intentState.renderTasks ||= new Set()).add(opListTask); intentState.operatorList = { fnArray: [], argsArray: [], lastChunk: false, separateAnnots: null, }; this._stats?.time("Page Request"); this._pumpOperatorList(intentArgs); } return intentState.opListReadCapability.promise; } /** * NOTE: All occurrences of whitespace will be replaced by * standard spaces (0x20). * * @param {getTextContentParameters} params - getTextContent parameters. * @returns {ReadableStream} Stream for reading text content chunks. */ streamTextContent({ disableCombineTextItems = false, includeMarkedContent = false, } = {}) { const TEXT_CONTENT_CHUNK_SIZE = 100; return this._transport.messageHandler.sendWithStream( "GetTextContent", { pageIndex: this._pageIndex, combineTextItems: disableCombineTextItems !== true, includeMarkedContent: includeMarkedContent === true, }, { highWaterMark: TEXT_CONTENT_CHUNK_SIZE, size(textContent) { return textContent.items.length; }, } ); } /** * NOTE: All occurrences of whitespace will be replaced by * standard spaces (0x20). * * @param {getTextContentParameters} params - getTextContent parameters. * @returns {Promise<TextContent>} A promise that is resolved with a * {@link TextContent} object that represents the page's text content. */ getTextContent(params = {}) { if (this._transport._htmlForXfa) { // TODO: We need to revisit this once the XFA foreground patch lands and // only do this for non-foreground XFA. return this.getXfa().then(xfa => { return XfaText.textContent(xfa); }); } const readableStream = this.streamTextContent(params); return new Promise(function (resolve, reject) { function pump() { reader.read().then(function ({ value, done }) { if (done) { resolve(textContent); return; } Object.assign(textContent.styles, value.styles); textContent.items.push(...value.items); pump(); }, reject); } const reader = readableStream.getReader(); const textContent = { items: [], styles: Object.create(null), }; pump(); }); } /** * @returns {Promise<StructTreeNode>} A promise that is resolved with a * {@link StructTreeNode} object that represents the page's structure tree, * or `null` when no structure tree is present for the current page. */ getStructTree() { return this._transport.getStructTree(this._pageIndex); } /** * Destroys the page object. * @private */ _destroy() { this.destroyed = true; const waitOn = []; for (const intentState of this._intentStates.values()) { this._abortOperatorList({ intentState, reason: new Error("Page was destroyed."), force: true, }); if (intentState.opListReadCapability) { // Avoid errors below, since the renderTasks are just stubs. continue; } for (const internalRenderTask of intentState.renderTasks) { waitOn.push(internalRenderTask.completed); internalRenderTask.cancel(); } } this.objs.clear(); this.#pendingCleanup = false; this.#abortDelayedCleanup(); return Promise.all(waitOn); } /** * Cleans up resources allocated by the page. * * @param {boolean} [resetStats] - Reset page stats, if enabled. * The default value is `false`. * @returns {boolean} Indicates if clean-up was successfully run. */ cleanup(resetStats = false) { this.#pendingCleanup = true; const success = this.#tryCleanup(/* delayed = */ false); if (resetStats && success) { this._stats &&= new StatTimer(); } return success; } /** * Attempts to clean up if rendering is in a state where that's possible. * @param {boolean} [delayed] - Delay the cleanup, to e.g. improve zooming * performance in documents with large images. * The default value is `false`. * @returns {boolean} Indicates if clean-up was successfully run. */ #tryCleanup(delayed = false) { this.#abortDelayedCleanup(); if (!this.#pendingCleanup) { return false; } if (delayed) { this.#delayedCleanupTimeout = setTimeout(() => { this.#delayedCleanupTimeout = null; this.#tryCleanup(/* delayed = */ false); }, DELAYED_CLEANUP_TIMEOUT); return false; } for (const { renderTasks, operatorList } of this._intentStates.values()) { if (renderTasks.size > 0 || !operatorList.lastChunk) { return false; } } this._intentStates.clear(); this.objs.clear(); this.#pendingCleanup = false; return true; } #abortDelayedCleanup() { if (this.#delayedCleanupTimeout) { clearTimeout(this.#delayedCleanupTimeout); this.#delayedCleanupTimeout = null; } } /** * @private */ _startRenderPage(transparency, isOffscreenCanvasSupported, cacheKey) { const intentState = this._intentStates.get(cacheKey); if (!intentState) { return; // Rendering was cancelled. } this._stats?.timeEnd("Page Request"); // TODO Refactor RenderPageRequest to separate rendering // and operator list logic intentState.displayReadyCapability?.resolve({ transparency, isOffscreenCanvasSupported, }); } /** * @private */ _renderPageChunk(operatorListChunk, intentState) { // Add the new chunk to the current operator list. for (let i = 0, ii = operatorListChunk.length; i < ii; i++) { intentState.operatorList.fnArray.push(operatorListChunk.fnArray[i]); intentState.operatorList.argsArray.push(operatorListChunk.argsArray[i]); } intentState.operatorList.lastChunk = operatorListChunk.lastChunk; intentState.operatorList.separateAnnots = operatorListChunk.separateAnnots; // Notify all the rendering tasks there are more operators to be consumed. for (const internalRenderTask of intentState.renderTasks) { internalRenderTask.operatorListChanged(); } if (operatorListChunk.lastChunk) { this.#tryCleanup(/* delayed = */ true); } } /** * @private */ _pumpOperatorList({ renderingIntent, cacheKey, annotationStorageMap }) { if ( typeof PDFJSDev === "undefined" || PDFJSDev.test("!PRODUCTION || TESTING") ) { assert( Number.isInteger(renderingIntent) && renderingIntent > 0, '_pumpOperatorList: Expected valid "renderingIntent" argument.' ); } const readableStream = this._transport.messageHandler.sendWithStream( "GetOperatorList", { pageIndex: this._pageIndex, intent: renderingIntent, cacheKey, annotationStorage: annotationStorageMap, } ); const reader = readableStream.getReader(); const intentState = this._intentStates.get(cacheKey); intentState.streamReader = reader; const pump = () => { reader.read().then( ({ value, done }) => { if (done) { intentState.streamReader = null; return; } if (this._transport.destroyed) { return; // Ignore any pending requests if the worker was terminated. } this._renderPageChunk(value, intentState); pump(); }, reason => { intentState.streamReader = null; if (this._transport.destroyed) { return; // Ignore any pending requests if the worker was terminated. } if (intentState.operatorList) { // Mark operator list as complete. intentState.operatorList.lastChunk = true; for (const internalRenderTask of intentState.renderTasks) { internalRenderTask.operatorListChanged(); } this.#tryCleanup(/* delayed = */ true); } if (intentState.displayReadyCapability) { intentState.displayReadyCapability.reject(reason); } else if (intentState.opListReadCapability) { intentState.opListReadCapability.reject(reason); } else { throw reason; } } ); }; pump(); } /** * @private */ _abortOperatorList({ intentState, reason, force = false }) { if ( typeof PDFJSDev === "undefined" || PDFJSDev.test("!PRODUCTION || TESTING") ) { assert( reason instanceof Error, '_abortOperatorList: Expected valid "reason" argument.' ); } if (!intentState.streamReader) { return; } // Ensure that a pending `streamReader` cancel timeout is always aborted. if (intentState.streamReaderCancelTimeout) { clearTimeout(intentState.streamReaderCancelTimeout); intentState.streamReaderCancelTimeout = null; } if (!force) { // Ensure that an Error occurring in *only* one `InternalRenderTask`, e.g. // multiple render() calls on the same canvas, won't break all rendering. if (intentState.renderTasks.size > 0) { return; } // Don't immediately abort parsing on the worker-thread when rendering is // cancelled, since that will unnecessarily delay re-rendering when (for // partially parsed pages) e.g. zooming/rotation occurs in the viewer. if (reason instanceof RenderingCancelledException) { let delay = RENDERING_CANCELLED_TIMEOUT; if (reason.extraDelay > 0 && reason.extraDelay < /* ms = */ 1000) { // Above, we prevent the total delay from becoming arbitrarily large. delay += reason.extraDelay; } intentState.streamReaderCancelTimeout = setTimeout(() => { intentState.streamReaderCancelTimeout = null; this._abortOperatorList({ intentState, reason, force: true }); }, delay); return; } } intentState.streamReader .cancel(new AbortException(reason.message)) .catch(() => { // Avoid "Uncaught promise" messages in the console. }); intentState.streamReader = null; if (this._transport.destroyed) { return; // Ignore any pending requests if the worker was terminated. } // Remove the current `intentState`, since a cancelled `getOperatorList` // call on the worker-thread cannot be re-started... for (const [curCacheKey, curIntentState] of this._intentStates) { if (curIntentState === intentState) { this._intentStates.delete(curCacheKey); break; } } // ... and force clean-up to ensure that any old state is always removed. this.cleanup(); } /** * @type {Object} Returns page stats, if enabled; returns `null` otherwise. */ get stats() { return this._stats; } } class LoopbackPort { #listeners = new Set(); #deferred = Promise.resolve(); postMessage(obj, transfers) { const event = { data: structuredClone(obj, transfers), }; this.#deferred.then(() => { for (const listener of this.#listeners) { listener.call(this, event); } }); } addEventListener(name, listener) { this.#listeners.add(listener); } removeEventListener(name, listener) { this.#listeners.delete(listener); } terminate() { this.#listeners.clear(); } } /** * @typedef {Object} PDFWorkerParameters * @property {string} [name] - The name of the worker. * @property {Object} [port] - The `workerPort` object. * @property {number} [verbosity] - Controls the logging level; * the constants from {@link VerbosityLevel} should be used. */ const PDFWorkerUtil = { isWorkerDisabled: false, fallbackWorkerSrc: null, fakeWorkerId: 0, }; if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) { // eslint-disable-next-line no-undef if (isNodeJS && typeof __non_webpack_require__ === "function") { // Workers aren't supported in Node.js, force-disabling them there. PDFWorkerUtil.isWorkerDisabled = true; PDFWorkerUtil.fallbackWorkerSrc = PDFJSDev.test("LIB") ? "../pdf.worker.js" : "./pdf.worker.js"; } else if (typeof document === "object") { const pdfjsFilePath = document?.currentScript?.src; if (pdfjsFilePath) { PDFWorkerUtil.fallbackWorkerSrc = pdfjsFilePath.replace( /(\.(?:min\.)?js)(\?.*)?$/i, ".worker$1$2" ); } } // Check if URLs have the same origin. For non-HTTP based URLs, returns false. PDFWorkerUtil.isSameOrigin = function (baseUrl, otherUrl) { let base; try { base = new URL(baseUrl); if (!base.origin || base.origin === "null") { return false; // non-HTTP url } } catch (e) { return false; } const other = new URL(otherUrl, base); return base.origin === other.origin; }; PDFWorkerUtil.createCDNWrapper = function (url) { // We will rely on blob URL's property to specify origin. // We want this function to fail in case if createObjectURL or Blob do not // exist or fail for some reason -- our Worker creation will fail anyway. const wrapper = `importScripts("${url}");`; return URL.createObjectURL(new Blob([wrapper])); }; } /** * PDF.js web worker abstraction that controls the instantiation of PDF * documents. Message handlers are used to pass information from the main * thread to the worker thread and vice versa. If the creation of a web * worker is not possible, a "fake" worker will be used instead. * * @param {PDFWorkerParameters} params - The worker initialization parameters. */ class PDFWorker { static #workerPorts = new WeakMap(); constructor({ name = null, port = null, verbosity = getVerbosityLevel(), } = {}) { if (port && PDFWorker.#workerPorts.has(port)) { throw new Error("Cannot use more than one PDFWorker per port."); } this.name = name; this.destroyed = false; this.verbosity = verbosity; this._readyCapability = createPromiseCapability(); this._port = null; this._webWorker = null; this._messageHandler = null; if (port) { PDFWorker.#workerPorts.set(port, this); this._initializeFromPort(port); return; } this._initialize(); } /** * Promise for worker initialization completion. * @type {Promise<void>} */ get promise() { return this._readyCapability.promise; } /** * The current `workerPort`, when it exists. * @type {Worker} */ get port() { return this._port; } /** * The current MessageHandler-instance. * @type {MessageHandler} */ get messageHandler() { return this._messageHandler; } _initializeFromPort(port) { this._port = port; this._messageHandler = new MessageHandler("main", "worker", port); this._messageHandler.on("ready", function () { // Ignoring "ready" event -- MessageHandler should already be initialized // and ready to accept messages. }); this._readyCapability.resolve(); // Send global setting, e.g. verbosity level. this._messageHandler.send("configure", { verbosity: this.verbosity, }); } _initialize() { // If worker support isn't disabled explicit and the browser has worker // support, create a new web worker and test if it/the browser fulfills // all requirements to run parts of pdf.js in a web worker. // Right now, the requirement is, that an Uint8Array is still an // Uint8Array as it arrives on the worker. (Chrome added this with v.15.) if ( !PDFWorkerUtil.isWorkerDisabled && !PDFWorker._mainThreadWorkerMessageHandler ) { let { workerSrc } = PDFWorker; try { // Wraps workerSrc path into blob URL, if the former does not belong // to the same origin. if ( typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC") && !PDFWorkerUtil.isSameOrigin(window.location.href, workerSrc) ) { workerSrc = PDFWorkerUtil.createCDNWrapper( new URL(workerSrc, window.location).href ); } // Some versions of FF can't create a worker on localhost, see: // https://bugzilla.mozilla.org/show_bug.cgi?id=683280 const worker = (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) && !workerSrc.endsWith("/build/pdf.worker.js") && !workerSrc.endsWith("/src/worker_loader.js") ? new Worker(workerSrc, { type: "module" }) : new Worker(workerSrc); const messageHandler = new MessageHandler("main", "worker", worker); const terminateEarly = () => { worker.removeEventListener("error", onWorkerError); messageHandler.destroy(); worker.terminate(); if (this.destroyed) { this._readyCapability.reject(new Error("Worker was destroyed")); } else { // Fall back to fake worker if the termination is caused by an // error (e.g. NetworkError / SecurityError). this._setupFakeWorker(); } }; const onWorkerError = () => { if (!this._webWorker) { // Worker failed to initialize due to an error. Clean up and fall // back to the fake worker. terminateEarly(); } }; worker.addEventListener("error", onWorkerError); messageHandler.on("test", data => { worker.removeEventListener("error", onWorkerError); if (this.destroyed) { terminateEarly(); return; // worker was destroyed } if (data) { this._messageHandler = messageHandler; this._port = worker; this._webWorker = worker; this._readyCapability.resolve(); // Send global setting, e.g. verbosity level. messageHandler.send("configure", { verbosity: this.verbosity, }); } else { this._setupFakeWorker(); messageHandler.destroy(); worker.terminate(); } }); messageHandler.on("ready", data => { worker.removeEventListener("error", onWorkerError); if (this.destroyed) { terminateEarly(); return; // worker was destroyed } try { sendTest(); } catch (e) { // We need fallback to a faked worker. this._setupFakeWorker(); } }); const sendTest = () => { const testObj = new Uint8Array(); // Ensure that we can use `postMessage` transfers. messageHandler.send("test", testObj, [testObj.buffer]); }; // It might take time for the worker to initialize. We will try to send // the "test" message immediately, and once the "ready" message arrives. // The worker shall process only the first received "test" message. sendTest(); return; } catch (e) { info("The worker has been disabled."); } } // Either workers are disabled, not supported or have thrown an exception. // Thus, we fallback to a faked worker. this._setupFakeWorker(); } _setupFakeWorker() { if (!PDFWorkerUtil.isWorkerDisabled) { warn("Setting up fake worker."); PDFWorkerUtil.isWorkerDisabled = true; } PDFWorker._setupFakeWorkerGlobal .then(WorkerMessageHandler => { if (this.destroyed) { this._readyCapability.reject(new Error("Worker was destroyed")); return; } const port = new LoopbackPort(); this._port = port; // All fake workers use the same port, making id unique. const id = `fake${PDFWorkerUtil.fakeWorkerId++}`; // If the main thread is our worker, setup the handling for the // messages -- the main thread sends to it self. const workerHandler = new MessageHandler(id + "_worker", id, port); WorkerMessageHandler.setup(workerHandler, port); const messageHandler = new MessageHandler(id, id + "_worker", port); this._messageHandler = messageHandler; this._readyCapability.resolve(); // Send global setting, e.g. verbosity level. messageHandler.send("configure", { verbosity: this.verbosity, }); }) .catch(reason => { this._readyCapability.reject( new Error(`Setting up fake worker failed: "${reason.message}".`) ); }); } /** * Destroys the worker instance. */ destroy() { this.destroyed = true; if (this._webWorker) { // We need to terminate only web worker created resource. this._webWorker.terminate(); this._webWorker = null; } PDFWorker.#workerPorts.delete(this._port); this._port = null; if (this._messageHandler) { this._messageHandler.destroy(); this._messageHandler = null; } } /** * @param {PDFWorkerParameters} params - The worker initialization parameters. */ static fromPort(params) { if (!params?.port) { throw new Error("PDFWorker.fromPort - invalid method signature."); } if (this.#workerPorts.has(params.port)) { return this.#workerPorts.get(params.port); } return new PDFWorker(params); } /** * The current `workerSrc`, when it exists. * @type {string} */ static get workerSrc() { if (GlobalWorkerOptions.workerSrc) { return GlobalWorkerOptions.workerSrc; } if ( (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) && PDFWorkerUtil.fallbackWorkerSrc !== null ) { if (!isNodeJS) { deprecated('No "GlobalWorkerOptions.workerSrc" specified.'); } return PDFWorkerUtil.fallbackWorkerSrc; } throw new Error('No "GlobalWorkerOptions.workerSrc" specified.'); } static get _mainThreadWorkerMessageHandler() { try { return globalThis.pdfjsWorker?.WorkerMessageHandler || null; } catch (ex) { return null; } } // Loads worker code into the main-thread. static get _setupFakeWorkerGlobal() { const loader = async () => { const mainWorkerMessageHandler = this._mainThreadWorkerMessageHandler; if (mainWorkerMessageHandler) { // The worker was already loaded using e.g. a `<script>` tag. return mainWorkerMessageHandler; } if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) { const worker = await import("pdfjs/pdf.worker.js"); return worker.WorkerMessageHandler; } if ( PDFJSDev.test("GENERIC") && isNodeJS && // eslint-disable-next-line no-undef typeof __non_webpack_require__ === "function" ) { // Since bundlers, such as Webpack, cannot be told to leave `require` // statements alone we are thus forced to jump through hoops in order // to prevent `Critical dependency: ...` warnings in third-party // deployments of the built `pdf.js`/`pdf.worker.js` files; see // https://github.com/webpack/webpack/issues/8826 // // The following hack is based on the assumption that code running in // Node.js won't ever be affected by e.g. Content Security Policies that // prevent the use of `eval`. If that ever occurs, we should revert this // to a normal `__non_webpack_require__` statement and simply document // the Webpack warnings instead (telling users to ignore them). // // eslint-disable-next-line no-eval const worker = eval("require")(this.workerSrc); return worker.WorkerMessageHandler; } await loadScript(this.workerSrc); return window.pdfjsWorker.WorkerMessageHandler; }; return shadow(this, "_setupFakeWorkerGlobal", loader()); } } /** * For internal use only. * @ignore */ class WorkerTransport { #methodPromises = new Map(); #pageCache = new Map(); #pagePromises = new Map(); constructor(messageHandler, loadingTask, networkStream, params, factory) { this.messageHandler = messageHandler; this.loadingTask = loadingTask; this.commonObjs = new PDFObjects(); this.fontLoader = new FontLoader({ ownerDocument: params.ownerDocument, styleElement: params.styleElement, }); this._params = params; this.canvasFactory = factory.canvasFactory; this.filterFactory = factory.filterFactory; this.cMapReaderFactory = factory.cMapReaderFactory; this.standardFontDataFactory = factory.standardFontDataFactory; this.destroyed = false; this.destroyCapability = null; this._passwordCapability = null; this._networkStream = networkStream; this._fullReader = null; this._lastProgress = null; this.downloadInfoCapability = createPromiseCapability(); this.setupMessageHandler(); if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { // For testing purposes. Object.defineProperty(this, "getXFADatasets", { value: () => { return this.messageHandler.sendWithPromise("GetXFADatasets", null); }, }); } } #cacheSimpleMethod(name, data = null) { const cachedPromise = this.#methodPromises.get(name); if (cachedPromise) { return cachedPromise; } const promise = this.messageHandler.sendWithPromise(name, data); this.#methodPromises.set(name, promise); return promise; } get annotationStorage() { return shadow(this, "annotationStorage", new AnnotationStorage()); } getRenderingIntent( intent, annotationMode = AnnotationMode.ENABLE, printAnnotationStorage = null, isOpList = false ) { let renderingIntent = RenderingIntentFlag.DISPLAY; // Default value. let annotationMap = null; switch (intent) { case "any": renderingIntent = RenderingIntentFlag.ANY; break; case "display": break; case "print": renderingIntent = RenderingIntentFlag.PRINT; break; default: warn(`getRenderingIntent - invalid intent: ${intent}`); } switch (annotationMode) { case AnnotationMode.DISABLE: renderingIntent += RenderingIntentFlag.ANNOTATIONS_DISABLE; break; case AnnotationMode.ENABLE: break; case AnnotationMode.ENABLE_FORMS: renderingIntent += RenderingIntentFlag.ANNOTATIONS_FORMS; break; case AnnotationMode.ENABLE_STORAGE: renderingIntent += RenderingIntentFlag.ANNOTATIONS_STORAGE; const annotationStorage = renderingIntent & RenderingIntentFlag.PRINT && printAnnotationStorage instanceof PrintAnnotationStorage ? printAnnotationStorage : this.annotationStorage; annotationMap = annotationStorage.serializable; break; default: warn(`getRenderingIntent - invalid annotationMode: ${annotationMode}`); } if (isOpList) { renderingIntent += RenderingIntentFlag.OPLIST; } return { renderingIntent, cacheKey: `${renderingIntent}_${AnnotationStorage.getHash( annotationMap )}`, annotationStorageMap: annotationMap, }; } destroy() { if (this.destroyCapability) { return this.destroyCapability.promise; } this.destroyed = true; this.destroyCapability = createPromiseCapability(); if (this._passwordCapability) { this._passwordCapability.reject( new Error("Worker was destroyed during onPassword callback") ); } const waitOn = []; // We need to wait for all renderings to be completed, e.g. // timeout/rAF can take a long time. for (const page of this.#pageCache.values()) { waitOn.push(page._destroy()); } this.#pageCache.clear(); this.#pagePromises.clear(); // Allow `AnnotationStorage`-related clean-up when destroying the document. if (this.hasOwnProperty("annotationStorage")) { this.annotationStorage.resetModified(); } // We also need to wait for the worker to finish its long running tasks. const terminated = this.messageHandler.sendWithPromise("Terminate", null); waitOn.push(terminated); Promise.all(waitOn).then(() => { this.commonObjs.clear(); this.fontLoader.clear(); this.#methodPromises.clear(); this.filterFactory.destroy(); if (this._networkStream) { this._networkStream.cancelAllRequests( new AbortException("Worker was terminated.") ); } if (this.messageHandler) { this.messageHandler.destroy(); this.messageHandler = null; } this.destroyCapability.resolve(); }, this.destroyCapability.reject); return this.destroyCapability.promise; } setupMessageHandler() { const { messageHandler, loadingTask } = this; messageHandler.on("GetReader", (data, sink) => { assert( this._networkStream, "GetReader - no `IPDFStream` instance available." ); this._fullReader = this._networkStream.getFullReader(); this._fullReader.onProgress = evt => { this._lastProgress = { loaded: evt.loaded, total: evt.total, }; }; sink.onPull = () => { this._fullReader .read() .then(function ({ value, done }) { if (done) { sink.close(); return; } assert( value instanceof ArrayBuffer, "GetReader - expected an ArrayBuffer." ); // Enqueue data chunk into sink, and transfer it // to other side as `Transferable` object. sink.enqueue(new Uint8Array(value), 1, [value]); }) .catch(reason => { sink.error(reason); }); }; sink.onCancel = reason => { this._fullReader.cancel(reason); sink.ready.catch(readyReason => { if (this.destroyed) { return; // Ignore any pending requests if the worker was terminated. } throw readyReason; }); }; }); messageHandler.on("ReaderHeadersReady", data => { const headersCapability = createPromiseCapability(); const fullReader = this._fullReader; fullReader.headersReady.then(() => { // If stream or range are disabled, it's our only way to report // loading progress. if (!fullReader.isStreamingSupported || !fullReader.isRangeSupported) { if (this._lastProgress) { loadingTask.onProgress?.(this._lastProgress); } fullReader.onProgress = evt => { loadingTask.onProgress?.({ loaded: evt.loaded, total: evt.total, }); }; } headersCapability.resolve({ isStreamingSupported: fullReader.isStreamingSupported, isRangeSupported: fullReader.isRangeSupported, contentLength: fullReader.contentLength, }); }, headersCapability.reject); return headersCapability.promise; }); messageHandler.on("GetRangeReader", (data, sink) => { assert( this._networkStream, "GetRangeReader - no `IPDFStream` instance available." ); const rangeReader = this._networkStream.getRangeReader( data.begin, data.end ); // When streaming is enabled, it's possible that the data requested here // has already been fetched via the `_fullRequestReader` implementation. // However, given that the PDF data is loaded asynchronously on the // main-thread and then sent via `postMessage` to the worker-thread, // it may not have been available during parsing (hence the attempt to // use range requests here). // // To avoid wasting time and resources here, we'll thus *not* dispatch // range requests if the data was already loaded but has not been sent to // the worker-thread yet (which will happen via the `_fullRequestReader`). if (!rangeReader) { sink.close(); return; } sink.onPull = () => { rangeReader .read() .then(function ({ value, done }) { if (done) { sink.close(); return; } assert( value instanceof ArrayBuffer, "GetRangeReader - expected an ArrayBuffer." ); sink.enqueue(new Uint8Array(value), 1, [value]); }) .catch(reason => { sink.error(reason); }); }; sink.onCancel = reason => { rangeReader.cancel(reason); sink.ready.catch(readyReason => { if (this.destroyed) { return; // Ignore any pending requests if the worker was terminated. } throw readyReason; }); }; }); messageHandler.on("GetDoc", ({ pdfInfo }) => { this._numPages = pdfInfo.numPages; this._htmlForXfa = pdfInfo.htmlForXfa; delete pdfInfo.htmlForXfa; loadingTask._capability.resolve(new PDFDocumentProxy(pdfInfo, this)); }); messageHandler.on("DocException", function (ex) { let reason; switch (ex.name) { case "PasswordException": reason = new PasswordException(ex.message, ex.code); break; case "InvalidPDFException": reason = new InvalidPDFException(ex.message); break; case "MissingPDFException": reason = new MissingPDFException(ex.message); break; case "UnexpectedResponseException": reason = new UnexpectedResponseException(ex.message, ex.status); break; case "UnknownErrorException": reason = new UnknownErrorException(ex.message, ex.details); break; default: unreachable("DocException - expected a valid Error."); } loadingTask._capability.reject(reason); }); messageHandler.on("PasswordRequest", exception => { this._passwordCapability = createPromiseCapability(); if (loadingTask.onPassword) { const updatePassword = password => { if (password instanceof Error) { this._passwordCapability.reject(password); } else { this._passwordCapability.resolve({ password }); } }; try { loadingTask.onPassword(updatePassword, exception.code); } catch (ex) { this._passwordCapability.reject(ex); } } else { this._passwordCapability.reject( new PasswordException(exception.message, exception.code) ); } return this._passwordCapability.promise; }); messageHandler.on("DataLoaded", data => { // For consistency: Ensure that progress is always reported when the // entire PDF file has been loaded, regardless of how it was fetched. loadingTask.onProgress?.({ loaded: data.length, total: data.length, }); this.downloadInfoCapability.resolve(data); }); messageHandler.on("StartRenderPage", data => { if (this.destroyed) { return; // Ignore any pending requests if the worker was terminated. } const page = this.#pageCache.get(data.pageIndex); page._startRenderPage( data.transparency, data.isOffscreenCanvasSupported, data.cacheKey ); }); messageHandler.on("commonobj", ([id, type, exportedData]) => { if (this.destroyed) { return; // Ignore any pending requests if the worker was terminated. } if (this.commonObjs.has(id)) { return; } switch (type) { case "Font": const params = this._params; if ("error" in exportedData) { const exportedError = exportedData.error; warn(`Error during font loading: ${exportedError}`); this.commonObjs.resolve(id, exportedError); break; } let fontRegistry = null; if (params.pdfBug && globalThis.FontInspector?.enabled) { fontRegistry = { registerFont(font, url) { globalThis.FontInspector.fontAdded(font, url); }, }; } const font = new FontFaceObject(exportedData, { isEvalSupported: params.isEvalSupported, disableFontFace: params.disableFontFace, ignoreErrors: params.ignoreErrors, fontRegistry, }); this.fontLoader .bind(font) .catch(reason => { return messageHandler.sendWithPromise("FontFallback", { id }); }) .finally(() => { if (!params.fontExtraProperties && font.data) { // Immediately release the `font.data` property once the font // has been attached to the DOM, since it's no longer needed, // rather than waiting for a `PDFDocumentProxy.cleanup` call. // Since `font.data` could be very large, e.g. in some cases // multiple megabytes, this will help reduce memory usage. font.data = null; } this.commonObjs.resolve(id, font); }); break; case "FontPath": case "Image": case "Pattern": this.commonObjs.resolve(id, exportedData); break; default: throw new Error(`Got unknown common object type ${type}`); } }); messageHandler.on("obj", ([id, pageIndex, type, imageData]) => { if (this.destroyed) { // Ignore any pending requests if the worker was terminated. return; } const pageProxy = this.#pageCache.get(pageIndex); if (pageProxy.objs.has(id)) { return; } switch (type) { case "Image": pageProxy.objs.resolve(id, imageData); // Heuristic that will allow us not to store large data. if (imageData) { let length; if (imageData.bitmap) { const { width, height } = imageData; length = width * height * 4; } else { length = imageData.data?.length || 0; } if (length > MAX_IMAGE_SIZE_TO_CACHE) { pageProxy._maybeCleanupAfterRender = true; } } break; case "Pattern": pageProxy.objs.resolve(id, imageData); break; default: throw new Error(`Got unknown object type ${type}`); } }); messageHandler.on("DocProgress", data => { if (this.destroyed) { return; // Ignore any pending requests if the worker was terminated. } loadingTask.onProgress?.({ loaded: data.loaded, total: data.total, }); }); messageHandler.on("FetchBuiltInCMap", data => { if (this.destroyed) { return Promise.reject(new Error("Worker was destroyed.")); } if (!this.cMapReaderFactory) { return Promise.reject( new Error( "CMapReaderFactory not initialized, see the `useWorkerFetch` parameter." ) ); } return this.cMapReaderFactory.fetch(data); }); messageHandler.on("FetchStandardFontData", data => { if (this.destroyed) { return Promise.reject(new Error("Worker was destroyed.")); } if (!this.standardFontDataFactory) { return Promise.reject( new Error( "StandardFontDataFactory not initialized, see the `useWorkerFetch` parameter." ) ); } return this.standardFontDataFactory.fetch(data); }); } getData() { return this.messageHandler.sendWithPromise("GetData", null); } saveDocument() { if (this.annotationStorage.size <= 0) { warn( "saveDocument called while `annotationStorage` is empty, " + "please use the getData-method instead." ); } return this.messageHandler .sendWithPromise("SaveDocument", { isPureXfa: !!this._htmlForXfa, numPages: this._numPages, annotationStorage: this.annotationStorage.serializable, filename: this._fullReader?.filename ?? null, }) .finally(() => { this.annotationStorage.resetModified(); }); } getPage(pageNumber) { if ( !Number.isInteger(pageNumber) || pageNumber <= 0 || pageNumber > this._numPages ) { return Promise.reject(new Error("Invalid page request.")); } const pageIndex = pageNumber - 1, cachedPromise = this.#pagePromises.get(pageIndex); if (cachedPromise) { return cachedPromise; } const promise = this.messageHandler .sendWithPromise("GetPage", { pageIndex, }) .then(pageInfo => { if (this.destroyed) { throw new Error("Transport destroyed"); } const page = new PDFPageProxy( pageIndex, pageInfo, this, this._params.pdfBug ); this.#pageCache.set(pageIndex, page); return page; }); this.#pagePromises.set(pageIndex, promise); return promise; } getPageIndex(ref) { if ( typeof ref !== "object" || ref === null || !Number.isInteger(ref.num) || ref.num < 0 || !Number.isInteger(ref.gen) || ref.gen < 0 ) { return Promise.reject(new Error("Invalid pageIndex request.")); } return this.messageHandler.sendWithPromise("GetPageIndex", { num: ref.num, gen: ref.gen, }); } getAnnotations(pageIndex, intent) { return this.messageHandler.sendWithPromise("GetAnnotations", { pageIndex, intent, }); } getFieldObjects() { return this.#cacheSimpleMethod("GetFieldObjects"); } hasJSActions() { return this.#cacheSimpleMethod("HasJSActions"); } getCalculationOrderIds() { return this.messageHandler.sendWithPromise("GetCalculationOrderIds", null); } getDestinations() { return this.messageHandler.sendWithPromise("GetDestinations", null); } getDestination(id) { if (typeof id !== "string") { return Promise.reject(new Error("Invalid destination request.")); } return this.messageHandler.sendWithPromise("GetDestination", { id, }); } getPageLabels() { return this.messageHandler.sendWithPromise("GetPageLabels", null); } getPageLayout() { return this.messageHandler.sendWithPromise("GetPageLayout", null); } getPageMode() { return this.messageHandler.sendWithPromise("GetPageMode", null); } getViewerPreferences() { return this.messageHandler.sendWithPromise("GetViewerPreferences", null); } getOpenAction() { return this.messageHandler.sendWithPromise("GetOpenAction", null); } getAttachments() { return this.messageHandler.sendWithPromise("GetAttachments", null); } getJavaScript() { return this.messageHandler.sendWithPromise("GetJavaScript", null); } getDocJSActions() { return this.messageHandler.sendWithPromise("GetDocJSActions", null); } getPageJSActions(pageIndex) { return this.messageHandler.sendWithPromise("GetPageJSActions", { pageIndex, }); } getStructTree(pageIndex) { return this.messageHandler.sendWithPromise("GetStructTree", { pageIndex, }); } getOutline() { return this.messageHandler.sendWithPromise("GetOutline", null); } getOptionalContentConfig() { return this.messageHandler .sendWithPromise("GetOptionalContentConfig", null) .then(results => { return new OptionalContentConfig(results); }); } getPermissions() { return this.messageHandler.sendWithPromise("GetPermissions", null); } getMetadata() { const name = "GetMetadata", cachedPromise = this.#methodPromises.get(name); if (cachedPromise) { return cachedPromise; } const promise = this.messageHandler .sendWithPromise(name, null) .then(results => { return { info: results[0], metadata: results[1] ? new Metadata(results[1]) : null, contentDispositionFilename: this._fullReader?.filename ?? null, contentLength: this._fullReader?.contentLength ?? null, }; }); this.#methodPromises.set(name, promise); return promise; } getMarkInfo() { return this.messageHandler.sendWithPromise("GetMarkInfo", null); } async startCleanup(keepLoadedFonts = false) { if (this.destroyed) { return; // No need to manually clean-up when destruction has started. } await this.messageHandler.sendWithPromise("Cleanup", null); for (const page of this.#pageCache.values()) { const cleanupSuccessful = page.cleanup(); if (!cleanupSuccessful) { throw new Error( `startCleanup: Page ${page.pageNumber} is currently rendering.` ); } } this.commonObjs.clear(); if (!keepLoadedFonts) { this.fontLoader.clear(); } this.#methodPromises.clear(); this.filterFactory.destroy(); } get loadingParams() { const { disableAutoFetch, enableXfa } = this._params; return shadow(this, "loadingParams", { disableAutoFetch, enableXfa, }); } } /** * A PDF document and page is built of many objects. E.g. there are objects for * fonts, images, rendering code, etc. These objects may get processed inside of * a worker. This class implements some basic methods to manage these objects. */ class PDFObjects { #objs = Object.create(null); /** * Ensures there is an object defined for `objId`. * * @param {string} objId * @returns {Object} */ #ensureObj(objId) { const obj = this.#objs[objId]; if (obj) { return obj; } return (this.#objs[objId] = { capability: createPromiseCapability(), data: null, }); } /** * If called *without* callback, this returns the data of `objId` but the * object needs to be resolved. If it isn't, this method throws. * * If called *with* a callback, the callback is called with the data of the * object once the object is resolved. That means, if you call this method * and the object is already resolved, the callback gets called right away. * * @param {string} objId * @param {function} [callback] * @returns {any} */ get(objId, callback = null) { // If there is a callback, then the get can be async and the object is // not required to be resolved right now. if (callback) { const obj = this.#ensureObj(objId); obj.capability.promise.then(() => callback(obj.data)); return null; } // If there isn't a callback, the user expects to get the resolved data // directly. const obj = this.#objs[objId]; // If there isn't an object yet or the object isn't resolved, then the // data isn't ready yet! if (!obj?.capability.settled) { throw new Error(`Requesting object that isn't resolved yet ${objId}.`); } return obj.data; } /** * @param {string} objId * @returns {boolean} */ has(objId) { const obj = this.#objs[objId]; return obj?.capability.settled || false; } /** * Resolves the object `objId` with optional `data`. * * @param {string} objId * @param {any} [data] */ resolve(objId, data = null) { const obj = this.#ensureObj(objId); obj.data = data; obj.capability.resolve(); } clear() { for (const objId in this.#objs) { const { data } = this.#objs[objId]; data?.bitmap?.close(); // Release any `ImageBitmap` data. } this.#objs = Object.create(null); } } /** * Allows controlling of the rendering tasks. */ class RenderTask { #internalRenderTask = null; constructor(internalRenderTask) { this.#internalRenderTask = internalRenderTask; /** * Callback for incremental rendering -- a function that will be called * each time the rendering is paused. To continue rendering call the * function that is the first argument to the callback. * @type {function} */ this.onContinue = null; } /** * Promise for rendering task completion. * @type {Promise<void>} */ get promise() { return this.#internalRenderTask.capability.promise; } /** * Cancels the rendering task. If the task is currently rendering it will * not be cancelled until graphics pauses with a timeout. The promise that * this object extends will be rejected when cancelled. * * @param {number} [extraDelay] */ cancel(extraDelay = 0) { this.#internalRenderTask.cancel(/* error = */ null, extraDelay); } /** * Whether form fields are rendered separately from the main operatorList. * @type {boolean} */ get separateAnnots() { const { separateAnnots } = this.#internalRenderTask.operatorList; if (!separateAnnots) { return false; } const { annotationCanvasMap } = this.#internalRenderTask; return ( separateAnnots.form || (separateAnnots.canvas && annotationCanvasMap?.size > 0) ); } } /** * For internal use only. * @ignore */ class InternalRenderTask { static #canvasInUse = new WeakSet(); constructor({ callback, params, objs, commonObjs, annotationCanvasMap, operatorList, pageIndex, canvasFactory, filterFactory, useRequestAnimationFrame = false, pdfBug = false, pageColors = null, }) { this.callback = callback; this.params = params; this.objs = objs; this.commonObjs = commonObjs; this.annotationCanvasMap = annotationCanvasMap; this.operatorListIdx = null; this.operatorList = operatorList; this._pageIndex = pageIndex; this.canvasFactory = canvasFactory; this.filterFactory = filterFactory; this._pdfBug = pdfBug; this.pageColors = pageColors; this.running = false; this.graphicsReadyCallback = null; this.graphicsReady = false; this._useRequestAnimationFrame = useRequestAnimationFrame === true && typeof window !== "undefined"; this.cancelled = false; this.capability = createPromiseCapability(); this.task = new RenderTask(this); // caching this-bound methods this._cancelBound = this.cancel.bind(this); this._continueBound = this._continue.bind(this); this._scheduleNextBound = this._scheduleNext.bind(this); this._nextBound = this._next.bind(this); this._canvas = params.canvasContext.canvas; } get completed() { return this.capability.promise.catch(function () { // Ignoring errors, since we only want to know when rendering is // no longer pending. }); } initializeGraphics({ transparency = false, isOffscreenCanvasSupported = false, optionalContentConfig, }) { if (this.cancelled) { return; } if (this._canvas) { if (InternalRenderTask.#canvasInUse.has(this._canvas)) { throw new Error( "Cannot use the same canvas during multiple render() operations. " + "Use different canvas or ensure previous operations were " + "cancelled or completed." ); } InternalRenderTask.#canvasInUse.add(this._canvas); } if (this._pdfBug && globalThis.StepperManager?.enabled) { this.stepper = globalThis.StepperManager.create(this._pageIndex); this.stepper.init(this.operatorList); this.stepper.nextBreakPoint = this.stepper.getNextBreakPoint(); } const { canvasContext, viewport, transform, background } = this.params; this.gfx = new CanvasGraphics( canvasContext, this.commonObjs, this.objs, this.canvasFactory, this.filterFactory, { optionalContentConfig }, this.annotationCanvasMap, this.pageColors ); this.gfx.beginDrawing({ transform, viewport, transparency, background, }); this.operatorListIdx = 0; this.graphicsReady = true; this.graphicsReadyCallback?.(); } cancel(error = null, extraDelay = 0) { this.running = false; this.cancelled = true; this.gfx?.endDrawing(); if (this._canvas) { InternalRenderTask.#canvasInUse.delete(this._canvas); } this.callback( error || new RenderingCancelledException( `Rendering cancelled, page ${this._pageIndex + 1}`, "canvas", extraDelay ) ); } operatorListChanged() { if (!this.graphicsReady) { if (!this.graphicsReadyCallback) { this.graphicsReadyCallback = this._continueBound; } return; } this.stepper?.updateOperatorList(this.operatorList); if (this.running) { return; } this._continue(); } _continue() { this.running = true; if (this.cancelled) { return; } if (this.task.onContinue) { this.task.onContinue(this._scheduleNextBound); } else { this._scheduleNext(); } } _scheduleNext() { if (this._useRequestAnimationFrame) { window.requestAnimationFrame(() => { this._nextBound().catch(this._cancelBound); }); } else { Promise.resolve().then(this._nextBound).catch(this._cancelBound); } } async _next() { if (this.cancelled) { return; } this.operatorListIdx = this.gfx.executeOperatorList( this.operatorList, this.operatorListIdx, this._continueBound, this.stepper ); if (this.operatorListIdx === this.operatorList.argsArray.length) { this.running = false; if (this.operatorList.lastChunk) { this.gfx.endDrawing(); if (this._canvas) { InternalRenderTask.#canvasInUse.delete(this._canvas); } this.callback(); } } } } /** @type {string} */ const version = typeof PDFJSDev !== "undefined" ? PDFJSDev.eval("BUNDLE_VERSION") : null; /** @type {string} */ const build = typeof PDFJSDev !== "undefined" ? PDFJSDev.eval("BUNDLE_BUILD") : null; export { build, DefaultCanvasFactory, DefaultCMapReaderFactory, DefaultFilterFactory, DefaultStandardFontDataFactory, getDocument, LoopbackPort, PDFDataRangeTransport, PDFDocumentLoadingTask, PDFDocumentProxy, PDFPageProxy, PDFWorker, PDFWorkerUtil, RenderTask, version, };