/* Copyright 2012 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @module pdfjsLib
 */

import {
  AbortException,
  AnnotationMode,
  assert,
  getVerbosityLevel,
  info,
  InvalidPDFException,
  isArrayBuffer,
  MAX_IMAGE_SIZE_TO_CACHE,
  MissingPDFException,
  PasswordException,
  PromiseCapability,
  RenderingIntentFlag,
  setVerbosityLevel,
  shadow,
  stringToBytes,
  UnexpectedResponseException,
  UnknownErrorException,
  unreachable,
  warn,
} from "../shared/util.js";
import {
  AnnotationStorage,
  PrintAnnotationStorage,
} from "./annotation_storage.js";
import {
  deprecated,
  DOMCanvasFactory,
  DOMCMapReaderFactory,
  DOMFilterFactory,
  DOMStandardFontDataFactory,
  isDataScheme,
  isValidFetchUrl,
  loadScript,
  PageViewport,
  RenderingCancelledException,
  StatTimer,
} from "./display_utils.js";
import { FontFaceObject, FontLoader } from "./font_loader.js";
import { CanvasGraphics } from "./canvas.js";
import { GlobalWorkerOptions } from "./worker_options.js";
import { isNodeJS } from "../shared/is_node.js";
import { MessageHandler } from "../shared/message_handler.js";
import { Metadata } from "./metadata.js";
import { OptionalContentConfig } from "./optional_content_config.js";
import { PDFDataTransportStream } from "./transport_stream.js";
import { XfaText } from "./xfa_text.js";

// Fallback used by `getDocument` when the `rangeChunkSize` parameter is
// missing or is not a positive integer.
const DEFAULT_RANGE_CHUNK_SIZE = 65536; // 2^16 = 65536
const RENDERING_CANCELLED_TIMEOUT = 100; // ms
const DELAYED_CLEANUP_TIMEOUT = 5000; // ms

// Default factory implementations. They start out as the DOM variants and are
// replaced with the Node.js variants below, but only in GENERIC builds that
// are running in Node.js.
let DefaultCanvasFactory = DOMCanvasFactory;
let DefaultCMapReaderFactory = DOMCMapReaderFactory;
let DefaultFilterFactory = DOMFilterFactory;
let DefaultStandardFontDataFactory = DOMStandardFontDataFactory;

if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC") && isNodeJS) {
  const {
    NodeCanvasFactory,
    NodeCMapReaderFactory,
    NodeFilterFactory,
    NodeStandardFontDataFactory,
  } = require("./node_utils.js");

  DefaultCanvasFactory = NodeCanvasFactory;
  DefaultCMapReaderFactory = NodeCMapReaderFactory;
  DefaultFilterFactory = NodeFilterFactory;
  DefaultStandardFontDataFactory = NodeStandardFontDataFactory;
}

// Factory for the stream used to fetch PDF data by URL; called by
// `getDocument` with `{ url, length, httpHeaders, ... }`.
//  - `PDFJSDev` undefined: the stream modules are loaded with dynamic
//    `import()`, hence this variant is async and returns a promise.
//  - GENERIC build running in Node.js: always a `PDFNodeStream`.
//  - Other GENERIC/CHROME builds: a `PDFFetchStream` when `isValidFetchUrl`
//    accepts `params.url`, otherwise a `PDFNetworkStream`.
//  - Any other build: left undefined.
let createPDFNetworkStream;
if (typeof PDFJSDev === "undefined") {
  // Start loading both modules immediately; the result is awaited lazily on
  // each call below.
  const streamsPromise = Promise.all([
    import("./network.js"),
    import("./fetch_stream.js"),
  ]);

  createPDFNetworkStream = async params => {
    const [{ PDFNetworkStream }, { PDFFetchStream }] = await streamsPromise;

    return isValidFetchUrl(params.url)
      ? new PDFFetchStream(params)
      : new PDFNetworkStream(params);
  };
} else if (PDFJSDev.test("GENERIC || CHROME")) {
  if (PDFJSDev.test("GENERIC") && isNodeJS) {
    const { PDFNodeStream } = require("./node_stream.js");

    createPDFNetworkStream = params => {
      return new PDFNodeStream(params);
    };
  } else {
    const { PDFNetworkStream } = require("./network.js");
    const { PDFFetchStream } = require("./fetch_stream.js");

    createPDFNetworkStream = params => {
      return isValidFetchUrl(params.url)
        ? new PDFFetchStream(params)
        : new PDFNetworkStream(params);
    };
  }
}

/**
 * @typedef { Int8Array | Uint8Array | Uint8ClampedArray |
 *            Int16Array | Uint16Array |
 *            Int32Array | Uint32Array | Float32Array |
 *            Float64Array
 * } TypedArray
 */

/**
 * @typedef { TypedArray | ArrayBuffer | Array | string } BinaryData
 */

/**
 * @typedef {Object} RefProxy
 * @property {number} num
 * @property {number} gen
 */

/**
 * Document initialization / loading parameters object.
 *
 * @typedef {Object} DocumentInitParameters
 * @property {string | URL} [url] - The URL of the PDF.
 * @property {BinaryData} [data] - Binary PDF data.
* Use TypedArrays (Uint8Array) to improve the memory usage. If PDF data is * BASE64-encoded, use `atob()` to convert it to a binary string first. * * NOTE: If TypedArrays are used they will generally be transferred to the * worker-thread. This will help reduce main-thread memory usage, however * it will take ownership of the TypedArrays. * @property {Object} [httpHeaders] - Basic authentication headers. * @property {boolean} [withCredentials] - Indicates whether or not * cross-site Access-Control requests should be made using credentials such * as cookies or authorization headers. The default is `false`. * @property {string} [password] - For decrypting password-protected PDFs. * @property {number} [length] - The PDF file length. It's used for progress * reports and range requests operations. * @property {PDFDataRangeTransport} [range] - Allows for using a custom range * transport implementation. * @property {number} [rangeChunkSize] - Specify maximum number of bytes fetched * per range request. The default value is {@link DEFAULT_RANGE_CHUNK_SIZE}. * @property {PDFWorker} [worker] - The worker that will be used for loading and * parsing the PDF data. * @property {number} [verbosity] - Controls the logging level; the constants * from {@link VerbosityLevel} should be used. * @property {string} [docBaseUrl] - The base URL of the document, used when * attempting to recover valid absolute URLs for annotations, and outline * items, that (incorrectly) only specify relative URLs. * @property {string} [cMapUrl] - The URL where the predefined Adobe CMaps are * located. Include the trailing slash. * @property {boolean} [cMapPacked] - Specifies if the Adobe CMaps are binary * packed or not. The default value is `true`. * @property {Object} [CMapReaderFactory] - The factory that will be used when * reading built-in CMap files. Providing a custom factory is useful for * environments without Fetch API or `XMLHttpRequest` support, such as * Node.js. 
The default value is {DOMCMapReaderFactory}. * @property {boolean} [useSystemFonts] - When `true`, fonts that aren't * embedded in the PDF document will fallback to a system font. * The default value is `true` in web environments and `false` in Node.js; * unless `disableFontFace === true` in which case this defaults to `false` * regardless of the environment (to prevent completely broken fonts). * @property {string} [standardFontDataUrl] - The URL where the standard font * files are located. Include the trailing slash. * @property {Object} [StandardFontDataFactory] - The factory that will be used * when reading the standard font files. Providing a custom factory is useful * for environments without Fetch API or `XMLHttpRequest` support, such as * Node.js. The default value is {DOMStandardFontDataFactory}. * @property {boolean} [useWorkerFetch] - Enable using the Fetch API in the * worker-thread when reading CMap and standard font files. When `true`, * the `CMapReaderFactory` and `StandardFontDataFactory` options are ignored. * The default value is `true` in web environments and `false` in Node.js. * @property {boolean} [stopAtErrors] - Reject certain promises, e.g. * `getOperatorList`, `getTextContent`, and `RenderTask`, when the associated * PDF data cannot be successfully parsed, instead of attempting to recover * whatever possible of the data. The default value is `false`. * @property {number} [maxImageSize] - The maximum allowed image size in total * pixels, i.e. width * height. Images above this value will not be rendered. * Use -1 for no limit, which is also the default value. * @property {boolean} [isEvalSupported] - Determines if we can evaluate strings * as JavaScript. Primarily used to improve performance of font rendering, and * when parsing PDF functions. The default value is `true`. * @property {boolean} [isOffscreenCanvasSupported] - Determines if we can use * `OffscreenCanvas` in the worker. 
Primarily used to improve performance of
 *   image conversion/rendering.
 *   The default value is `true` in web environments and `false` in Node.js.
 * @property {number} [canvasMaxAreaInBytes] - The integer value is used to
 *   know when an image must be resized (uses `OffscreenCanvas` in the worker).
 *   If it's -1 then a possibly slow algorithm is used to guess the max value.
 * @property {boolean} [disableFontFace] - By default fonts are converted to
 *   OpenType fonts and loaded via the Font Loading API or `@font-face` rules.
 *   If disabled, fonts will be rendered using a built-in font renderer that
 *   constructs the glyphs with primitive path commands.
 *   The default value is `false` in web environments and `true` in Node.js.
 * @property {boolean} [fontExtraProperties] - Include additional properties,
 *   which are unused during rendering of PDF documents, when exporting the
 *   parsed font data from the worker-thread. This may be useful for debugging
 *   purposes (and backwards compatibility), but note that it will lead to
 *   increased memory usage. The default value is `false`.
 * @property {boolean} [enableXfa] - Render Xfa forms if any.
 *   The default value is `false`.
 * @property {HTMLDocument} [ownerDocument] - Specify an explicit document
 *   context to create elements with and to load resources, such as fonts,
 *   into. Defaults to the current document.
 * @property {boolean} [disableRange] - Disable range request loading of PDF
 *   files. When enabled, and if the server supports partial content requests,
 *   then the PDF will be fetched in chunks. The default value is `false`.
 * @property {boolean} [disableStream] - Disable streaming of PDF file data.
 *   By default PDF.js attempts to load PDF files in chunks. The default value
 *   is `false`.
 * @property {boolean} [disableAutoFetch] - Disable pre-fetching of PDF file
 *   data. When range requests are enabled PDF.js will automatically keep
 *   fetching more data even if it isn't needed to display the current page.
* The default value is `false`. * * NOTE: It is also necessary to disable streaming, see above, in order for * disabling of pre-fetching to work correctly. * @property {boolean} [pdfBug] - Enables special hooks for debugging PDF.js * (see `web/debugger.js`). The default value is `false`. * @property {Object} [canvasFactory] - The factory instance that will be used * when creating canvases. The default value is {new DOMCanvasFactory()}. * @property {Object} [filterFactory] - A factory instance that will be used * to create SVG filters when rendering some images on the main canvas. */ /** * This is the main entry point for loading a PDF and interacting with it. * * NOTE: If a URL is used to fetch the PDF data a standard Fetch API call (or * XHR as fallback) is used, which means it must follow same origin rules, * e.g. no cross-domain requests without CORS. * * @param {string | URL | TypedArray | ArrayBuffer | DocumentInitParameters} * src - Can be a URL where a PDF file is located, a typed array (Uint8Array) * already populated with data, or a parameter object. * @returns {PDFDocumentLoadingTask} */ function getDocument(src) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) { if (typeof src === "string" || src instanceof URL) { src = { url: src }; } else if (isArrayBuffer(src)) { src = { data: src }; } } if (typeof src !== "object") { throw new Error("Invalid parameter in getDocument, need parameter object."); } if (!src.url && !src.data && !src.range) { throw new Error( "Invalid parameter object: need either .data, .range or .url" ); } const task = new PDFDocumentLoadingTask(); const { docId } = task; const url = src.url ? getUrlProp(src.url) : null; const data = src.data ? getDataProp(src.data) : null; const httpHeaders = src.httpHeaders || null; const withCredentials = src.withCredentials === true; const password = src.password ?? null; const rangeTransport = src.range instanceof PDFDataRangeTransport ? 
src.range : null; const rangeChunkSize = Number.isInteger(src.rangeChunkSize) && src.rangeChunkSize > 0 ? src.rangeChunkSize : DEFAULT_RANGE_CHUNK_SIZE; let worker = src.worker instanceof PDFWorker ? src.worker : null; const verbosity = src.verbosity; // Ignore "data:"-URLs, since they can't be used to recover valid absolute // URLs anyway. We want to avoid sending them to the worker-thread, since // they contain the *entire* PDF document and can thus be arbitrarily long. const docBaseUrl = typeof src.docBaseUrl === "string" && !isDataScheme(src.docBaseUrl) ? src.docBaseUrl : null; const cMapUrl = typeof src.cMapUrl === "string" ? src.cMapUrl : null; const cMapPacked = src.cMapPacked !== false; const CMapReaderFactory = src.CMapReaderFactory || DefaultCMapReaderFactory; const standardFontDataUrl = typeof src.standardFontDataUrl === "string" ? src.standardFontDataUrl : null; const StandardFontDataFactory = src.StandardFontDataFactory || DefaultStandardFontDataFactory; const ignoreErrors = src.stopAtErrors !== true; const maxImageSize = Number.isInteger(src.maxImageSize) && src.maxImageSize > -1 ? src.maxImageSize : -1; const isEvalSupported = src.isEvalSupported !== false; const isOffscreenCanvasSupported = typeof src.isOffscreenCanvasSupported === "boolean" ? src.isOffscreenCanvasSupported : !isNodeJS; const canvasMaxAreaInBytes = Number.isInteger(src.canvasMaxAreaInBytes) ? src.canvasMaxAreaInBytes : -1; const disableFontFace = typeof src.disableFontFace === "boolean" ? src.disableFontFace : isNodeJS; const fontExtraProperties = src.fontExtraProperties === true; const enableXfa = src.enableXfa === true; const ownerDocument = src.ownerDocument || globalThis.document; const disableRange = src.disableRange === true; const disableStream = src.disableStream === true; const disableAutoFetch = src.disableAutoFetch === true; const pdfBug = src.pdfBug === true; // Parameters whose default values depend on other parameters. const length = rangeTransport ? 
rangeTransport.length : src.length ?? NaN; const useSystemFonts = typeof src.useSystemFonts === "boolean" ? src.useSystemFonts : !isNodeJS && !disableFontFace; const useWorkerFetch = typeof src.useWorkerFetch === "boolean" ? src.useWorkerFetch : (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) || (CMapReaderFactory === DOMCMapReaderFactory && StandardFontDataFactory === DOMStandardFontDataFactory && isValidFetchUrl(cMapUrl, document.baseURI) && isValidFetchUrl(standardFontDataUrl, document.baseURI)); const canvasFactory = src.canvasFactory || new DefaultCanvasFactory({ ownerDocument }); const filterFactory = src.filterFactory || new DefaultFilterFactory({ docId, ownerDocument }); // Parameters only intended for development/testing purposes. const styleElement = typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING") ? src.styleElement : null; // Set the main-thread verbosity level. setVerbosityLevel(verbosity); // Ensure that the various factories can be initialized, when necessary, // since the user may provide *custom* ones. const transportFactory = { canvasFactory, filterFactory, }; if (!useWorkerFetch) { transportFactory.cMapReaderFactory = new CMapReaderFactory({ baseUrl: cMapUrl, isCompressed: cMapPacked, }); transportFactory.standardFontDataFactory = new StandardFontDataFactory({ baseUrl: standardFontDataUrl, }); } if (!worker) { const workerParams = { verbosity, port: GlobalWorkerOptions.workerPort, }; // Worker was not provided -- creating and owning our own. If message port // is specified in global worker options, using it. worker = workerParams.port ? PDFWorker.fromPort(workerParams) : new PDFWorker(workerParams); task._worker = worker; } const fetchDocParams = { docId, apiVersion: typeof PDFJSDev !== "undefined" && !PDFJSDev.test("TESTING") ? 
PDFJSDev.eval("BUNDLE_VERSION") : null, data, password, disableAutoFetch, rangeChunkSize, length, docBaseUrl, enableXfa, evaluatorOptions: { maxImageSize, disableFontFace, ignoreErrors, isEvalSupported, isOffscreenCanvasSupported, canvasMaxAreaInBytes, fontExtraProperties, useSystemFonts, cMapUrl: useWorkerFetch ? cMapUrl : null, standardFontDataUrl: useWorkerFetch ? standardFontDataUrl : null, }, }; const transportParams = { ignoreErrors, isEvalSupported, disableFontFace, fontExtraProperties, enableXfa, ownerDocument, disableAutoFetch, pdfBug, styleElement, }; worker.promise .then(function () { if (task.destroyed) { throw new Error("Loading aborted"); } const workerIdPromise = _fetchDocument(worker, fetchDocParams); const networkStreamPromise = new Promise(function (resolve) { let networkStream; if (rangeTransport) { networkStream = new PDFDataTransportStream( { length, initialData: rangeTransport.initialData, progressiveDone: rangeTransport.progressiveDone, contentDispositionFilename: rangeTransport.contentDispositionFilename, disableRange, disableStream, }, rangeTransport ); } else if (!data) { if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { throw new Error("Not implemented: createPDFNetworkStream"); } networkStream = createPDFNetworkStream({ url, length, httpHeaders, withCredentials, rangeChunkSize, disableRange, disableStream, }); } resolve(networkStream); }); return Promise.all([workerIdPromise, networkStreamPromise]).then( function ([workerId, networkStream]) { if (task.destroyed) { throw new Error("Loading aborted"); } const messageHandler = new MessageHandler( docId, workerId, worker.port ); const transport = new WorkerTransport( messageHandler, task, networkStream, transportParams, transportFactory ); task._transport = transport; messageHandler.send("Ready", null); } ); }) .catch(task._capability.reject); return task; } /** * Starts fetching of specified PDF document/data. 
*
 * Sends the "GetDocRequest" message through the worker's `messageHandler` and
 * waits for the reply. The worker's `destroyed` flag is checked both before
 * sending and after the reply has arrived.
 *
 * @param {PDFWorker} worker
 * @param {Object} source - The message payload. When `source.data` is set,
 *   its underlying `buffer` is passed as the third `sendWithPromise` argument
 *   (presumably the transfer list -- confirm against `MessageHandler`).
 * @returns {Promise} A promise that is resolved when the worker ID of
 *   the `MessageHandler` is known.
 * @throws {Error} "Worker was destroyed", if the worker is destroyed before
 *   the request is sent or while it is pending.
 * @private
 */
async function _fetchDocument(worker, source) {
  if (worker.destroyed) {
    throw new Error("Worker was destroyed");
  }
  const workerId = await worker.messageHandler.sendWithPromise(
    "GetDocRequest",
    source,
    source.data ? [source.data.buffer] : null
  );

  // The worker may have been destroyed while the request was pending.
  if (worker.destroyed) {
    throw new Error("Worker was destroyed");
  }
  return workerId;
}

/**
 * Normalizes the `url` loading parameter into an absolute URL string.
 *
 * @param {string | URL} val
 * @returns {string | null} The absolute URL; the string as-is in Node.js
 *   (GENERIC builds) when it cannot be resolved; `null` in MOZCENTRAL builds.
 * @throws {Error} When `val` cannot be converted into a URL.
 */
function getUrlProp(val) {
  if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
    return null; // The 'url' is unused with `PDFDataRangeTransport`.
  }
  if (val instanceof URL) {
    return val.href;
  }
  try {
    // The full path is required in the 'url' field.
    return new URL(val, window.location).href;
  } catch (ex) {
    // Reached both when `val` is not a valid URL and when `window` does not
    // exist; only the Node.js case below recovers from it.
    if (
      typeof PDFJSDev !== "undefined" &&
      PDFJSDev.test("GENERIC") &&
      isNodeJS &&
      typeof val === "string"
    ) {
      return val; // Use the url as-is in Node.js environments.
    }
  }
  throw new Error(
    "Invalid PDF url data: " +
      "either string or URL-object is expected in the url property."
  );
}

/**
 * Normalizes the `data` loading parameter into a `Uint8Array`.
 *
 * @param {BinaryData} val
 * @returns {Uint8Array} `val` itself when it is a `Uint8Array` that spans its
 *   entire underlying `ArrayBuffer`; otherwise a newly created `Uint8Array`
 *   (or the result of `stringToBytes` for strings).
 * @throws {Error} When `val` is not a supported kind of binary data.
 */
function getDataProp(val) {
  // Converting string or array-like data to Uint8Array.
  if (
    typeof PDFJSDev !== "undefined" &&
    PDFJSDev.test("GENERIC") &&
    isNodeJS &&
    typeof Buffer !== "undefined" && // eslint-disable-line no-undef
    val instanceof Buffer // eslint-disable-line no-undef
  ) {
    deprecated(
      "Please provide binary data as `Uint8Array`, rather than `Buffer`."
    );
    return new Uint8Array(val);
  }
  if (val instanceof Uint8Array && val.byteLength === val.buffer.byteLength) {
    // Use the data as-is when it's already a Uint8Array that completely
    // "utilizes" its underlying ArrayBuffer, to prevent any possible
    // issues when transferring it to the worker-thread.
    return val;
  }
  if (typeof val === "string") {
    return stringToBytes(val);
  }
  // Array-like objects (anything with a numeric `length`, which includes
  // typed arrays that are views of a larger buffer) and ArrayBuffers.
  if ((typeof val === "object" && !isNaN(val?.length)) || isArrayBuffer(val)) {
    return new Uint8Array(val);
  }
  throw new Error(
    "Invalid PDF binary data: either TypedArray, " +
      "string, or array-like object is expected in the data property."
  );
}

/**
 * @typedef {Object} OnProgressParameters
 * @property {number} loaded - Currently loaded number of bytes.
 * @property {number} total - Total number of bytes in the PDF file.
 */

/**
 * The loading task controls the operations required to load a PDF document
 * (such as network requests) and provides a way to listen for completion,
 * after which individual pages can be rendered.
 */
class PDFDocumentLoadingTask {
  // Counter used to generate the `docId` of each new task.
  static #docId = 0;

  constructor() {
    // Backs the public `promise` getter.
    this._capability = new PromiseCapability();
    // Both are assigned by `getDocument`; `_worker` is only set when the
    // worker was created by `getDocument` rather than provided by the caller.
    this._transport = null;
    this._worker = null;

    /**
     * Unique identifier for the document loading task.
     * @type {string}
     */
    this.docId = `d${PDFDocumentLoadingTask.#docId++}`;

    /**
     * Whether the loading task is destroyed or not.
     * @type {boolean}
     */
    this.destroyed = false;

    /**
     * Callback to request a password if a wrong or no password was provided.
     * The callback receives two parameters: a function that should be called
     * with the new password, and a reason (see {@link PasswordResponses}).
     * @type {function}
     */
    this.onPassword = null;

    /**
     * Callback to be able to monitor the loading progress of the PDF file
     * (necessary to implement e.g. a loading bar).
     * The callback receives an {@link OnProgressParameters} argument.
     * @type {function}
     */
    this.onProgress = null;
  }

  /**
   * Promise for document loading task completion.
   * @type {Promise}
   */
  get promise() {
    return this._capability.promise;
  }

  /**
   * Abort all network requests and destroy the worker.
   * @returns {Promise} A promise that is resolved when destruction is
   *   completed.
   */
  async destroy() {
    // Set the flag first, so that a pending `getDocument` continuation can
    // observe it and abort.
    this.destroyed = true;
    await this._transport?.destroy();

    this._transport = null;
    if (this._worker) {
      this._worker.destroy();
      this._worker = null;
    }
  }
}

/**
 * Abstract class to support range requests file loading.
 *
 * NOTE: The TypedArrays passed to the constructor and relevant methods below
 * will generally be transferred to the worker-thread. This will help reduce
 * main-thread memory usage, however it will take ownership of the TypedArrays.
 */
class PDFDataRangeTransport {
  /**
   * @param {number} length
   * @param {Uint8Array|null} initialData
   * @param {boolean} [progressiveDone]
   * @param {string} [contentDispositionFilename]
   */
  constructor(
    length,
    initialData,
    progressiveDone = false,
    contentDispositionFilename = null
  ) {
    this.length = length;
    this.initialData = initialData;
    this.progressiveDone = progressiveDone;
    this.contentDispositionFilename = contentDispositionFilename;

    this._rangeListeners = [];
    this._progressListeners = [];
    this._progressiveReadListeners = [];
    this._progressiveDoneListeners = [];
    // Resolved by `transportReady`; the progress-related notifications below
    // are held back until then.
    this._readyCapability = new PromiseCapability();
  }

  /**
   * @param {function} listener
   */
  addRangeListener(listener) {
    this._rangeListeners.push(listener);
  }

  /**
   * @param {function} listener
   */
  addProgressListener(listener) {
    this._progressListeners.push(listener);
  }

  /**
   * @param {function} listener
   */
  addProgressiveReadListener(listener) {
    this._progressiveReadListeners.push(listener);
  }

  /**
   * @param {function} listener
   */
  addProgressiveDoneListener(listener) {
    this._progressiveDoneListeners.push(listener);
  }

  /**
   * Notifies the range listeners immediately, i.e. without waiting for
   * `transportReady` to have been called.
   *
   * @param {number} begin
   * @param {Uint8Array|null} chunk
   */
  onDataRange(begin, chunk) {
    for (const listener of this._rangeListeners) {
      listener(begin, chunk);
    }
  }

  /**
   * Notifies the progress listeners, once the transport is ready.
   *
   * @param {number} loaded
   * @param {number|undefined} total
   */
  onDataProgress(loaded, total) {
    this._readyCapability.promise.then(() => {
      for (const listener of this._progressListeners) {
        listener(loaded, total);
      }
    });
  }

  /**
   * Notifies the progressive-read listeners, once the transport is ready.
   *
   * @param {Uint8Array|null} chunk
   */
  onDataProgressiveRead(chunk) {
    this._readyCapability.promise.then(() => {
      for (const listener of this._progressiveReadListeners) {
        listener(chunk);
      }
    });
  }

  /**
   * Notifies the progressive-done listeners, once the transport is ready.
   */
  onDataProgressiveDone() {
    this._readyCapability.promise.then(() => {
      for (const listener of this._progressiveDoneListeners) {
        listener();
      }
    });
  }

  /**
   * Marks the transport as ready, which releases the notifications that are
   * waiting on the ready-promise.
   */
  transportReady() {
    this._readyCapability.resolve();
  }

  /**
   * Abstract method, must be overridden; this implementation only calls
   * `unreachable`.
   *
   * @param {number} begin
   * @param {number} end
   */
  requestDataRange(begin, end) {
    unreachable("Abstract method PDFDataRangeTransport.requestDataRange");
  }

  /**
   * No-op in this class.
   */
  abort() {}
}

/**
 * Proxy to a `PDFDocument` in the worker thread.
 *
 * Almost every member simply forwards to the transport instance that is
 * provided to the constructor.
 */
class PDFDocumentProxy {
  /**
   * @param {Object} pdfInfo - Read for `numPages` and `fingerprints`.
   * @param {Object} transport - The object all other members forward to.
   */
  constructor(pdfInfo, transport) {
    this._pdfInfo = pdfInfo;
    this._transport = transport;

    if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
      // For testing purposes.
      Object.defineProperty(this, "getXFADatasets", {
        value: () => {
          return this._transport.getXFADatasets();
        },
      });
      Object.defineProperty(this, "getXRefPrevValue", {
        value: () => {
          return this._transport.getXRefPrevValue();
        },
      });
    }
  }

  /**
   * @type {AnnotationStorage} Storage for annotation data in forms.
   */
  get annotationStorage() {
    return this._transport.annotationStorage;
  }

  /**
   * @type {Object} The filter factory instance.
   */
  get filterFactory() {
    return this._transport.filterFactory;
  }

  /**
   * @type {number} Total number of pages in the PDF file.
   */
  get numPages() {
    return this._pdfInfo.numPages;
  }

  /**
   * @type {Array} A (not guaranteed to be) unique ID to
   *   identify the PDF document.
   *   NOTE: The first element will always be defined for all PDF documents,
   *   whereas the second element is only defined for *modified* PDF documents.
   */
  get fingerprints() {
    return this._pdfInfo.fingerprints;
  }

  /**
   * @type {boolean} True if only XFA form.
   */
  get isPureXfa() {
    // NOTE(review): `shadow` presumably caches the computed value on the
    // instance -- confirm against `shared/util.js`.
    return shadow(this, "isPureXfa", !!this._transport._htmlForXfa);
  }

  /**
   * NOTE: This is (mostly) intended to support printing of XFA forms.
   *
   * @type {Object | null} An object representing a HTML tree structure
   *   to render the XFA, or `null` when no XFA form exists.
   */
  get allXfaHtml() {
    return this._transport._htmlForXfa;
  }

  /**
   * @param {number} pageNumber - The page number to get. The first page is 1.
   * @returns {Promise<PDFPageProxy>} A promise that is resolved with
   *   a {@link PDFPageProxy} object.
   */
  getPage(pageNumber) {
    return this._transport.getPage(pageNumber);
  }

  /**
   * @param {RefProxy} ref - The page reference.
   * @returns {Promise<number>} A promise that is resolved with the page index,
   *   starting from zero, that is associated with the reference.
   */
  getPageIndex(ref) {
    return this._transport.getPageIndex(ref);
  }

  /**
   * @returns {Promise<Object<string, Array<any>>>} A promise that is resolved
   *   with a mapping from named destinations to references.
   *
   * This can be slow for large documents. Use `getDestination` instead.
   */
  getDestinations() {
    return this._transport.getDestinations();
  }

  /**
   * @param {string} id - The named destination to get.
   * @returns {Promise<Array<any> | null>} A promise that is resolved with all
   *   information of the given named destination, or `null` when the named
   *   destination is not present in the PDF file.
   */
  getDestination(id) {
    return this._transport.getDestination(id);
  }

  /**
   * @returns {Promise<Array<string> | null>} A promise that is resolved with
   *   an {Array} containing the page labels that correspond to the page
   *   indexes, or `null` when no page labels are present in the PDF file.
   */
  getPageLabels() {
    return this._transport.getPageLabels();
  }

  /**
   * @returns {Promise<string>} A promise that is resolved with a {string}
   *   containing the page layout name.
   */
  getPageLayout() {
    return this._transport.getPageLayout();
  }

  /**
   * @returns {Promise<string>} A promise that is resolved with a {string}
   *   containing the page mode name.
   */
  getPageMode() {
    return this._transport.getPageMode();
  }

  /**
   * @returns {Promise<Object | null>} A promise that is resolved with an
   *   {Object} containing the viewer preferences, or `null` when no viewer
   *   preferences are present in the PDF file.
   */
  getViewerPreferences() {
    return this._transport.getViewerPreferences();
  }

  /**
   * @returns {Promise} A promise that is resolved with an {Array}
   *   containing the destination, or `null` when no open action is present
   *   in the PDF.
   */
  getOpenAction() {
    return this._transport.getOpenAction();
  }

  /**
   * @returns {Promise} A promise that is resolved with a lookup table
   *   for mapping named attachments to their content.
   */
  getAttachments() {
    return this._transport.getAttachments();
  }

  /**
   * @returns {Promise<Array<string> | null>} A promise that is resolved with
   *   an {Array} of all the JavaScript strings in the name tree, or `null`
   *   if no JavaScript exists.
   */
  getJavaScript() {
    return this._transport.getJavaScript();
  }

  /**
   * NOTE: This forwards to the transport's `getDocJSActions` method.
   *
   * @returns {Promise<Object | null>} A promise that is resolved with
   *   an {Object} with the JavaScript actions:
   *     - from the name tree (like getJavaScript);
   *     - from A or AA entries in the catalog dictionary.
   *   , or `null` if no JavaScript exists.
   */
  getJSActions() {
    return this._transport.getDocJSActions();
  }

  /**
   * @typedef {Object} OutlineNode
   * @property {string} title
   * @property {boolean} bold
   * @property {boolean} italic
   * @property {Uint8ClampedArray} color - The color in RGB format to use for
   *   display purposes.
   * @property {string | Array | null} dest
   * @property {string | null} url
   * @property {string | undefined} unsafeUrl
   * @property {boolean | undefined} newWindow
   * @property {number | undefined} count
   * @property {Array} items
   */

  /**
   * @returns {Promise<Array<OutlineNode>>} A promise that is resolved with an
   *   {Array} that is a tree outline (if it has one) of the PDF file.
   */
  getOutline() {
    return this._transport.getOutline();
  }

  /**
   * @returns {Promise<OptionalContentConfig>} A promise that is resolved with
   *   an {@link OptionalContentConfig} that contains all the optional content
   *   groups (assuming that the document has any).
   */
  getOptionalContentConfig() {
    return this._transport.getOptionalContentConfig();
  }

  /**
   * @returns {Promise<Array<number> | null>} A promise that is resolved with
   *   an {Array} that contains the permission flags for the PDF document, or
   *   `null` when no permissions are present in the PDF file.
   */
  getPermissions() {
    return this._transport.getPermissions();
  }

  /**
   * @returns {Promise<{ info: Object, metadata: Metadata }>} A promise that is
   *   resolved with an {Object} that has `info` and `metadata` properties.
   *   `info` is an {Object} filled with anything available in the information
   *   dictionary and similarly `metadata` is a {Metadata} object with
   *   information from the metadata section of the PDF.
   */
  getMetadata() {
    return this._transport.getMetadata();
  }

  /**
   * @typedef {Object} MarkInfo
   * Properties correspond to Table 321 of the PDF 32000-1:2008 spec.
   * @property {boolean} Marked
   * @property {boolean} UserProperties
   * @property {boolean} Suspects
   */

  /**
   * @returns {Promise<MarkInfo | null>} A promise that is resolved with
   *   a {MarkInfo} object that contains the MarkInfo flags for the PDF
   *   document, or `null` when no MarkInfo values are present in the PDF file.
   */
  getMarkInfo() {
    return this._transport.getMarkInfo();
  }

  /**
   * @returns {Promise<Uint8Array>} A promise that is resolved with a
   *   {Uint8Array} containing the raw data of the PDF document.
   */
  getData() {
    return this._transport.getData();
  }

  /**
   * @returns {Promise<Uint8Array>} A promise that is resolved with a
   *   {Uint8Array} containing the full data of the saved document.
   */
  saveDocument() {
    return this._transport.saveDocument();
  }

  /**
   * @returns {Promise<{ length: number }>} A promise that is resolved when the
   *   document's data is loaded. It is resolved with an {Object} that contains
   *   the `length` property that indicates size of the PDF data in bytes.
   */
  getDownloadInfo() {
    return this._transport.downloadInfoCapability.promise;
  }

  /**
   * Cleans up resources allocated by the document on both the main and worker
   * threads.
   *
   * NOTE: Do not, under any circumstances, call this method when rendering is
   * currently ongoing since that may lead to rendering errors.
   *
   * @param {boolean} [keepLoadedFonts] - Let fonts remain attached to the DOM.
   *   NOTE: This will increase persistent memory usage, hence don't use this
   *   option unless absolutely necessary. The default value is `false`.
   *   Fonts are always kept when `isPureXfa` is true.
   * @returns {Promise} A promise that is resolved when clean-up has finished.
   */
  cleanup(keepLoadedFonts = false) {
    return this._transport.startCleanup(keepLoadedFonts || this.isPureXfa);
  }

  /**
   * Destroys the current document instance and terminates the worker.
   *
   * @returns {Promise} The promise returned by the loading task's `destroy`
   *   method.
   */
  destroy() {
    return this.loadingTask.destroy();
  }

  /**
   * @type {DocumentInitParameters} A subset of the current
   *   {DocumentInitParameters}, which are needed in the viewer.
   */
  get loadingParams() {
    return this._transport.loadingParams;
  }

  /**
   * @type {PDFDocumentLoadingTask} The loadingTask for the current document.
   */
  get loadingTask() {
    return this._transport.loadingTask;
  }

  /**
   * @returns {Promise<Object<string, Array<Object>> | null>} A promise that is
   *   resolved with an {Object} containing /AcroForm field data for the JS
   *   sandbox, or `null` when no field data is present in the PDF file.
   */
  getFieldObjects() {
    return this._transport.getFieldObjects();
  }

  /**
   * @returns {Promise<boolean>} A promise that is resolved with `true`
   *   if some /AcroForm fields have JavaScript actions.
   */
  hasJSActions() {
    return this._transport.hasJSActions();
  }

  /**
   * @returns {Promise<Array<string> | null>} A promise that is resolved with an
   *   {Array} containing IDs of annotations that have a calculation
   *   action, or `null` when no such annotations are present in the PDF file.
   */
  getCalculationOrderIds() {
    return this._transport.getCalculationOrderIds();
  }
}

/**
 * Page getViewport parameters.
 *
 * @typedef {Object} GetViewportParameters
 * @property {number} scale - The desired scale of the viewport.
 * @property {number} [rotation] - The desired rotation, in degrees, of
 *   the viewport. If omitted it defaults to the page rotation.
* @property {number} [offsetX] - The horizontal, i.e. x-axis, offset. * The default value is `0`. * @property {number} [offsetY] - The vertical, i.e. y-axis, offset. * The default value is `0`. * @property {boolean} [dontFlip] - If true, the y-axis will not be * flipped. The default value is `false`. */ /** * Page getTextContent parameters. * * @typedef {Object} getTextContentParameters * @property {boolean} [includeMarkedContent] - When true include marked * content items in the items array of TextContent. The default is `false`. * @property {boolean} [disableNormalization] - When true the text is *not* * normalized in the worker-thread. The default is `false`. */ /** * Page text content. * * @typedef {Object} TextContent * @property {Array<TextItem | TextMarkedContent>} items - Array of * {@link TextItem} and {@link TextMarkedContent} objects. TextMarkedContent * items are included when includeMarkedContent is true. * @property {Object<string, TextStyle>} styles - {@link TextStyle} objects, * indexed by font name. */ /** * Page text content part. * * @typedef {Object} TextItem * @property {string} str - Text content. * @property {string} dir - Text direction: 'ttb', 'ltr' or 'rtl'. * @property {Array} transform - Transformation matrix. * @property {number} width - Width in device space. * @property {number} height - Height in device space. * @property {string} fontName - Font name used by PDF.js for converted font. * @property {boolean} hasEOL - Indicating if the text content is followed by a * line-break. */ /** * Page text marked content part. * * @typedef {Object} TextMarkedContent * @property {string} type - Either 'beginMarkedContent', * 'beginMarkedContentProps', or 'endMarkedContent'. * @property {string} id - The marked content identifier. Only used for type * 'beginMarkedContentProps'. */ /** * Text style. * * @typedef {Object} TextStyle * @property {number} ascent - Font ascent. * @property {number} descent - Font descent. * @property {boolean} vertical - Whether or not the text is in vertical mode.
* @property {string} fontFamily - The possible font family. */ /** * Page annotation parameters. * * @typedef {Object} GetAnnotationsParameters * @property {string} [intent] - Determines the annotations that are fetched, * can be 'display' (viewable annotations), 'print' (printable annotations), * or 'any' (all annotations). The default value is 'display'. */ /** * Page render parameters. * * @typedef {Object} RenderParameters * @property {CanvasRenderingContext2D} canvasContext - A 2D context of a DOM * Canvas object. * @property {PageViewport} viewport - Rendering viewport obtained by calling * the `PDFPageProxy.getViewport` method. * @property {string} [intent] - Rendering intent, can be 'display', 'print', * or 'any'. The default value is 'display'. * @property {number} [annotationMode] Controls which annotations are rendered * onto the canvas, for annotations with appearance-data; the values from * {@link AnnotationMode} should be used. The following values are supported: * - `AnnotationMode.DISABLE`, which disables all annotations. * - `AnnotationMode.ENABLE`, which includes all possible annotations (thus * it also depends on the `intent`-option, see above). * - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain * interactive form elements (those will be rendered in the display layer). * - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations * (as above) but where interactive form elements are updated with data * from the {@link AnnotationStorage}-instance; useful e.g. for printing. * The default value is `AnnotationMode.ENABLE`. * @property {Array} [transform] - Additional transform, applied just * before viewport transform. * @property {CanvasGradient | CanvasPattern | string} [background] - Background * to use for the canvas. * Any valid `canvas.fillStyle` can be used: a `DOMString` parsed as CSS * value, a `CanvasGradient` object (a linear or radial gradient) or * a `CanvasPattern` object (a repetitive image). 
The default value is * 'rgb(255,255,255)'. * * NOTE: This option may be partially, or completely, ignored when the * `pageColors`-option is used. * @property {Object} [pageColors] - Overwrites background and foreground colors * with user defined ones in order to improve readability in high contrast * mode. * @property {Promise<OptionalContentConfig>} [optionalContentConfigPromise] - * A promise that should resolve with an {@link OptionalContentConfig} * created from `PDFDocumentProxy.getOptionalContentConfig`. If `null`, * the configuration will be fetched automatically with the default visibility * states set. * @property {Map} [annotationCanvasMap] - Maps some * annotation ids to the canvases used to render them. * @property {PrintAnnotationStorage} [printAnnotationStorage] */ /** * Page getOperatorList parameters. * * @typedef {Object} GetOperatorListParameters * @property {string} [intent] - Rendering intent, can be 'display', 'print', * or 'any'. The default value is 'display'. * @property {number} [annotationMode] Controls which annotations are included * in the operatorList, for annotations with appearance-data; the values from * {@link AnnotationMode} should be used. The following values are supported: * - `AnnotationMode.DISABLE`, which disables all annotations. * - `AnnotationMode.ENABLE`, which includes all possible annotations (thus * it also depends on the `intent`-option, see above). * - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain * interactive form elements (those will be rendered in the display layer). * - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations * (as above) but where interactive form elements are updated with data * from the {@link AnnotationStorage}-instance; useful e.g. for printing. * The default value is `AnnotationMode.ENABLE`. * @property {PrintAnnotationStorage} [printAnnotationStorage] */ /** * Structure tree node. The root node will have a role "Root".
*
 * @typedef {Object} StructTreeNode
 * @property {Array<StructTreeNode | StructTreeContent>} children - Array of
 *   {@link StructTreeNode} and {@link StructTreeContent} objects.
 * @property {string} role - element's role, already mapped if a role map exists
 *   in the PDF.
 */

/**
 * Structure tree content.
 *
 * @typedef {Object} StructTreeContent
 * @property {string} type - either "content" for page and stream structure
 *   elements or "object" for object references.
 * @property {string} id - unique id that will map to the text layer.
 */

/**
 * PDF page operator list.
 *
 * @typedef {Object} PDFOperatorList
 * @property {Array} fnArray - Array containing the operator functions.
 * @property {Array} argsArray - Array containing the arguments of the
 *   functions.
 */

/**
 * Proxy to a `PDFPage` in the worker thread.
 */
class PDFPageProxy {
  /* Handle of the pending delayed-cleanup timer, or `null` when none is set. */
  #delayedCleanupTimeout = null;

  /* True while a cleanup has been requested but has not been carried out. */
  #pendingCleanup = false;

  /**
   * @param {number} pageIndex - Zero-based index of the page; `pageNumber`
   *   is derived from it as `pageIndex + 1`.
   * @param {Object} pageInfo - Object the `rotate`, `ref`, `userUnit` and
   *   `view` getters read from.
   * @param {Object} transport - The transport that all requests are routed
   *   through; it also supplies the shared `commonObjs`.
   * @param {boolean} [pdfBug] - When true a `StatTimer` is created so that
   *   `stats` is non-null. The default value is `false`.
   */
  constructor(pageIndex, pageInfo, transport, pdfBug = false) {
    this._pageIndex = pageIndex;
    this._pageInfo = pageInfo;
    this._transport = transport;
    this._stats = pdfBug ? new StatTimer() : null;
    this._pdfBug = pdfBug;
    /** @type {PDFObjects} */
    this.commonObjs = transport.commonObjs;
    this.objs = new PDFObjects();

    this._maybeCleanupAfterRender = false;
    /* Per-intent rendering state, keyed by the `cacheKey` of the intent. */
    this._intentStates = new Map();
    this.destroyed = false;
  }

  /**
   * @type {number} Page number of the page. First page is 1.
   */
  get pageNumber() {
    return this._pageIndex + 1;
  }

  /**
   * @type {number} The number of degrees the page is rotated clockwise.
   */
  get rotate() {
    return this._pageInfo.rotate;
  }

  /**
   * @type {RefProxy | null} The reference that points to this page.
   */
  get ref() {
    return this._pageInfo.ref;
  }

  /**
   * @type {number} The default size of units in 1/72nds of an inch.
   */
  get userUnit() {
    return this._pageInfo.userUnit;
  }

  /**
   * @type {Array<number>} An array of the visible portion of the PDF page in
   *   user space units [x1, y1, x2, y2].
   */
  get view() {
    return this._pageInfo.view;
  }

  /**
   * @param {GetViewportParameters} params - Viewport parameters.
* @returns {PageViewport} Contains 'width' and 'height' properties
   *   along with transforms required for rendering.
   */
  getViewport({
    scale,
    // When omitted, the page's own rotation is used.
    rotation = this.rotate,
    offsetX = 0,
    offsetY = 0,
    dontFlip = false,
  } = {}) {
    return new PageViewport({
      viewBox: this.view,
      scale,
      rotation,
      offsetX,
      offsetY,
      dontFlip,
    });
  }

  /**
   * @param {GetAnnotationsParameters} params - Annotation parameters.
   * @returns {Promise<Array>} A promise that is resolved with an
   *   {Array} of the annotation objects.
   */
  getAnnotations({ intent = "display" } = {}) {
    // The textual intent is converted by the transport; only the resulting
    // `renderingIntent` is forwarded with the request.
    const intentArgs = this._transport.getRenderingIntent(intent);

    return this._transport.getAnnotations(
      this._pageIndex,
      intentArgs.renderingIntent
    );
  }

  /**
   * @returns {Promise<Object>} A promise that is resolved with an
   *   {Object} with JS actions.
   */
  getJSActions() {
    return this._transport.getPageJSActions(this._pageIndex);
  }

  /**
   * @type {boolean} True if only XFA form.
   */
  get isPureXfa() {
    // NOTE(review): `shadow` (from shared/util.js) presumably replaces this
    // getter with the computed value so it is only evaluated once - confirm.
    return shadow(this, "isPureXfa", !!this._transport._htmlForXfa);
  }

  /**
   * @returns {Promise<Object | null>} A promise that is resolved with
   *   an {Object} with a fake DOM object (a tree structure where elements
   *   are {Object} with a name, attributes (class, style, ...), value and
   *   children, very similar to a HTML DOM tree), or `null` if no XFA exists.
   */
  async getXfa() {
    // Both a missing `_htmlForXfa` and a falsy child entry become `null`.
    return this._transport._htmlForXfa?.children[this._pageIndex] || null;
  }

  /**
   * Begins the process of rendering a page to the desired context.
   *
   * @param {RenderParameters} params - Page render parameters.
   * @returns {RenderTask} An object that contains a promise that is
   *   resolved when the page finishes rendering.
*/ render({ canvasContext, viewport, intent = "display", annotationMode = AnnotationMode.ENABLE, transform = null, background = null, optionalContentConfigPromise = null, annotationCanvasMap = null, pageColors = null, printAnnotationStorage = null, }) { if ( (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) && arguments[0]?.canvasFactory ) { throw new Error( "render no longer accepts the `canvasFactory`-option, " + "please pass it to the `getDocument`-function instead." ); } this._stats?.time("Overall"); const intentArgs = this._transport.getRenderingIntent( intent, annotationMode, printAnnotationStorage ); // If there was a pending destroy, cancel it so no cleanup happens during // this call to render... this.#pendingCleanup = false; // ... and ensure that a delayed cleanup is always aborted. this.#abortDelayedCleanup(); if (!optionalContentConfigPromise) { optionalContentConfigPromise = this._transport.getOptionalContentConfig(); } let intentState = this._intentStates.get(intentArgs.cacheKey); if (!intentState) { intentState = Object.create(null); this._intentStates.set(intentArgs.cacheKey, intentState); } // Ensure that a pending `streamReader` cancel timeout is always aborted. if (intentState.streamReaderCancelTimeout) { clearTimeout(intentState.streamReaderCancelTimeout); intentState.streamReaderCancelTimeout = null; } const intentPrint = !!( intentArgs.renderingIntent & RenderingIntentFlag.PRINT ); // If there's no displayReadyCapability yet, then the operatorList // was never requested before. Make the request and create the promise. 
if (!intentState.displayReadyCapability) { intentState.displayReadyCapability = new PromiseCapability(); intentState.operatorList = { fnArray: [], argsArray: [], lastChunk: false, separateAnnots: null, }; this._stats?.time("Page Request"); this._pumpOperatorList(intentArgs); } const complete = error => { intentState.renderTasks.delete(internalRenderTask); // Attempt to reduce memory usage during *printing*, by always running // cleanup immediately once rendering has finished. if (this._maybeCleanupAfterRender || intentPrint) { this.#pendingCleanup = true; } this.#tryCleanup(/* delayed = */ !intentPrint); if (error) { internalRenderTask.capability.reject(error); this._abortOperatorList({ intentState, reason: error instanceof Error ? error : new Error(error), }); } else { internalRenderTask.capability.resolve(); } this._stats?.timeEnd("Rendering"); this._stats?.timeEnd("Overall"); }; const internalRenderTask = new InternalRenderTask({ callback: complete, // Only include the required properties, and *not* the entire object. params: { canvasContext, viewport, transform, background, }, objs: this.objs, commonObjs: this.commonObjs, annotationCanvasMap, operatorList: intentState.operatorList, pageIndex: this._pageIndex, canvasFactory: this._transport.canvasFactory, filterFactory: this._transport.filterFactory, useRequestAnimationFrame: !intentPrint, pdfBug: this._pdfBug, pageColors, }); (intentState.renderTasks ||= new Set()).add(internalRenderTask); const renderTask = internalRenderTask.task; Promise.all([ intentState.displayReadyCapability.promise, optionalContentConfigPromise, ]) .then(([transparency, optionalContentConfig]) => { if (this.#pendingCleanup) { complete(); return; } this._stats?.time("Rendering"); internalRenderTask.initializeGraphics({ transparency, optionalContentConfig, }); internalRenderTask.operatorListChanged(); }) .catch(complete); return renderTask; } /** * @param {GetOperatorListParameters} params - Page getOperatorList * parameters. 
* @returns {Promise<PDFOperatorList>} A promise resolved with an
   *   {@link PDFOperatorList} object that represents the page's operator list.
   */
  getOperatorList({
    intent = "display",
    annotationMode = AnnotationMode.ENABLE,
    printAnnotationStorage = null,
  } = {}) {
    // Installed on the stub task below. Once the last chunk has been
    // received the promise is resolved and the stub removes itself.
    function operatorListChanged() {
      if (intentState.operatorList.lastChunk) {
        intentState.opListReadCapability.resolve(intentState.operatorList);

        intentState.renderTasks.delete(opListTask);
      }
    }

    const intentArgs = this._transport.getRenderingIntent(
      intent,
      annotationMode,
      printAnnotationStorage,
      /* isOpList = */ true
    );
    let intentState = this._intentStates.get(intentArgs.cacheKey);
    if (!intentState) {
      intentState = Object.create(null);
      this._intentStates.set(intentArgs.cacheKey, intentState);
    }
    let opListTask;

    // Only the first call for a given intent state starts a request; later
    // calls receive the promise that was created here.
    if (!intentState.opListReadCapability) {
      // A bare object that only exposes `operatorListChanged`, stored next to
      // the real render tasks so that it is notified about new chunks.
      opListTask = Object.create(null);
      opListTask.operatorListChanged = operatorListChanged;
      intentState.opListReadCapability = new PromiseCapability();
      (intentState.renderTasks ||= new Set()).add(opListTask);
      intentState.operatorList = {
        fnArray: [],
        argsArray: [],
        lastChunk: false,
        separateAnnots: null,
      };

      this._stats?.time("Page Request");
      this._pumpOperatorList(intentArgs);
    }
    return intentState.opListReadCapability.promise;
  }

  /**
   * NOTE: All occurrences of whitespace will be replaced by
   * standard spaces (0x20).
   *
   * @param {getTextContentParameters} params - getTextContent parameters.
   * @returns {ReadableStream} Stream for reading text content chunks.
   */
  streamTextContent({
    includeMarkedContent = false,
    disableNormalization = false,
  } = {}) {
    const TEXT_CONTENT_CHUNK_SIZE = 100;

    return this._transport.messageHandler.sendWithStream(
      "GetTextContent",
      {
        pageIndex: this._pageIndex,
        // Only a literal `true` enables these options; any other value is
        // sent as `false`.
        includeMarkedContent: includeMarkedContent === true,
        disableNormalization: disableNormalization === true,
      },
      {
        highWaterMark: TEXT_CONTENT_CHUNK_SIZE,
        // A chunk is measured by the number of text items it carries.
        size(textContent) {
          return textContent.items.length;
        },
      }
    );
  }

  /**
   * NOTE: All occurrences of whitespace will be replaced by
   * standard spaces (0x20).
* * @param {getTextContentParameters} params - getTextContent parameters. * @returns {Promise} A promise that is resolved with a * {@link TextContent} object that represents the page's text content. */ getTextContent(params = {}) { if (this._transport._htmlForXfa) { // TODO: We need to revisit this once the XFA foreground patch lands and // only do this for non-foreground XFA. return this.getXfa().then(xfa => { return XfaText.textContent(xfa); }); } const readableStream = this.streamTextContent(params); return new Promise(function (resolve, reject) { function pump() { reader.read().then(function ({ value, done }) { if (done) { resolve(textContent); return; } Object.assign(textContent.styles, value.styles); textContent.items.push(...value.items); pump(); }, reject); } const reader = readableStream.getReader(); const textContent = { items: [], styles: Object.create(null), }; pump(); }); } /** * @returns {Promise} A promise that is resolved with a * {@link StructTreeNode} object that represents the page's structure tree, * or `null` when no structure tree is present for the current page. */ getStructTree() { return this._transport.getStructTree(this._pageIndex); } /** * Destroys the page object. * @private */ _destroy() { this.destroyed = true; const waitOn = []; for (const intentState of this._intentStates.values()) { this._abortOperatorList({ intentState, reason: new Error("Page was destroyed."), force: true, }); if (intentState.opListReadCapability) { // Avoid errors below, since the renderTasks are just stubs. continue; } for (const internalRenderTask of intentState.renderTasks) { waitOn.push(internalRenderTask.completed); internalRenderTask.cancel(); } } this.objs.clear(); this.#pendingCleanup = false; this.#abortDelayedCleanup(); return Promise.all(waitOn); } /** * Cleans up resources allocated by the page. * * @param {boolean} [resetStats] - Reset page stats, if enabled. * The default value is `false`. 
* @returns {boolean} Indicates if clean-up was successfully run. */ cleanup(resetStats = false) { this.#pendingCleanup = true; const success = this.#tryCleanup(/* delayed = */ false); if (resetStats && success) { this._stats &&= new StatTimer(); } return success; } /** * Attempts to clean up if rendering is in a state where that's possible. * @param {boolean} [delayed] - Delay the cleanup, to e.g. improve zooming * performance in documents with large images. * The default value is `false`. * @returns {boolean} Indicates if clean-up was successfully run. */ #tryCleanup(delayed = false) { this.#abortDelayedCleanup(); if (!this.#pendingCleanup) { return false; } if (delayed) { this.#delayedCleanupTimeout = setTimeout(() => { this.#delayedCleanupTimeout = null; this.#tryCleanup(/* delayed = */ false); }, DELAYED_CLEANUP_TIMEOUT); return false; } for (const { renderTasks, operatorList } of this._intentStates.values()) { if (renderTasks.size > 0 || !operatorList.lastChunk) { return false; } } this._intentStates.clear(); this.objs.clear(); this.#pendingCleanup = false; return true; } #abortDelayedCleanup() { if (this.#delayedCleanupTimeout) { clearTimeout(this.#delayedCleanupTimeout); this.#delayedCleanupTimeout = null; } } /** * @private */ _startRenderPage(transparency, cacheKey) { const intentState = this._intentStates.get(cacheKey); if (!intentState) { return; // Rendering was cancelled. } this._stats?.timeEnd("Page Request"); // TODO Refactor RenderPageRequest to separate rendering // and operator list logic intentState.displayReadyCapability?.resolve(transparency); } /** * @private */ _renderPageChunk(operatorListChunk, intentState) { // Add the new chunk to the current operator list. 
for (let i = 0, ii = operatorListChunk.length; i < ii; i++) { intentState.operatorList.fnArray.push(operatorListChunk.fnArray[i]); intentState.operatorList.argsArray.push(operatorListChunk.argsArray[i]); } intentState.operatorList.lastChunk = operatorListChunk.lastChunk; intentState.operatorList.separateAnnots = operatorListChunk.separateAnnots; // Notify all the rendering tasks there are more operators to be consumed. for (const internalRenderTask of intentState.renderTasks) { internalRenderTask.operatorListChanged(); } if (operatorListChunk.lastChunk) { this.#tryCleanup(/* delayed = */ true); } } /** * @private */ _pumpOperatorList({ renderingIntent, cacheKey, annotationStorageMap }) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( Number.isInteger(renderingIntent) && renderingIntent > 0, '_pumpOperatorList: Expected valid "renderingIntent" argument.' ); } const readableStream = this._transport.messageHandler.sendWithStream( "GetOperatorList", { pageIndex: this._pageIndex, intent: renderingIntent, cacheKey, annotationStorage: annotationStorageMap, } ); const reader = readableStream.getReader(); const intentState = this._intentStates.get(cacheKey); intentState.streamReader = reader; const pump = () => { reader.read().then( ({ value, done }) => { if (done) { intentState.streamReader = null; return; } if (this._transport.destroyed) { return; // Ignore any pending requests if the worker was terminated. } this._renderPageChunk(value, intentState); pump(); }, reason => { intentState.streamReader = null; if (this._transport.destroyed) { return; // Ignore any pending requests if the worker was terminated. } if (intentState.operatorList) { // Mark operator list as complete. 
intentState.operatorList.lastChunk = true; for (const internalRenderTask of intentState.renderTasks) { internalRenderTask.operatorListChanged(); } this.#tryCleanup(/* delayed = */ true); } if (intentState.displayReadyCapability) { intentState.displayReadyCapability.reject(reason); } else if (intentState.opListReadCapability) { intentState.opListReadCapability.reject(reason); } else { throw reason; } } ); }; pump(); } /** * @private */ _abortOperatorList({ intentState, reason, force = false }) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( reason instanceof Error, '_abortOperatorList: Expected valid "reason" argument.' ); } if (!intentState.streamReader) { return; } // Ensure that a pending `streamReader` cancel timeout is always aborted. if (intentState.streamReaderCancelTimeout) { clearTimeout(intentState.streamReaderCancelTimeout); intentState.streamReaderCancelTimeout = null; } if (!force) { // Ensure that an Error occurring in *only* one `InternalRenderTask`, e.g. // multiple render() calls on the same canvas, won't break all rendering. if (intentState.renderTasks.size > 0) { return; } // Don't immediately abort parsing on the worker-thread when rendering is // cancelled, since that will unnecessarily delay re-rendering when (for // partially parsed pages) e.g. zooming/rotation occurs in the viewer. if (reason instanceof RenderingCancelledException) { let delay = RENDERING_CANCELLED_TIMEOUT; if (reason.extraDelay > 0 && reason.extraDelay < /* ms = */ 1000) { // Above, we prevent the total delay from becoming arbitrarily large. delay += reason.extraDelay; } intentState.streamReaderCancelTimeout = setTimeout(() => { intentState.streamReaderCancelTimeout = null; this._abortOperatorList({ intentState, reason, force: true }); }, delay); return; } } intentState.streamReader .cancel(new AbortException(reason.message)) .catch(() => { // Avoid "Uncaught promise" messages in the console. 
}); intentState.streamReader = null; if (this._transport.destroyed) { return; // Ignore any pending requests if the worker was terminated. } // Remove the current `intentState`, since a cancelled `getOperatorList` // call on the worker-thread cannot be re-started... for (const [curCacheKey, curIntentState] of this._intentStates) { if (curIntentState === intentState) { this._intentStates.delete(curCacheKey); break; } } // ... and force clean-up to ensure that any old state is always removed. this.cleanup(); } /** * @type {StatTimer | null} Returns page stats, if enabled; returns `null` * otherwise. */ get stats() { return this._stats; } } class LoopbackPort { #listeners = new Set(); #deferred = Promise.resolve(); postMessage(obj, transfer) { const event = { data: structuredClone( obj, (typeof PDFJSDev === "undefined" || PDFJSDev.test("SKIP_BABEL || TESTING")) && transfer ? { transfer } : null ), }; this.#deferred.then(() => { for (const listener of this.#listeners) { listener.call(this, event); } }); } addEventListener(name, listener) { this.#listeners.add(listener); } removeEventListener(name, listener) { this.#listeners.delete(listener); } terminate() { this.#listeners.clear(); } } /** * @typedef {Object} PDFWorkerParameters * @property {string} [name] - The name of the worker. * @property {Worker} [port] - The `workerPort` object. * @property {number} [verbosity] - Controls the logging level; * the constants from {@link VerbosityLevel} should be used. */ const PDFWorkerUtil = { isWorkerDisabled: false, fallbackWorkerSrc: null, fakeWorkerId: 0, }; if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) { // eslint-disable-next-line no-undef if (isNodeJS && typeof __non_webpack_require__ === "function") { // Workers aren't supported in Node.js, force-disabling them there. PDFWorkerUtil.isWorkerDisabled = true; PDFWorkerUtil.fallbackWorkerSrc = PDFJSDev.test("LIB") ? 
"../pdf.worker.js" : "./pdf.worker.js"; } else if (typeof document === "object") { const pdfjsFilePath = document?.currentScript?.src; if (pdfjsFilePath) { PDFWorkerUtil.fallbackWorkerSrc = pdfjsFilePath.replace( /(\.(?:min\.)?js)(\?.*)?$/i, ".worker$1$2" ); } } // Check if URLs have the same origin. For non-HTTP based URLs, returns false. PDFWorkerUtil.isSameOrigin = function (baseUrl, otherUrl) { let base; try { base = new URL(baseUrl); if (!base.origin || base.origin === "null") { return false; // non-HTTP url } } catch (e) { return false; } const other = new URL(otherUrl, base); return base.origin === other.origin; }; PDFWorkerUtil.createCDNWrapper = function (url) { // We will rely on blob URL's property to specify origin. // We want this function to fail in case if createObjectURL or Blob do not // exist or fail for some reason -- our Worker creation will fail anyway. const wrapper = `importScripts("${url}");`; return URL.createObjectURL(new Blob([wrapper])); }; } /** * PDF.js web worker abstraction that controls the instantiation of PDF * documents. Message handlers are used to pass information from the main * thread to the worker thread and vice versa. If the creation of a web * worker is not possible, a "fake" worker will be used instead. * * @param {PDFWorkerParameters} params - The worker initialization parameters. */ class PDFWorker { static #workerPorts = new WeakMap(); constructor({ name = null, port = null, verbosity = getVerbosityLevel(), } = {}) { if (port && PDFWorker.#workerPorts.has(port)) { throw new Error("Cannot use more than one PDFWorker per port."); } this.name = name; this.destroyed = false; this.verbosity = verbosity; this._readyCapability = new PromiseCapability(); this._port = null; this._webWorker = null; this._messageHandler = null; if (port) { PDFWorker.#workerPorts.set(port, this); this._initializeFromPort(port); return; } this._initialize(); } /** * Promise for worker initialization completion. 
* @type {Promise} */ get promise() { return this._readyCapability.promise; } /** * The current `workerPort`, when it exists. * @type {Worker} */ get port() { return this._port; } /** * The current MessageHandler-instance. * @type {MessageHandler} */ get messageHandler() { return this._messageHandler; } _initializeFromPort(port) { this._port = port; this._messageHandler = new MessageHandler("main", "worker", port); this._messageHandler.on("ready", function () { // Ignoring "ready" event -- MessageHandler should already be initialized // and ready to accept messages. }); this._readyCapability.resolve(); // Send global setting, e.g. verbosity level. this._messageHandler.send("configure", { verbosity: this.verbosity, }); } _initialize() { // If worker support isn't disabled explicit and the browser has worker // support, create a new web worker and test if it/the browser fulfills // all requirements to run parts of pdf.js in a web worker. // Right now, the requirement is, that an Uint8Array is still an // Uint8Array as it arrives on the worker. (Chrome added this with v.15.) if ( !PDFWorkerUtil.isWorkerDisabled && !PDFWorker._mainThreadWorkerMessageHandler ) { let { workerSrc } = PDFWorker; try { // Wraps workerSrc path into blob URL, if the former does not belong // to the same origin. if ( typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC") && !PDFWorkerUtil.isSameOrigin(window.location.href, workerSrc) ) { workerSrc = PDFWorkerUtil.createCDNWrapper( new URL(workerSrc, window.location).href ); } const worker = typeof PDFJSDev === "undefined" && !workerSrc.endsWith("/build/pdf.worker.js") ? 
new Worker(workerSrc, { type: "module" }) : new Worker(workerSrc); const messageHandler = new MessageHandler("main", "worker", worker); const terminateEarly = () => { worker.removeEventListener("error", onWorkerError); messageHandler.destroy(); worker.terminate(); if (this.destroyed) { this._readyCapability.reject(new Error("Worker was destroyed")); } else { // Fall back to fake worker if the termination is caused by an // error (e.g. NetworkError / SecurityError). this._setupFakeWorker(); } }; const onWorkerError = () => { if (!this._webWorker) { // Worker failed to initialize due to an error. Clean up and fall // back to the fake worker. terminateEarly(); } }; worker.addEventListener("error", onWorkerError); messageHandler.on("test", data => { worker.removeEventListener("error", onWorkerError); if (this.destroyed) { terminateEarly(); return; // worker was destroyed } if (data) { this._messageHandler = messageHandler; this._port = worker; this._webWorker = worker; this._readyCapability.resolve(); // Send global setting, e.g. verbosity level. messageHandler.send("configure", { verbosity: this.verbosity, }); } else { this._setupFakeWorker(); messageHandler.destroy(); worker.terminate(); } }); messageHandler.on("ready", data => { worker.removeEventListener("error", onWorkerError); if (this.destroyed) { terminateEarly(); return; // worker was destroyed } try { sendTest(); } catch (e) { // We need fallback to a faked worker. this._setupFakeWorker(); } }); const sendTest = () => { const testObj = new Uint8Array(); // Ensure that we can use `postMessage` transfers. messageHandler.send("test", testObj, [testObj.buffer]); }; // It might take time for the worker to initialize. We will try to send // the "test" message immediately, and once the "ready" message arrives. // The worker shall process only the first received "test" message. 
sendTest(); return; } catch (e) { info("The worker has been disabled."); } } // Either workers are disabled, not supported or have thrown an exception. // Thus, we fallback to a faked worker. this._setupFakeWorker(); } _setupFakeWorker() { if (!PDFWorkerUtil.isWorkerDisabled) { warn("Setting up fake worker."); PDFWorkerUtil.isWorkerDisabled = true; } PDFWorker._setupFakeWorkerGlobal .then(WorkerMessageHandler => { if (this.destroyed) { this._readyCapability.reject(new Error("Worker was destroyed")); return; } const port = new LoopbackPort(); this._port = port; // All fake workers use the same port, making id unique. const id = `fake${PDFWorkerUtil.fakeWorkerId++}`; // If the main thread is our worker, setup the handling for the // messages -- the main thread sends to it self. const workerHandler = new MessageHandler(id + "_worker", id, port); WorkerMessageHandler.setup(workerHandler, port); const messageHandler = new MessageHandler(id, id + "_worker", port); this._messageHandler = messageHandler; this._readyCapability.resolve(); // Send global setting, e.g. verbosity level. messageHandler.send("configure", { verbosity: this.verbosity, }); }) .catch(reason => { this._readyCapability.reject( new Error(`Setting up fake worker failed: "${reason.message}".`) ); }); } /** * Destroys the worker instance. */ destroy() { this.destroyed = true; if (this._webWorker) { // We need to terminate only web worker created resource. this._webWorker.terminate(); this._webWorker = null; } PDFWorker.#workerPorts.delete(this._port); this._port = null; if (this._messageHandler) { this._messageHandler.destroy(); this._messageHandler = null; } } /** * @param {PDFWorkerParameters} params - The worker initialization parameters. 
*/ static fromPort(params) { if (!params?.port) { throw new Error("PDFWorker.fromPort - invalid method signature."); } if (this.#workerPorts.has(params.port)) { return this.#workerPorts.get(params.port); } return new PDFWorker(params); } /** * The current `workerSrc`, when it exists. * @type {string} */ static get workerSrc() { if (GlobalWorkerOptions.workerSrc) { return GlobalWorkerOptions.workerSrc; } if ( (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) && PDFWorkerUtil.fallbackWorkerSrc !== null ) { if (!isNodeJS) { deprecated('No "GlobalWorkerOptions.workerSrc" specified.'); } return PDFWorkerUtil.fallbackWorkerSrc; } throw new Error('No "GlobalWorkerOptions.workerSrc" specified.'); } static get _mainThreadWorkerMessageHandler() { try { return globalThis.pdfjsWorker?.WorkerMessageHandler || null; } catch (ex) { return null; } } // Loads worker code into the main-thread. static get _setupFakeWorkerGlobal() { const loader = async () => { const mainWorkerMessageHandler = this._mainThreadWorkerMessageHandler; if (mainWorkerMessageHandler) { // The worker was already loaded using e.g. a `