From c4c72161718f87bd5bce96a396f55c5c8db75fc6 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 16 Mar 2021 11:56:39 +0100 Subject: [PATCH] Improve memory usage around the `BasePdfManager.docBaseUrl` parameter (PR 7689 follow-up) While there is nothing *outright* wrong with the existing implementation, it can however lead to increased memory usage in one particular case (that I completely overlooked when implementing this): For "data:"-URLs, which by definition contains the entire PDF document and can thus be arbitrarily large, we obviously want to avoid sending, storing, and/or logging the "raw" docBaseUrl in that case. To address this, this patch makes the following changes: - Ignore any non-string in the `docBaseUrl` option passed to `getDocument`, since those are unsupported anyway, already on the main-thread. - Ignore "data:"-URLs in the `docBaseUrl` option passed to `getDocument`, to avoid having to send what could potentially be a *very* long string to the worker-thread. - Parse the `docBaseUrl` option *directly* in the `BasePdfManager`-constructors, on the worker-thread, to avoid having to store the "raw" docBaseUrl in the first place. --- src/core/pdf_manager.js | 33 +++++++++++++++------------------ src/display/api.js | 10 ++++++++++ src/display/display_utils.js | 1 + 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/core/pdf_manager.js b/src/core/pdf_manager.js index 1cdcc03fa..33dee308c 100644 --- a/src/core/pdf_manager.js +++ b/src/core/pdf_manager.js @@ -13,17 +13,23 @@ * limitations under the License. */ -import { - createValidAbsoluteUrl, - shadow, - unreachable, - warn, -} from "../shared/util.js"; +import { createValidAbsoluteUrl, unreachable, warn } from "../shared/util.js"; import { ChunkedStreamManager } from "./chunked_stream.js"; import { MissingDataException } from "./core_utils.js"; import { PDFDocument } from "./document.js"; import { Stream } from "./stream.js"; +function parseDocBaseUrl(url) { + if (url) { + const absoluteUrl = createValidAbsoluteUrl(url); + if (absoluteUrl) { + return absoluteUrl.href; + } + warn(`Invalid absolute docBaseUrl: "${url}".`); + } + return null; +} + class BasePdfManager { constructor() { if (this.constructor === BasePdfManager) { @@ -40,16 +46,7 @@ class BasePdfManager { } get docBaseUrl() { - let docBaseUrl = null; - if (this._docBaseUrl) { - const absoluteUrl = createValidAbsoluteUrl(this._docBaseUrl); - if (absoluteUrl) { - docBaseUrl = absoluteUrl.href; - } else { - warn(`Invalid absolute docBaseUrl: "${this._docBaseUrl}".`); - } - } - return shadow(this, "docBaseUrl", docBaseUrl); + return this._docBaseUrl; } onLoadedStream() { @@ -111,7 +108,7 @@ class LocalPdfManager extends BasePdfManager { this._docId = docId; this._password = password; - this._docBaseUrl = docBaseUrl; + this._docBaseUrl = parseDocBaseUrl(docBaseUrl); this.evaluatorOptions = evaluatorOptions; const stream = new Stream(data); @@ -146,7 +143,7 @@ class NetworkPdfManager extends BasePdfManager { this._docId = docId; this._password = args.password; - this._docBaseUrl = docBaseUrl; + this._docBaseUrl = parseDocBaseUrl(docBaseUrl); this.msgHandler = args.msgHandler; this.evaluatorOptions = evaluatorOptions; diff --git a/src/display/api.js b/src/display/api.js index 690193ade..9f779214b 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -40,6 +40,7 @@ import { deprecated, DOMCanvasFactory, DOMCMapReaderFactory, + isDataScheme, loadScript, PageViewport, RenderingCancelledException, @@ -285,6 +286,15 @@ function getDocument(src) { params.fontExtraProperties = params.fontExtraProperties === true; params.pdfBug = params.pdfBug === true; + if ( + typeof params.docBaseUrl !== "string" || + isDataScheme(params.docBaseUrl) + ) { + // Ignore "data:"-URLs, since they can't be used to recover valid absolute + // URLs anyway. We want to avoid sending them to the worker-thread, since + // they contain the *entire* PDF document and can thus be arbitrarily long. + params.docBaseUrl = null; + } if (!Number.isInteger(params.maxImageSize)) { params.maxImageSize = -1; } diff --git a/src/display/display_utils.js b/src/display/display_utils.js index 3cb0da278..4e23ec876 100644 --- a/src/display/display_utils.js +++ b/src/display/display_utils.js @@ -708,6 +708,7 @@ export { DOMSVGFactory, getFilenameFromUrl, getPdfFilenameFromUrl, + isDataScheme, isFetchSupported, isPdfFile, isValidFetchUrl,