Improve memory usage around the BasePdfManager.docBaseUrl
parameter (PR 7689 follow-up)
While there is nothing *outright* wrong with the existing implementation, it can however lead to increased memory usage in one particular case (that I completely overlooked when implementing this): For "data:"-URLs, which by definition contains the entire PDF document and can thus be arbitrarily large, we obviously want to avoid sending, storing, and/or logging the "raw" docBaseUrl in that case. To address this, this patch makes the following changes: - Ignore any non-string in the `docBaseUrl` option passed to `getDocument`, since those are unsupported anyway, already on the main-thread. - Ignore "data:"-URLs in the `docBaseUrl` option passed to `getDocument`, to avoid having to send what could potentially be a *very* long string to the worker-thread. - Parse the `docBaseUrl` option *directly* in the `BasePdfManager`-constructors, on the worker-thread, to avoid having to store the "raw" docBaseUrl in the first place.
This commit is contained in:
parent
bd9dee1544
commit
c4c7216171
@ -13,17 +13,23 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import {
|
import { createValidAbsoluteUrl, unreachable, warn } from "../shared/util.js";
|
||||||
createValidAbsoluteUrl,
|
|
||||||
shadow,
|
|
||||||
unreachable,
|
|
||||||
warn,
|
|
||||||
} from "../shared/util.js";
|
|
||||||
import { ChunkedStreamManager } from "./chunked_stream.js";
|
import { ChunkedStreamManager } from "./chunked_stream.js";
|
||||||
import { MissingDataException } from "./core_utils.js";
|
import { MissingDataException } from "./core_utils.js";
|
||||||
import { PDFDocument } from "./document.js";
|
import { PDFDocument } from "./document.js";
|
||||||
import { Stream } from "./stream.js";
|
import { Stream } from "./stream.js";
|
||||||
|
|
||||||
|
function parseDocBaseUrl(url) {
|
||||||
|
if (url) {
|
||||||
|
const absoluteUrl = createValidAbsoluteUrl(url);
|
||||||
|
if (absoluteUrl) {
|
||||||
|
return absoluteUrl.href;
|
||||||
|
}
|
||||||
|
warn(`Invalid absolute docBaseUrl: "${url}".`);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
class BasePdfManager {
|
class BasePdfManager {
|
||||||
constructor() {
|
constructor() {
|
||||||
if (this.constructor === BasePdfManager) {
|
if (this.constructor === BasePdfManager) {
|
||||||
@ -40,16 +46,7 @@ class BasePdfManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
get docBaseUrl() {
|
get docBaseUrl() {
|
||||||
let docBaseUrl = null;
|
return this._docBaseUrl;
|
||||||
if (this._docBaseUrl) {
|
|
||||||
const absoluteUrl = createValidAbsoluteUrl(this._docBaseUrl);
|
|
||||||
if (absoluteUrl) {
|
|
||||||
docBaseUrl = absoluteUrl.href;
|
|
||||||
} else {
|
|
||||||
warn(`Invalid absolute docBaseUrl: "${this._docBaseUrl}".`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return shadow(this, "docBaseUrl", docBaseUrl);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
onLoadedStream() {
|
onLoadedStream() {
|
||||||
@ -111,7 +108,7 @@ class LocalPdfManager extends BasePdfManager {
|
|||||||
|
|
||||||
this._docId = docId;
|
this._docId = docId;
|
||||||
this._password = password;
|
this._password = password;
|
||||||
this._docBaseUrl = docBaseUrl;
|
this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
|
||||||
this.evaluatorOptions = evaluatorOptions;
|
this.evaluatorOptions = evaluatorOptions;
|
||||||
|
|
||||||
const stream = new Stream(data);
|
const stream = new Stream(data);
|
||||||
@ -146,7 +143,7 @@ class NetworkPdfManager extends BasePdfManager {
|
|||||||
|
|
||||||
this._docId = docId;
|
this._docId = docId;
|
||||||
this._password = args.password;
|
this._password = args.password;
|
||||||
this._docBaseUrl = docBaseUrl;
|
this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
|
||||||
this.msgHandler = args.msgHandler;
|
this.msgHandler = args.msgHandler;
|
||||||
this.evaluatorOptions = evaluatorOptions;
|
this.evaluatorOptions = evaluatorOptions;
|
||||||
|
|
||||||
|
@ -40,6 +40,7 @@ import {
|
|||||||
deprecated,
|
deprecated,
|
||||||
DOMCanvasFactory,
|
DOMCanvasFactory,
|
||||||
DOMCMapReaderFactory,
|
DOMCMapReaderFactory,
|
||||||
|
isDataScheme,
|
||||||
loadScript,
|
loadScript,
|
||||||
PageViewport,
|
PageViewport,
|
||||||
RenderingCancelledException,
|
RenderingCancelledException,
|
||||||
@ -285,6 +286,15 @@ function getDocument(src) {
|
|||||||
params.fontExtraProperties = params.fontExtraProperties === true;
|
params.fontExtraProperties = params.fontExtraProperties === true;
|
||||||
params.pdfBug = params.pdfBug === true;
|
params.pdfBug = params.pdfBug === true;
|
||||||
|
|
||||||
|
if (
|
||||||
|
typeof params.docBaseUrl !== "string" ||
|
||||||
|
isDataScheme(params.docBaseUrl)
|
||||||
|
) {
|
||||||
|
// Ignore "data:"-URLs, since they can't be used to recover valid absolute
|
||||||
|
// URLs anyway. We want to avoid sending them to the worker-thread, since
|
||||||
|
// they contain the *entire* PDF document and can thus be arbitrarily long.
|
||||||
|
params.docBaseUrl = null;
|
||||||
|
}
|
||||||
if (!Number.isInteger(params.maxImageSize)) {
|
if (!Number.isInteger(params.maxImageSize)) {
|
||||||
params.maxImageSize = -1;
|
params.maxImageSize = -1;
|
||||||
}
|
}
|
||||||
|
@ -708,6 +708,7 @@ export {
|
|||||||
DOMSVGFactory,
|
DOMSVGFactory,
|
||||||
getFilenameFromUrl,
|
getFilenameFromUrl,
|
||||||
getPdfFilenameFromUrl,
|
getPdfFilenameFromUrl,
|
||||||
|
isDataScheme,
|
||||||
isFetchSupported,
|
isFetchSupported,
|
||||||
isPdfFile,
|
isPdfFile,
|
||||||
isValidFetchUrl,
|
isValidFetchUrl,
|
||||||
|
Loading…
Reference in New Issue
Block a user