Improve memory usage around the BasePdfManager.docBaseUrl parameter (PR 7689 follow-up)
				
					
				
			While there is nothing *outright* wrong with the existing implementation, it can however lead to increased memory usage in one particular case (that I completely overlooked when implementing this): For "data:"-URLs, which by definition contains the entire PDF document and can thus be arbitrarily large, we obviously want to avoid sending, storing, and/or logging the "raw" docBaseUrl in that case. To address this, this patch makes the following changes: - Ignore any non-string in the `docBaseUrl` option passed to `getDocument`, since those are unsupported anyway, already on the main-thread. - Ignore "data:"-URLs in the `docBaseUrl` option passed to `getDocument`, to avoid having to send what could potentially be a *very* long string to the worker-thread. - Parse the `docBaseUrl` option *directly* in the `BasePdfManager`-constructors, on the worker-thread, to avoid having to store the "raw" docBaseUrl in the first place.
This commit is contained in:
		
							parent
							
								
									bd9dee1544
								
							
						
					
					
						commit
						c4c7216171
					
				| @ -13,17 +13,23 @@ | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| import { | ||||
|   createValidAbsoluteUrl, | ||||
|   shadow, | ||||
|   unreachable, | ||||
|   warn, | ||||
| } from "../shared/util.js"; | ||||
| import { createValidAbsoluteUrl, unreachable, warn } from "../shared/util.js"; | ||||
| import { ChunkedStreamManager } from "./chunked_stream.js"; | ||||
| import { MissingDataException } from "./core_utils.js"; | ||||
| import { PDFDocument } from "./document.js"; | ||||
| import { Stream } from "./stream.js"; | ||||
| 
 | ||||
| function parseDocBaseUrl(url) { | ||||
|   if (url) { | ||||
|     const absoluteUrl = createValidAbsoluteUrl(url); | ||||
|     if (absoluteUrl) { | ||||
|       return absoluteUrl.href; | ||||
|     } | ||||
|     warn(`Invalid absolute docBaseUrl: "${url}".`); | ||||
|   } | ||||
|   return null; | ||||
| } | ||||
| 
 | ||||
| class BasePdfManager { | ||||
|   constructor() { | ||||
|     if (this.constructor === BasePdfManager) { | ||||
| @ -40,16 +46,7 @@ class BasePdfManager { | ||||
|   } | ||||
| 
 | ||||
|   get docBaseUrl() { | ||||
|     let docBaseUrl = null; | ||||
|     if (this._docBaseUrl) { | ||||
|       const absoluteUrl = createValidAbsoluteUrl(this._docBaseUrl); | ||||
|       if (absoluteUrl) { | ||||
|         docBaseUrl = absoluteUrl.href; | ||||
|       } else { | ||||
|         warn(`Invalid absolute docBaseUrl: "${this._docBaseUrl}".`); | ||||
|       } | ||||
|     } | ||||
|     return shadow(this, "docBaseUrl", docBaseUrl); | ||||
|     return this._docBaseUrl; | ||||
|   } | ||||
| 
 | ||||
|   onLoadedStream() { | ||||
| @ -111,7 +108,7 @@ class LocalPdfManager extends BasePdfManager { | ||||
| 
 | ||||
|     this._docId = docId; | ||||
|     this._password = password; | ||||
|     this._docBaseUrl = docBaseUrl; | ||||
|     this._docBaseUrl = parseDocBaseUrl(docBaseUrl); | ||||
|     this.evaluatorOptions = evaluatorOptions; | ||||
| 
 | ||||
|     const stream = new Stream(data); | ||||
| @ -146,7 +143,7 @@ class NetworkPdfManager extends BasePdfManager { | ||||
| 
 | ||||
|     this._docId = docId; | ||||
|     this._password = args.password; | ||||
|     this._docBaseUrl = docBaseUrl; | ||||
|     this._docBaseUrl = parseDocBaseUrl(docBaseUrl); | ||||
|     this.msgHandler = args.msgHandler; | ||||
|     this.evaluatorOptions = evaluatorOptions; | ||||
| 
 | ||||
|  | ||||
| @ -40,6 +40,7 @@ import { | ||||
|   deprecated, | ||||
|   DOMCanvasFactory, | ||||
|   DOMCMapReaderFactory, | ||||
|   isDataScheme, | ||||
|   loadScript, | ||||
|   PageViewport, | ||||
|   RenderingCancelledException, | ||||
| @ -285,6 +286,15 @@ function getDocument(src) { | ||||
|   params.fontExtraProperties = params.fontExtraProperties === true; | ||||
|   params.pdfBug = params.pdfBug === true; | ||||
| 
 | ||||
|   if ( | ||||
|     typeof params.docBaseUrl !== "string" || | ||||
|     isDataScheme(params.docBaseUrl) | ||||
|   ) { | ||||
|     // Ignore "data:"-URLs, since they can't be used to recover valid absolute
 | ||||
|     // URLs anyway. We want to avoid sending them to the worker-thread, since
 | ||||
|     // they contain the *entire* PDF document and can thus be arbitrarily long.
 | ||||
|     params.docBaseUrl = null; | ||||
|   } | ||||
|   if (!Number.isInteger(params.maxImageSize)) { | ||||
|     params.maxImageSize = -1; | ||||
|   } | ||||
|  | ||||
| @ -708,6 +708,7 @@ export { | ||||
|   DOMSVGFactory, | ||||
|   getFilenameFromUrl, | ||||
|   getPdfFilenameFromUrl, | ||||
|   isDataScheme, | ||||
|   isFetchSupported, | ||||
|   isPdfFile, | ||||
|   isValidFetchUrl, | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user