From 664f7de54097c799b25979cd2af5141fd36f7e66 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 30 Mar 2020 12:28:57 +0200 Subject: [PATCH 1/4] Change `Font.exportData` to use an explicit white-list of exportable properties This patch addresses an existing, and very long standing, TODO in the code such that it's no longer possible to send arbitrary/unnecessary font properties to the main-thread. Furthermore, by having a white-list it's also very easy to see *exactly* which font properties are being exported. Please note that in its current form, the list of exported properties contains *every* possible enumerable property that may exist in a `Font` instance. In practice no single font will contain *all* of these properties, and e.g. embedded/non-embedded/Type3 fonts will all differ slightly with respect to what properties are being defined. Hence why only explicitly set properties are included in the exported data, to avoid half of them being `undefined`, which however should not be a problem for any existing consumer (since they'd already need to handle those cases). Since a fair number of these font properties are completely *internal* functionality, and doesn't make any sense to expose on the main-thread and/or in the API, follow-up patch(es) will be required to trim down the list. (I purposely included all properties here for brevity and future documentation purposes.) --- src/core/fonts.js | 57 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/src/core/fonts.js b/src/core/fonts.js index c21387c45..20fa51224 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -87,6 +87,49 @@ var PDF_GLYPH_SPACE_UNITS = 1000; // custom one. Windows just refuses to draw glyphs with seac operators. var SEAC_ANALYSIS_ENABLED = true; +const EXPORT_DATA_PROPERTIES = [ + "_shadowWidth", + "ascent", + "bbox", + "black", + "bold", + "cMap", + "charProcOperatorList", + "charsCache", + "cidEncoding", + "composite", + "data", + "defaultEncoding", + "defaultVMetrics", + "defaultWidth", + "descent", + "differences", + "fallbackName", + "fallbackToUnicode", + "fontMatrix", + "fontType", + "glyphCache", + "isMonospace", + "isOpenType", + "isSerifFont", + "isSymbolicFont", + "isType3Font", + "italic", + "loadedName", + "mimetype", + "missingFile", + "name", + "remeasure", + "seacMap", + "subtype", + "toFontChar", + "toUnicode", + "type", + "vertical", + "vmetrics", + "widths", +]; + var FontFlags = { FixedPitch: 1, Serif: 2, @@ -1258,12 +1301,14 @@ var Font = (function FontClosure() { return shadow(this, "renderer", renderer); }, - exportData: function Font_exportData() { - // TODO remove enumerating of the properties, e.g. hardcode exact names. - var data = {}; - for (var i in this) { - if (this.hasOwnProperty(i)) { - data[i] = this[i]; + exportData() { + const data = Object.create(null); + let property, value; + for (property of EXPORT_DATA_PROPERTIES) { + value = this[property]; + // Ignore properties that haven't been explicitly set. + if (value !== undefined) { + data[property] = value; } } return data; From a5e4cccf13266dc5f9c6b9aa41e4ad6e51e65f4d Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 30 Mar 2020 13:19:36 +0200 Subject: [PATCH 2/4] [api-minor] Prevent `Font.exportData` from exporting internal/unused properties A number of *internal* font properties, which only make sense on the worker-thread, were previously exported. Some of these properties could also contain potentially large Arrays/Objects, which thus unnecessarily increases memory usage since we're forced to copy these to the main-thread and also store them there. This patch stops exporting the following font properties: - "_shadowWidth": An internal property, which was never intended to be exported. - "charsCache": An internal cache, which was never intended to be exported and doesn't make any sense on the main-thread. Furthermore, by the time `Font.exportData` is called it's usually `undefined` or a mostly empty Object as well. - "cidEncoding": An internal property used with (some) composite fonts. As can be seen in the `PartialEvaluator.translateFont` method, `cidEncoding` will only be assigned a value when the font dictionary has an "Encoding" entry which is a `Name` (and not in the `Stream` case, since those obviously cannot be cloned). All-in-all this property doesn't really make sense on the main-thread and/or in the API, and note also that the resulting `cMap` property is (partially) available already. - "fallbackToUnicode": An internal map, part of the heuristics used to improve text-selection in (some) badly generated PDF documents with simple fonts. This was never intended to be exposed on the main-thread and/or in the API. - "glyphCache": An internal cache, which was never intended to be exported and which doesn't make any sense on the main-thread. Furthermore, by the time `Font.exportData` is called it's usually a mostly empty Object as well. - "isOpenType": An internal property, used only during font parsing on the worker-thread. In the *very* unlikely event that an API consumer actually needs that information, then `fontType` should be a (generally) much better property to use. Finally, in the (hopefully) unlikely event that any of these properties become necessary on the main-thread, re-adding them to the white-list is easy to do. --- src/core/fonts.js | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/core/fonts.js b/src/core/fonts.js index 20fa51224..391ce45db 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -88,15 +88,12 @@ var PDF_GLYPH_SPACE_UNITS = 1000; var SEAC_ANALYSIS_ENABLED = true; const EXPORT_DATA_PROPERTIES = [ - "_shadowWidth", "ascent", "bbox", "black", "bold", "cMap", "charProcOperatorList", - "charsCache", - "cidEncoding", "composite", "data", "defaultEncoding", @@ -105,12 +102,9 @@ const EXPORT_DATA_PROPERTIES = [ "descent", "differences", "fallbackName", - "fallbackToUnicode", "fontMatrix", "fontType", - "glyphCache", "isMonospace", - "isOpenType", "isSerifFont", "isSymbolicFont", "isType3Font", From c5e1fd3fde5f07b8412a08d6e3fa79693192483e Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 31 Mar 2020 13:45:20 +0200 Subject: [PATCH 3/4] Use "standard" shadowing in the `Font.spaceWidth` method With `Font.exportData` now only exporting white-listed properties, there should no longer be any reason to not use standard shadowing in the `Font.spaceWidth` method. Furthermore, considering the amount of other changes to the code-base over the years it's not even clear to me that the special-case was necessary any more (regardless of the preceding patches). --- src/core/fonts.js | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/core/fonts.js b/src/core/fonts.js index 391ce45db..00a721aa9 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -3158,10 +3158,6 @@ var Font = (function FontClosure() { }, get spaceWidth() { - if ("_shadowWidth" in this) { - return this._shadowWidth; - } - // trying to estimate space character width var possibleSpaceReplacements = ["space", "minus", "one", "i", "I"]; var width; @@ -3176,10 +3172,8 @@ var Font = (function FontClosure() { var glyphUnicode = glyphsUnicodeMap[glyphName]; // finding the charcode via unicodeToCID map var charcode = 0; - if (this.composite) { - if (this.cMap.contains(glyphUnicode)) { - charcode = this.cMap.lookup(glyphUnicode); - } + if (this.composite && this.cMap.contains(glyphUnicode)) { + charcode = this.cMap.lookup(glyphUnicode); } // ... via toUnicode map if (!charcode && this.toUnicode) { @@ -3196,10 +3190,7 @@ var Font = (function FontClosure() { } } width = width || this.defaultWidth; - // Do not shadow the property here. See discussion: - // https://github.com/mozilla/pdf.js/pull/2127#discussion_r1662280 - this._shadowWidth = width; - return width; + return shadow(this, "spaceWidth", width); }, charToGlyph: function Font_charToGlyph(charcode, isSpace) { From 59f54b946d4fb54ce7cbad89f9b7e8fb7d61b473 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 1 Apr 2020 13:40:35 +0200 Subject: [PATCH 4/4] Ensure that all `Font` instances have the `vertical` property set to a boolean Given that the `vertical` property is always accessed on the main-thread, ensuring that the property is explicitly defined seems like the correct thing to do since it also avoids boolean casting elsewhere in the code-base. --- src/core/evaluator.js | 2 +- src/core/fonts.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index b06e8b0a1..ec05ce27c 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1761,7 +1761,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { fontFamily: font.fallbackName, ascent: font.ascent, descent: font.descent, - vertical: !!font.vertical, + vertical: font.vertical, }; } textContentItem.fontName = font.loadedName; diff --git a/src/core/fonts.js b/src/core/fonts.js index 00a721aa9..81136c650 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -607,7 +607,7 @@ var Font = (function FontClosure() { } this.cidEncoding = properties.cidEncoding; - this.vertical = properties.vertical; + this.vertical = !!properties.vertical; if (this.vertical) { this.vmetrics = properties.vmetrics; this.defaultVMetrics = properties.defaultVMetrics;