From c8868a1c7a8e70ffc5844799f4e8273690699224 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 5 Nov 2022 10:04:31 +0100 Subject: [PATCH] [api-minor] Initialize the unicode-category *lazily* on the `Glyph`-instance The purpose of this patch is twofold: - Initialize the unicode-category data *lazily* during text-extraction, since this is completely unused during general parsing/rendering. - Stop exposing this data in the API, since it's unused on the main-thread and it seems like it was *accidentally* included. Obviously these changes are API-observable, but hopefully no user is depending on this. Furthermore, it's trivial for a user to re-create this unicode-category data manually with a regular expression (from the exposed `unicode` property). --- src/core/evaluator.js | 8 +++++--- src/core/fonts.js | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 84d8950d3..ec615d972 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2775,7 +2775,9 @@ class PartialEvaluator { for (let i = 0, ii = glyphs.length; i < ii; i++) { const glyph = glyphs[i]; - if (glyph.isInvisibleFormatMark) { + const { category } = glyph; + + if (category.isInvisibleFormatMark) { continue; } let charSpacing = @@ -2787,7 +2789,7 @@ class PartialEvaluator { } let scaledDim = glyphWidth * scale; - if (glyph.isWhitespace) { + if (category.isWhitespace) { // Don't push a " " in the textContentItem // (except when it's between two non-spaces chars), // it will be done (if required) in next call to @@ -2815,7 +2817,7 @@ class PartialEvaluator { // Must be called after compareWithLastPosition because // the textContentItem could have been flushed. const textChunk = ensureTextContentItem(); - if (glyph.isZeroWidthDiacritic) { + if (category.isZeroWidthDiacritic) { scaledDim = 0; } diff --git a/src/core/fonts.js b/src/core/fonts.js index e44cfe863..fc56ea055 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -214,11 +214,20 @@ class Glyph { this.operatorListId = operatorListId; this.isSpace = isSpace; this.isInFont = isInFont; + } - const category = getCharUnicodeCategory(unicode); - this.isWhitespace = category.isWhitespace; - this.isZeroWidthDiacritic = category.isZeroWidthDiacritic; - this.isInvisibleFormatMark = category.isInvisibleFormatMark; + /** + * This property, which is only used by `PartialEvaluator.getTextContent`, + * is purposely made non-serializable. + * @type {Object} + */ + get category() { + return shadow( + this, + "category", + getCharUnicodeCategory(this.unicode), + /* nonSerializable = */ true + ); } /**