diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index 606208d8f..650f9125a 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -2175,7 +2175,6 @@ class PartialEvaluator {
     stateManager = stateManager || new StateManager(new TextState());
 
     const WhitespaceRegexp = /\s/g;
-    const DiacriticRegExp = new RegExp("^\\p{Mn}$", "u");
     const NormalizedUnicodes = getNormalizedUnicodes();
 
     const textContent = {
@@ -2480,7 +2479,7 @@ class PartialEvaluator {
           return;
         }
 
-        if (Math.abs(advanceX) > textContentItem.height) {
+        if (Math.abs(advanceX) > textContentItem.width) {
           appendEOL();
           return;
         }
@@ -2576,6 +2575,7 @@ class PartialEvaluator {
 
       const glyphs = font.charsToGlyphs(chars);
       const scale = textState.fontMatrix[0] * textState.fontSize;
+
       for (let i = 0, ii = glyphs.length; i < ii; i++) {
         const glyph = glyphs[i];
         let charSpacing =
@@ -2587,13 +2587,12 @@ class PartialEvaluator {
         }
         let scaledDim = glyphWidth * scale;
 
-        let glyphUnicode = glyph.unicode;
         if (
-          glyphUnicode === " " &&
+          glyph.isWhitespace &&
           (i === 0 ||
             i + 1 === ii ||
-            glyphs[i - 1].unicode === " " ||
-            glyphs[i + 1].unicode === " " ||
+            glyphs[i - 1].isWhitespace ||
+            glyphs[i + 1].isWhitespace ||
             extraSpacing)
         ) {
           // Don't push a " " in the textContentItem
@@ -2616,10 +2615,12 @@ class PartialEvaluator {
 
         compareWithLastPosition();
 
+        let glyphUnicode = glyph.unicode;
+
         // Must be called after compareWithLastPosition because
         // the textContentItem could have been flushed.
         const textChunk = ensureTextContentItem();
-        if (DiacriticRegExp.test(glyph.unicode)) {
+        if (glyph.isDiacritic) {
           scaledDim = 0;
         }
 
diff --git a/src/core/fonts.js b/src/core/fonts.js
index 5946b6d49..3e20bbc18 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -34,6 +34,12 @@ import {
   recoverGlyphName,
   SEAC_ANALYSIS_ENABLED,
 } from "./fonts_utils.js";
+import {
+  getCharUnicodeCategory,
+  getUnicodeForGlyph,
+  getUnicodeRangeFor,
+  mapSpecialUnicodeValues,
+} from "./unicode.js";
 import { getDingbatsGlyphsUnicode, getGlyphsUnicode } from "./glyphlist.js";
 import {
   getEncoding,
@@ -50,11 +56,6 @@ import {
   getSupplementalGlyphMapForArialBlack,
   getSupplementalGlyphMapForCalibri,
 } from "./standard_fonts.js";
-import {
-  getUnicodeForGlyph,
-  getUnicodeRangeFor,
-  mapSpecialUnicodeValues,
-} from "./unicode.js";
 import { IdentityToUnicodeMap, ToUnicodeMap } from "./to_unicode_map.js";
 import { CFFFont } from "./cff_font.js";
 import { FontRendererFactory } from "./font_renderer.js";
@@ -212,6 +213,10 @@ class Glyph {
     this.operatorListId = operatorListId;
     this.isSpace = isSpace;
     this.isInFont = isInFont;
+
+    const category = getCharUnicodeCategory(unicode);
+    this.isWhitespace = category.isWhitespace;
+    this.isDiacritic = category.isDiacritic;
   }
 
   matchesForCache(
diff --git a/src/core/unicode.js b/src/core/unicode.js
index b9623308c..71c1c8d4f 100644
--- a/src/core/unicode.js
+++ b/src/core/unicode.js
@@ -1640,7 +1640,28 @@ function reverseIfRtl(chars) {
   return buf.join("");
 }
 
+// Matches a string that is exactly one whitespace character (group 1) or
+// exactly one non-spacing mark, i.e. \p{Mn} (group 2). The non-capturing
+// group makes both anchors apply to both alternatives.
+const SpecialCharRegExp = new RegExp("^(?:(\\s)|(\\p{Mn}))$", "u");
+
+/**
+ * Classify a string as a lone whitespace character or a lone diacritic.
+ *
+ * @param {string} char - The string to classify.
+ * @returns {{isWhitespace: boolean, isDiacritic: boolean}} Each flag is true
+ *   only when `char` consists of exactly one character of that kind.
+ */
+function getCharUnicodeCategory(char) {
+  const groups = char.match(SpecialCharRegExp);
+  return {
+    isWhitespace: !!(groups && groups[1]),
+    isDiacritic: !!(groups && groups[2]),
+  };
+}
+
 export {
+  getCharUnicodeCategory,
   getNormalizedUnicodes,
   getUnicodeForGlyph,
   getUnicodeRangeFor,