From 9bb636402a1c1d2eedf458e78deb98fc9739c848 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Fri, 7 Jan 2022 14:02:28 +0100 Subject: [PATCH 1/2] Use the correct dimension to know if we have to add an EOL in vertical mode --- src/core/evaluator.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 3e0dd8492..6a0d54035 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2476,7 +2476,7 @@ class PartialEvaluator { return; } - if (Math.abs(advanceX) > textContentItem.height) { + if (Math.abs(advanceX) > textContentItem.width) { appendEOL(); return; } From 9dae421a0d2e060fd0b066cc8ae4d5e58e70266f Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Fri, 7 Jan 2022 21:20:53 +0100 Subject: [PATCH 2/2] Handle all the whitespaces the same way when creating text chunks --- src/core/evaluator.js | 13 +++++++------ src/core/fonts.js | 15 ++++++++++----- src/core/unicode.js | 10 ++++++++++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 6a0d54035..a189281e0 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2171,7 +2171,6 @@ class PartialEvaluator { stateManager = stateManager || new StateManager(new TextState()); const WhitespaceRegexp = /\s/g; - const DiacriticRegExp = new RegExp("^\\p{Mn}$", "u"); const NormalizedUnicodes = getNormalizedUnicodes(); const textContent = { @@ -2572,6 +2571,7 @@ class PartialEvaluator { const glyphs = font.charsToGlyphs(chars); const scale = textState.fontMatrix[0] * textState.fontSize; + for (let i = 0, ii = glyphs.length; i < ii; i++) { const glyph = glyphs[i]; let charSpacing = @@ -2583,13 +2583,12 @@ class PartialEvaluator { } let scaledDim = glyphWidth * scale; - let glyphUnicode = glyph.unicode; if ( - glyphUnicode === " " && + glyph.isWhitespace && (i === 0 || i + 1 === ii || - glyphs[i - 1].unicode === " " || - glyphs[i + 1].unicode === " " || + glyphs[i - 1].isWhitespace || + glyphs[i + 1].isWhitespace || extraSpacing) ) { // Don't push a " " in the textContentItem @@ -2612,10 +2611,12 @@ class PartialEvaluator { compareWithLastPosition(); + let glyphUnicode = glyph.unicode; + // Must be called after compareWithLastPosition because // the textContentItem could have been flushed. const textChunk = ensureTextContentItem(); - if (DiacriticRegExp.test(glyph.unicode)) { + if (glyph.isDiacritic) { scaledDim = 0; } diff --git a/src/core/fonts.js b/src/core/fonts.js index 5946b6d49..3e20bbc18 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -34,6 +34,12 @@ import { recoverGlyphName, SEAC_ANALYSIS_ENABLED, } from "./fonts_utils.js"; +import { + getCharUnicodeCategory, + getUnicodeForGlyph, + getUnicodeRangeFor, + mapSpecialUnicodeValues, +} from "./unicode.js"; import { getDingbatsGlyphsUnicode, getGlyphsUnicode } from "./glyphlist.js"; import { getEncoding, @@ -50,11 +56,6 @@ import { getSupplementalGlyphMapForArialBlack, getSupplementalGlyphMapForCalibri, } from "./standard_fonts.js"; -import { - getUnicodeForGlyph, - getUnicodeRangeFor, - mapSpecialUnicodeValues, -} from "./unicode.js"; import { IdentityToUnicodeMap, ToUnicodeMap } from "./to_unicode_map.js"; import { CFFFont } from "./cff_font.js"; import { FontRendererFactory } from "./font_renderer.js"; @@ -212,6 +213,10 @@ class Glyph { this.operatorListId = operatorListId; this.isSpace = isSpace; this.isInFont = isInFont; + + const category = getCharUnicodeCategory(unicode); + this.isWhitespace = category.isWhitespace; + this.isDiacritic = category.isDiacritic; } matchesForCache( diff --git a/src/core/unicode.js b/src/core/unicode.js index b9623308c..71c1c8d4f 100644 --- a/src/core/unicode.js +++ b/src/core/unicode.js @@ -1640,7 +1640,17 @@ function reverseIfRtl(chars) { return buf.join(""); } +const SpecialCharRegExp = new RegExp("^(\\s)|(\\p{Mn})$", "u"); +function getCharUnicodeCategory(char) { + const groups = char.match(SpecialCharRegExp); + return { + isWhitespace: !!(groups && groups[1]), + isDiacritic: !!(groups && groups[2]), + }; +} + export { + getCharUnicodeCategory, getNormalizedUnicodes, getUnicodeForGlyph, getUnicodeRangeFor,