From 117bbf7cd9ff624607f37a4f106499171eaabb73 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Thu, 23 Mar 2023 10:15:14 +0100 Subject: [PATCH] [api-minor] Don't normalize the text used in the text layer. Some Arabic chars like \ufe94 could be searched for in a PDF, hence they must be normalized when creating the search query. To avoid duplicating the normalization code, everything is moved into the find controller. The previous code to normalize text was using NFKC but with a hardcoded map, hence it has been replaced by the use of normalize("NFKC") (it helps to reduce the bundle size by 30kb). While playing with this \ufe94 char, I noticed that the bidi algorithm wasn't taking into account some RTL unicode ranges, the generated font wasn't embedding the mapping for this char, and the unicode ranges in the OS/2 table weren't up-to-date. When normalized, some chars can be replaced by several ones, which resulted in some extra chars in the text layer. To avoid any regression, when copying some text from the text layer, the copied string is normalized (NFKC) before being put in the clipboard (this is how both Acrobat and Chrome behave). 
--- src/core/bidi.js | 6 +- src/core/document.js | 9 +- src/core/evaluator.js | 10 +- src/core/fonts.js | 32 +- src/core/unicode.js | 1685 +++---------------------- src/core/worker.js | 3 +- src/display/api.js | 8 +- src/pdf.js | 2 + src/shared/util.js | 20 + test/driver.js | 1 + test/integration/copy_paste_spec.js | 45 +- test/pdfs/.gitignore | 1 + test/pdfs/copy_paste_ligatures.pdf | Bin 0 -> 32408 bytes test/unit/api_spec.js | 78 +- test/unit/pdf_find_controller_spec.js | 71 +- test/unit/unicode_spec.js | 65 +- web/pdf_find_controller.js | 12 +- web/pdf_find_utils.js | 44 +- web/pdf_page_view.js | 1 + web/pdf_viewer.js | 2 + web/text_highlighter.js | 11 + web/text_layer_builder.js | 13 +- 22 files changed, 447 insertions(+), 1672 deletions(-) create mode 100755 test/pdfs/copy_paste_ligatures.pdf diff --git a/src/core/bidi.js b/src/core/bidi.js index ece975154..2596748e0 100644 --- a/src/core/bidi.js +++ b/src/core/bidi.js @@ -147,7 +147,11 @@ function bidi(str, startLevel = -1, vertical = false) { if (!charType) { warn("Bidi: invalid Unicode character " + charCode.toString(16)); } - } else if (0x0700 <= charCode && charCode <= 0x08ac) { + } else if ( + (0x0700 <= charCode && charCode <= 0x08ac) || + (0xfb50 <= charCode && charCode <= 0xfdff) || + (0xfe70 <= charCode && charCode <= 0xfeff) + ) { charType = "AL"; } if (charType === "R" || charType === "AL" || charType === "AN") { diff --git a/src/core/document.js b/src/core/document.js index 6e4ca7cff..77c552ea4 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -511,7 +511,13 @@ class Page { }); } - extractTextContent({ handler, task, includeMarkedContent, sink }) { + extractTextContent({ + handler, + task, + includeMarkedContent, + disableNormalization, + sink, + }) { const contentStreamPromise = this.getContentStream(); const resourcesPromise = this.loadResources([ "ExtGState", @@ -539,6 +545,7 @@ class Page { task, resources: this.resources, includeMarkedContent, + disableNormalization, sink, 
viewBox: this.view, }); diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 4354130dd..ddf6bbbe7 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -24,6 +24,7 @@ import { IDENTITY_MATRIX, info, isArrayEqual, + normalizeUnicode, OPS, shadow, stringToPDFString, @@ -2271,6 +2272,7 @@ class PartialEvaluator { seenStyles = new Set(), viewBox, markedContentData = null, + disableNormalization = false, }) { // Ensure that `resources`/`stateManager` is correctly initialized, // even if the provided parameter is e.g. `null`. @@ -2524,7 +2526,10 @@ class PartialEvaluator { } function runBidiTransform(textChunk) { - const text = textChunk.str.join(""); + let text = textChunk.str.join(""); + if (!disableNormalization) { + text = normalizeUnicode(text); + } const bidiResult = bidi(text, -1, textChunk.vertical); return { str: bidiResult.str, @@ -2859,7 +2864,7 @@ class PartialEvaluator { textChunk.prevTransform = getCurrentTextTransform(); } - const glyphUnicode = glyph.normalizedUnicode; + const glyphUnicode = glyph.unicode; if (saveLastChar(glyphUnicode)) { // The two last chars are a non-whitespace followed by a whitespace // and then this non-whitespace, so we insert a whitespace here. 
@@ -3242,6 +3247,7 @@ class PartialEvaluator { seenStyles, viewBox, markedContentData, + disableNormalization, }) .then(function () { if (!sinkWrapper.enqueueInvoked) { diff --git a/src/core/fonts.js b/src/core/fonts.js index 37b91682a..ad463dce2 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -33,11 +33,9 @@ import { } from "./fonts_utils.js"; import { getCharUnicodeCategory, - getNormalizedUnicodes, getUnicodeForGlyph, getUnicodeRangeFor, mapSpecialUnicodeValues, - reverseIfRtl, } from "./unicode.js"; import { getDingbatsGlyphsUnicode, getGlyphsUnicode } from "./glyphlist.js"; import { @@ -277,24 +275,6 @@ class Glyph { /* nonSerializable = */ true ); } - - /** - * This property, which is only used by `PartialEvaluator.getTextContent`, - * is purposely made non-serializable. - * @type {string} - */ - get normalizedUnicode() { - return shadow( - this, - "normalizedUnicode", - reverseIfRtl(Glyph._NormalizedUnicodes[this.unicode] || this.unicode), - /* nonSerializable = */ true - ); - } - - static get _NormalizedUnicodes() { - return shadow(this, "_NormalizedUnicodes", getNormalizedUnicodes()); - } } function int16(b0, b1) { @@ -507,6 +487,9 @@ function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId, toUnicode) { const privateUseOffetStart = PRIVATE_USE_AREAS[privateUseAreaIndex][0]; let nextAvailableFontCharCode = privateUseOffetStart; let privateUseOffetEnd = PRIVATE_USE_AREAS[privateUseAreaIndex][1]; + const isInPrivateArea = code => + (PRIVATE_USE_AREAS[0][0] <= code && code <= PRIVATE_USE_AREAS[0][1]) || + (PRIVATE_USE_AREAS[1][0] <= code && code <= PRIVATE_USE_AREAS[1][1]); for (let originalCharCode in charCodeToGlyphId) { originalCharCode |= 0; let glyphId = charCodeToGlyphId[originalCharCode]; @@ -539,11 +522,7 @@ function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId, toUnicode) { if (typeof unicode === "string") { unicode = unicode.codePointAt(0); } - if ( - unicode && - unicode < privateUseOffetStart && - 
!usedGlyphIds.has(glyphId) - ) { + if (unicode && !isInPrivateArea(unicode) && !usedGlyphIds.has(glyphId)) { toUnicodeExtraMap.set(unicode, glyphId); usedGlyphIds.add(glyphId); } @@ -785,6 +764,7 @@ function createOS2Table(properties, charstrings, override) { let firstCharIndex = null; let lastCharIndex = 0; + let position = -1; if (charstrings) { for (let code in charstrings) { @@ -796,7 +776,7 @@ function createOS2Table(properties, charstrings, override) { lastCharIndex = code; } - const position = getUnicodeRangeFor(code); + position = getUnicodeRangeFor(code, position); if (position < 32) { ulUnicodeRange1 |= 1 << position; } else if (position < 64) { diff --git a/src/core/unicode.js b/src/core/unicode.js index 6116fab6d..42fe49b85 100644 --- a/src/core/unicode.js +++ b/src/core/unicode.js @@ -14,10 +14,7 @@ */ /* no-babel-preset */ -import { - getArrayLookupTableFactory, - getLookupTableFactory, -} from "./core_utils.js"; +import { getLookupTableFactory } from "./core_utils.js"; // Some characters, e.g. copyrightserif, are mapped to the private use area // and might not be displayed using standard fonts. 
Mapping/hacking well-known @@ -94,1552 +91,158 @@ function getUnicodeForGlyph(name, glyphsUnicodeMap) { return -1; } +// See https://learn.microsoft.com/en-us/typography/opentype/spec/os2#ulunicoderange1-bits-031ulunicoderange2-bits-3263ulunicoderange3-bits-6495ulunicoderange4-bits-96127 const UnicodeRanges = [ - { begin: 0x0000, end: 0x007f }, // Basic Latin - { begin: 0x0080, end: 0x00ff }, // Latin-1 Supplement - { begin: 0x0100, end: 0x017f }, // Latin Extended-A - { begin: 0x0180, end: 0x024f }, // Latin Extended-B - { begin: 0x0250, end: 0x02af }, // IPA Extensions - { begin: 0x02b0, end: 0x02ff }, // Spacing Modifier Letters - { begin: 0x0300, end: 0x036f }, // Combining Diacritical Marks - { begin: 0x0370, end: 0x03ff }, // Greek and Coptic - { begin: 0x2c80, end: 0x2cff }, // Coptic - { begin: 0x0400, end: 0x04ff }, // Cyrillic - { begin: 0x0530, end: 0x058f }, // Armenian - { begin: 0x0590, end: 0x05ff }, // Hebrew - { begin: 0xa500, end: 0xa63f }, // Vai - { begin: 0x0600, end: 0x06ff }, // Arabic - { begin: 0x07c0, end: 0x07ff }, // NKo - { begin: 0x0900, end: 0x097f }, // Devanagari - { begin: 0x0980, end: 0x09ff }, // Bengali - { begin: 0x0a00, end: 0x0a7f }, // Gurmukhi - { begin: 0x0a80, end: 0x0aff }, // Gujarati - { begin: 0x0b00, end: 0x0b7f }, // Oriya - { begin: 0x0b80, end: 0x0bff }, // Tamil - { begin: 0x0c00, end: 0x0c7f }, // Telugu - { begin: 0x0c80, end: 0x0cff }, // Kannada - { begin: 0x0d00, end: 0x0d7f }, // Malayalam - { begin: 0x0e00, end: 0x0e7f }, // Thai - { begin: 0x0e80, end: 0x0eff }, // Lao - { begin: 0x10a0, end: 0x10ff }, // Georgian - { begin: 0x1b00, end: 0x1b7f }, // Balinese - { begin: 0x1100, end: 0x11ff }, // Hangul Jamo - { begin: 0x1e00, end: 0x1eff }, // Latin Extended Additional - { begin: 0x1f00, end: 0x1fff }, // Greek Extended - { begin: 0x2000, end: 0x206f }, // General Punctuation - { begin: 0x2070, end: 0x209f }, // Superscripts And Subscripts - { begin: 0x20a0, end: 0x20cf }, // Currency Symbol - { begin: 
0x20d0, end: 0x20ff }, // Combining Diacritical Marks - { begin: 0x2100, end: 0x214f }, // Letterlike Symbols - { begin: 0x2150, end: 0x218f }, // Number Forms - { begin: 0x2190, end: 0x21ff }, // Arrows - { begin: 0x2200, end: 0x22ff }, // Mathematical Operators - { begin: 0x2300, end: 0x23ff }, // Miscellaneous Technical - { begin: 0x2400, end: 0x243f }, // Control Pictures - { begin: 0x2440, end: 0x245f }, // Optical Character Recognition - { begin: 0x2460, end: 0x24ff }, // Enclosed Alphanumerics - { begin: 0x2500, end: 0x257f }, // Box Drawing - { begin: 0x2580, end: 0x259f }, // Block Elements - { begin: 0x25a0, end: 0x25ff }, // Geometric Shapes - { begin: 0x2600, end: 0x26ff }, // Miscellaneous Symbols - { begin: 0x2700, end: 0x27bf }, // Dingbats - { begin: 0x3000, end: 0x303f }, // CJK Symbols And Punctuation - { begin: 0x3040, end: 0x309f }, // Hiragana - { begin: 0x30a0, end: 0x30ff }, // Katakana - { begin: 0x3100, end: 0x312f }, // Bopomofo - { begin: 0x3130, end: 0x318f }, // Hangul Compatibility Jamo - { begin: 0xa840, end: 0xa87f }, // Phags-pa - { begin: 0x3200, end: 0x32ff }, // Enclosed CJK Letters And Months - { begin: 0x3300, end: 0x33ff }, // CJK Compatibility - { begin: 0xac00, end: 0xd7af }, // Hangul Syllables - { begin: 0xd800, end: 0xdfff }, // Non-Plane 0 * - { begin: 0x10900, end: 0x1091f }, // Phoenicia - { begin: 0x4e00, end: 0x9fff }, // CJK Unified Ideographs - { begin: 0xe000, end: 0xf8ff }, // Private Use Area (plane 0) - { begin: 0x31c0, end: 0x31ef }, // CJK Strokes - { begin: 0xfb00, end: 0xfb4f }, // Alphabetic Presentation Forms - { begin: 0xfb50, end: 0xfdff }, // Arabic Presentation Forms-A - { begin: 0xfe20, end: 0xfe2f }, // Combining Half Marks - { begin: 0xfe10, end: 0xfe1f }, // Vertical Forms - { begin: 0xfe50, end: 0xfe6f }, // Small Form Variants - { begin: 0xfe70, end: 0xfeff }, // Arabic Presentation Forms-B - { begin: 0xff00, end: 0xffef }, // Halfwidth And Fullwidth Forms - { begin: 0xfff0, end: 0xffff }, // 
Specials - { begin: 0x0f00, end: 0x0fff }, // Tibetan - { begin: 0x0700, end: 0x074f }, // Syriac - { begin: 0x0780, end: 0x07bf }, // Thaana - { begin: 0x0d80, end: 0x0dff }, // Sinhala - { begin: 0x1000, end: 0x109f }, // Myanmar - { begin: 0x1200, end: 0x137f }, // Ethiopic - { begin: 0x13a0, end: 0x13ff }, // Cherokee - { begin: 0x1400, end: 0x167f }, // Unified Canadian Aboriginal Syllabics - { begin: 0x1680, end: 0x169f }, // Ogham - { begin: 0x16a0, end: 0x16ff }, // Runic - { begin: 0x1780, end: 0x17ff }, // Khmer - { begin: 0x1800, end: 0x18af }, // Mongolian - { begin: 0x2800, end: 0x28ff }, // Braille Patterns - { begin: 0xa000, end: 0xa48f }, // Yi Syllables - { begin: 0x1700, end: 0x171f }, // Tagalog - { begin: 0x10300, end: 0x1032f }, // Old Italic - { begin: 0x10330, end: 0x1034f }, // Gothic - { begin: 0x10400, end: 0x1044f }, // Deseret - { begin: 0x1d000, end: 0x1d0ff }, // Byzantine Musical Symbols - { begin: 0x1d400, end: 0x1d7ff }, // Mathematical Alphanumeric Symbols - { begin: 0xff000, end: 0xffffd }, // Private Use (plane 15) - { begin: 0xfe00, end: 0xfe0f }, // Variation Selectors - { begin: 0xe0000, end: 0xe007f }, // Tags - { begin: 0x1900, end: 0x194f }, // Limbu - { begin: 0x1950, end: 0x197f }, // Tai Le - { begin: 0x1980, end: 0x19df }, // New Tai Lue - { begin: 0x1a00, end: 0x1a1f }, // Buginese - { begin: 0x2c00, end: 0x2c5f }, // Glagolitic - { begin: 0x2d30, end: 0x2d7f }, // Tifinagh - { begin: 0x4dc0, end: 0x4dff }, // Yijing Hexagram Symbols - { begin: 0xa800, end: 0xa82f }, // Syloti Nagri - { begin: 0x10000, end: 0x1007f }, // Linear B Syllabary - { begin: 0x10140, end: 0x1018f }, // Ancient Greek Numbers - { begin: 0x10380, end: 0x1039f }, // Ugaritic - { begin: 0x103a0, end: 0x103df }, // Old Persian - { begin: 0x10450, end: 0x1047f }, // Shavian - { begin: 0x10480, end: 0x104af }, // Osmanya - { begin: 0x10800, end: 0x1083f }, // Cypriot Syllabary - { begin: 0x10a00, end: 0x10a5f }, // Kharoshthi - { begin: 0x1d300, end: 
0x1d35f }, // Tai Xuan Jing Symbols - { begin: 0x12000, end: 0x123ff }, // Cuneiform - { begin: 0x1d360, end: 0x1d37f }, // Counting Rod Numerals - { begin: 0x1b80, end: 0x1bbf }, // Sundanese - { begin: 0x1c00, end: 0x1c4f }, // Lepcha - { begin: 0x1c50, end: 0x1c7f }, // Ol Chiki - { begin: 0xa880, end: 0xa8df }, // Saurashtra - { begin: 0xa900, end: 0xa92f }, // Kayah Li - { begin: 0xa930, end: 0xa95f }, // Rejang - { begin: 0xaa00, end: 0xaa5f }, // Cham - { begin: 0x10190, end: 0x101cf }, // Ancient Symbols - { begin: 0x101d0, end: 0x101ff }, // Phaistos Disc - { begin: 0x102a0, end: 0x102df }, // Carian - { begin: 0x1f030, end: 0x1f09f }, // Domino Tiles + [0x0000, 0x007f], // 0 - Basic Latin + [0x0080, 0x00ff], // 1 - Latin-1 Supplement + [0x0100, 0x017f], // 2 - Latin Extended-A + [0x0180, 0x024f], // 3 - Latin Extended-B + [0x0250, 0x02af, 0x1d00, 0x1d7f, 0x1d80, 0x1dbf], // 4 - IPA Extensions - Phonetic Extensions - Phonetic Extensions Supplement + [0x02b0, 0x02ff, 0xa700, 0xa71f], // 5 - Spacing Modifier Letters - Modifier Tone Letters + [0x0300, 0x036f, 0x1dc0, 0x1dff], // 6 - Combining Diacritical Marks - Combining Diacritical Marks Supplement + [0x0370, 0x03ff], // 7 - Greek and Coptic + [0x2c80, 0x2cff], // 8 - Coptic + [0x0400, 0x04ff, 0x0500, 0x052f, 0x2de0, 0x2dff, 0xa640, 0xa69f], // 9 - Cyrillic - Cyrillic Supplement - Cyrillic Extended-A - Cyrillic Extended-B + [0x0530, 0x058f], // 10 - Armenian + [0x0590, 0x05ff], // 11 - Hebrew + [0xa500, 0xa63f], // 12 - Vai + [0x0600, 0x06ff, 0x0750, 0x077f], // 13 - Arabic - Arabic Supplement + [0x07c0, 0x07ff], // 14 - NKo + [0x0900, 0x097f], // 15 - Devanagari + [0x0980, 0x09ff], // 16 - Bengali + [0x0a00, 0x0a7f], // 17 - Gurmukhi + [0x0a80, 0x0aff], // 18 - Gujarati + [0x0b00, 0x0b7f], // 19 - Oriya + [0x0b80, 0x0bff], // 20 - Tamil + [0x0c00, 0x0c7f], // 21 - Telugu + [0x0c80, 0x0cff], // 22 - Kannada + [0x0d00, 0x0d7f], // 23 - Malayalam + [0x0e00, 0x0e7f], // 24 - Thai + [0x0e80, 0x0eff], // 25 - 
Lao + [0x10a0, 0x10ff, 0x2d00, 0x2d2f], // 26 - Georgian - Georgian Supplement + [0x1b00, 0x1b7f], // 27 - Balinese + [0x1100, 0x11ff], // 28 - Hangul Jamo + [0x1e00, 0x1eff, 0x2c60, 0x2c7f, 0xa720, 0xa7ff], // 29 - Latin Extended Additional - Latin Extended-C - Latin Extended-D + [0x1f00, 0x1fff], // 30 - Greek Extended + [0x2000, 0x206f, 0x2e00, 0x2e7f], // 31 - General Punctuation - Supplemental Punctuation + [0x2070, 0x209f], // 32 - Superscripts And Subscripts + [0x20a0, 0x20cf], // 33 - Currency Symbol + [0x20d0, 0x20ff], // 34 - Combining Diacritical Marks + [0x2100, 0x214f], // 35 - Letterlike Symbols + [0x2150, 0x218f], // 36 - Number Forms + [0x2190, 0x21ff, 0x27f0, 0x27ff, 0x2900, 0x297f, 0x2b00, 0x2bff], // 37 - Arrows - Supplemental Arrows-A - Supplemental Arrows-B - Miscellaneous Symbols and Arrows + [0x2200, 0x22ff, 0x2a00, 0x2aff, 0x27c0, 0x27ef, 0x2980, 0x29ff], // 38 - Mathematical Operators - Supplemental Mathematical Operators - Miscellaneous Mathematical Symbols-A - Miscellaneous Mathematical Symbols-B + [0x2300, 0x23ff], // 39 - Miscellaneous Technical + [0x2400, 0x243f], // 40 - Control Pictures + [0x2440, 0x245f], // 41 - Optical Character Recognition + [0x2460, 0x24ff], // 42 - Enclosed Alphanumerics + [0x2500, 0x257f], // 43 - Box Drawing + [0x2580, 0x259f], // 44 - Block Elements + [0x25a0, 0x25ff], // 45 - Geometric Shapes + [0x2600, 0x26ff], // 46 - Miscellaneous Symbols + [0x2700, 0x27bf], // 47 - Dingbats + [0x3000, 0x303f], // 48 - CJK Symbols And Punctuation + [0x3040, 0x309f], // 49 - Hiragana + [0x30a0, 0x30ff, 0x31f0, 0x31ff], // 50 - Katakana - Katakana Phonetic Extensions + [0x3100, 0x312f, 0x31a0, 0x31bf], // 51 - Bopomofo - Bopomofo Extended + [0x3130, 0x318f], // 52 - Hangul Compatibility Jamo + [0xa840, 0xa87f], // 53 - Phags-pa + [0x3200, 0x32ff], // 54 - Enclosed CJK Letters And Months + [0x3300, 0x33ff], // 55 - CJK Compatibility + [0xac00, 0xd7af], // 56 - Hangul Syllables + [0xd800, 0xdfff], // 57 - Non-Plane 0 * + 
[0x10900, 0x1091f], // 58 - Phoenicia + [ + 0x4e00, 0x9fff, 0x2e80, 0x2eff, 0x2f00, 0x2fdf, 0x2ff0, 0x2fff, 0x3400, + 0x4dbf, 0x20000, 0x2a6df, 0x3190, 0x319f, + ], // 59 - CJK Unified Ideographs - CJK Radicals Supplement - Kangxi Radicals - Ideographic Description Characters - CJK Unified Ideographs Extension A - CJK Unified Ideographs Extension B - Kanbun + [0xe000, 0xf8ff], // 60 - Private Use Area (plane 0) + [0x31c0, 0x31ef, 0xf900, 0xfaff, 0x2f800, 0x2fa1f], // 61 - CJK Strokes - CJK Compatibility Ideographs - CJK Compatibility Ideographs Supplement + [0xfb00, 0xfb4f], // 62 - Alphabetic Presentation Forms + [0xfb50, 0xfdff], // 63 - Arabic Presentation Forms-A + [0xfe20, 0xfe2f], // 64 - Combining Half Marks + [0xfe10, 0xfe1f], // 65 - Vertical Forms + [0xfe50, 0xfe6f], // 66 - Small Form Variants + [0xfe70, 0xfeff], // 67 - Arabic Presentation Forms-B + [0xff00, 0xffef], // 68 - Halfwidth And Fullwidth Forms + [0xfff0, 0xffff], // 69 - Specials + [0x0f00, 0x0fff], // 70 - Tibetan + [0x0700, 0x074f], // 71 - Syriac + [0x0780, 0x07bf], // 72 - Thaana + [0x0d80, 0x0dff], // 73 - Sinhala + [0x1000, 0x109f], // 74 - Myanmar + [0x1200, 0x137f, 0x1380, 0x139f, 0x2d80, 0x2ddf], // 75 - Ethiopic - Ethiopic Supplement - Ethiopic Extended + [0x13a0, 0x13ff], // 76 - Cherokee + [0x1400, 0x167f], // 77 - Unified Canadian Aboriginal Syllabics + [0x1680, 0x169f], // 78 - Ogham + [0x16a0, 0x16ff], // 79 - Runic + [0x1780, 0x17ff], // 80 - Khmer + [0x1800, 0x18af], // 81 - Mongolian + [0x2800, 0x28ff], // 82 - Braille Patterns + [0xa000, 0xa48f], // 83 - Yi Syllables + [0x1700, 0x171f, 0x1720, 0x173f, 0x1740, 0x175f, 0x1760, 0x177f], // 84 - Tagalog - Hanunoo - Buhid - Tagbanwa + [0x10300, 0x1032f], // 85 - Old Italic + [0x10330, 0x1034f], // 86 - Gothic + [0x10400, 0x1044f], // 87 - Deseret + [0x1d000, 0x1d0ff, 0x1d100, 0x1d1ff, 0x1d200, 0x1d24f], // 88 - Byzantine Musical Symbols - Musical Symbols - Ancient Greek Musical Notation + [0x1d400, 0x1d7ff], // 89 - Mathematical 
Alphanumeric Symbols + [0xff000, 0xffffd], // 90 - Private Use (plane 15) + [0xfe00, 0xfe0f, 0xe0100, 0xe01ef], // 91 - Variation Selectors - Variation Selectors Supplement + [0xe0000, 0xe007f], // 92 - Tags + [0x1900, 0x194f], // 93 - Limbu + [0x1950, 0x197f], // 94 - Tai Le + [0x1980, 0x19df], // 95 - New Tai Lue + [0x1a00, 0x1a1f], // 96 - Buginese + [0x2c00, 0x2c5f], // 97 - Glagolitic + [0x2d30, 0x2d7f], // 98 - Tifinagh + [0x4dc0, 0x4dff], // 99 - Yijing Hexagram Symbols + [0xa800, 0xa82f], // 100 - Syloti Nagri + [0x10000, 0x1007f, 0x10080, 0x100ff, 0x10100, 0x1013f], // 101 - Linear B Syllabary - Linear B Ideograms - Aegean Numbers + [0x10140, 0x1018f], // 102 - Ancient Greek Numbers + [0x10380, 0x1039f], // 103 - Ugaritic + [0x103a0, 0x103df], // 104 - Old Persian + [0x10450, 0x1047f], // 105 - Shavian + [0x10480, 0x104af], // 106 - Osmanya + [0x10800, 0x1083f], // 107 - Cypriot Syllabary + [0x10a00, 0x10a5f], // 108 - Kharoshthi + [0x1d300, 0x1d35f], // 109 - Tai Xuan Jing Symbols + [0x12000, 0x123ff, 0x12400, 0x1247f], // 110 - Cuneiform - Cuneiform Numbers and Punctuation + [0x1d360, 0x1d37f], // 111 - Counting Rod Numerals + [0x1b80, 0x1bbf], // 112 - Sundanese + [0x1c00, 0x1c4f], // 113 - Lepcha + [0x1c50, 0x1c7f], // 114 - Ol Chiki + [0xa880, 0xa8df], // 115 - Saurashtra + [0xa900, 0xa92f], // 116 - Kayah Li + [0xa930, 0xa95f], // 117 - Rejang + [0xaa00, 0xaa5f], // 118 - Cham + [0x10190, 0x101cf], // 119 - Ancient Symbols + [0x101d0, 0x101ff], // 120 - Phaistos Disc + [0x102a0, 0x102df, 0x10280, 0x1029f, 0x10920, 0x1093f], // 121 - Carian - Lycian - Lydian + [0x1f030, 0x1f09f, 0x1f000, 0x1f02f], // 122 - Domino Tiles - Mahjong Tiles ]; -function getUnicodeRangeFor(value) { +function getUnicodeRangeFor(value, lastPosition = -1) { + // TODO: create a map range => position, sort the ranges and cache it. + // Then we can make a binary search for finding a range for a given unicode. 
+ if (lastPosition !== -1) { + const range = UnicodeRanges[lastPosition]; + for (let i = 0, ii = range.length; i < ii; i += 2) { + if (value >= range[i] && value <= range[i + 1]) { + return lastPosition; + } + } + } for (let i = 0, ii = UnicodeRanges.length; i < ii; i++) { const range = UnicodeRanges[i]; - if (value >= range.begin && value < range.end) { - return i; + for (let j = 0, jj = range.length; j < jj; j += 2) { + if (value >= range[j] && value <= range[j + 1]) { + return i; + } } } return -1; } -function isRTLRangeFor(value) { - let range = UnicodeRanges[13]; - if (value >= range.begin && value < range.end) { - return true; - } - range = UnicodeRanges[11]; - if (value >= range.begin && value < range.end) { - return true; - } - return false; -} - -// The normalization table is obtained by filtering the Unicode characters -// database with entries. -const getNormalizedUnicodes = getArrayLookupTableFactory(function () { - // prettier-ignore - return [ - "\u00A8", "\u0020\u0308", - "\u00AF", "\u0020\u0304", - "\u00B4", "\u0020\u0301", - "\u00B5", "\u03BC", - "\u00B8", "\u0020\u0327", - "\u0132", "\u0049\u004A", - "\u0133", "\u0069\u006A", - "\u013F", "\u004C\u00B7", - "\u0140", "\u006C\u00B7", - "\u0149", "\u02BC\u006E", - "\u017F", "\u0073", - "\u01C4", "\u0044\u017D", - "\u01C5", "\u0044\u017E", - "\u01C6", "\u0064\u017E", - "\u01C7", "\u004C\u004A", - "\u01C8", "\u004C\u006A", - "\u01C9", "\u006C\u006A", - "\u01CA", "\u004E\u004A", - "\u01CB", "\u004E\u006A", - "\u01CC", "\u006E\u006A", - "\u01F1", "\u0044\u005A", - "\u01F2", "\u0044\u007A", - "\u01F3", "\u0064\u007A", - "\u02D8", "\u0020\u0306", - "\u02D9", "\u0020\u0307", - "\u02DA", "\u0020\u030A", - "\u02DB", "\u0020\u0328", - "\u02DC", "\u0020\u0303", - "\u02DD", "\u0020\u030B", - "\u037A", "\u0020\u0345", - "\u0384", "\u0020\u0301", - "\u03D0", "\u03B2", - "\u03D1", "\u03B8", - "\u03D2", "\u03A5", - "\u03D5", "\u03C6", - "\u03D6", "\u03C0", - "\u03F0", "\u03BA", - "\u03F1", "\u03C1", - "\u03F2", 
"\u03C2", - "\u03F4", "\u0398", - "\u03F5", "\u03B5", - "\u03F9", "\u03A3", - "\u0587", "\u0565\u0582", - "\u0675", "\u0627\u0674", - "\u0676", "\u0648\u0674", - "\u0677", "\u06C7\u0674", - "\u0678", "\u064A\u0674", - "\u0E33", "\u0E4D\u0E32", - "\u0EB3", "\u0ECD\u0EB2", - "\u0EDC", "\u0EAB\u0E99", - "\u0EDD", "\u0EAB\u0EA1", - "\u0F77", "\u0FB2\u0F81", - "\u0F79", "\u0FB3\u0F81", - "\u1E9A", "\u0061\u02BE", - "\u1FBD", "\u0020\u0313", - "\u1FBF", "\u0020\u0313", - "\u1FC0", "\u0020\u0342", - "\u1FFE", "\u0020\u0314", - "\u2002", "\u0020", - "\u2003", "\u0020", - "\u2004", "\u0020", - "\u2005", "\u0020", - "\u2006", "\u0020", - "\u2008", "\u0020", - "\u2009", "\u0020", - "\u200A", "\u0020", - "\u2017", "\u0020\u0333", - "\u2024", "\u002E", - "\u2025", "\u002E\u002E", - "\u2026", "\u002E\u002E\u002E", - "\u2033", "\u2032\u2032", - "\u2034", "\u2032\u2032\u2032", - "\u2036", "\u2035\u2035", - "\u2037", "\u2035\u2035\u2035", - "\u203C", "\u0021\u0021", - "\u203E", "\u0020\u0305", - "\u2047", "\u003F\u003F", - "\u2048", "\u003F\u0021", - "\u2049", "\u0021\u003F", - "\u2057", "\u2032\u2032\u2032\u2032", - "\u205F", "\u0020", - "\u20A8", "\u0052\u0073", - "\u2100", "\u0061\u002F\u0063", - "\u2101", "\u0061\u002F\u0073", - "\u2103", "\u00B0\u0043", - "\u2105", "\u0063\u002F\u006F", - "\u2106", "\u0063\u002F\u0075", - "\u2107", "\u0190", - "\u2109", "\u00B0\u0046", - "\u2116", "\u004E\u006F", - "\u2121", "\u0054\u0045\u004C", - "\u2135", "\u05D0", - "\u2136", "\u05D1", - "\u2137", "\u05D2", - "\u2138", "\u05D3", - "\u213B", "\u0046\u0041\u0058", - "\u2160", "\u0049", - "\u2161", "\u0049\u0049", - "\u2162", "\u0049\u0049\u0049", - "\u2163", "\u0049\u0056", - "\u2164", "\u0056", - "\u2165", "\u0056\u0049", - "\u2166", "\u0056\u0049\u0049", - "\u2167", "\u0056\u0049\u0049\u0049", - "\u2168", "\u0049\u0058", - "\u2169", "\u0058", - "\u216A", "\u0058\u0049", - "\u216B", "\u0058\u0049\u0049", - "\u216C", "\u004C", - "\u216D", "\u0043", - "\u216E", "\u0044", - "\u216F", "\u004D", 
- "\u2170", "\u0069", - "\u2171", "\u0069\u0069", - "\u2172", "\u0069\u0069\u0069", - "\u2173", "\u0069\u0076", - "\u2174", "\u0076", - "\u2175", "\u0076\u0069", - "\u2176", "\u0076\u0069\u0069", - "\u2177", "\u0076\u0069\u0069\u0069", - "\u2178", "\u0069\u0078", - "\u2179", "\u0078", - "\u217A", "\u0078\u0069", - "\u217B", "\u0078\u0069\u0069", - "\u217C", "\u006C", - "\u217D", "\u0063", - "\u217E", "\u0064", - "\u217F", "\u006D", - "\u222C", "\u222B\u222B", - "\u222D", "\u222B\u222B\u222B", - "\u222F", "\u222E\u222E", - "\u2230", "\u222E\u222E\u222E", - "\u2474", "\u0028\u0031\u0029", - "\u2475", "\u0028\u0032\u0029", - "\u2476", "\u0028\u0033\u0029", - "\u2477", "\u0028\u0034\u0029", - "\u2478", "\u0028\u0035\u0029", - "\u2479", "\u0028\u0036\u0029", - "\u247A", "\u0028\u0037\u0029", - "\u247B", "\u0028\u0038\u0029", - "\u247C", "\u0028\u0039\u0029", - "\u247D", "\u0028\u0031\u0030\u0029", - "\u247E", "\u0028\u0031\u0031\u0029", - "\u247F", "\u0028\u0031\u0032\u0029", - "\u2480", "\u0028\u0031\u0033\u0029", - "\u2481", "\u0028\u0031\u0034\u0029", - "\u2482", "\u0028\u0031\u0035\u0029", - "\u2483", "\u0028\u0031\u0036\u0029", - "\u2484", "\u0028\u0031\u0037\u0029", - "\u2485", "\u0028\u0031\u0038\u0029", - "\u2486", "\u0028\u0031\u0039\u0029", - "\u2487", "\u0028\u0032\u0030\u0029", - "\u2488", "\u0031\u002E", - "\u2489", "\u0032\u002E", - "\u248A", "\u0033\u002E", - "\u248B", "\u0034\u002E", - "\u248C", "\u0035\u002E", - "\u248D", "\u0036\u002E", - "\u248E", "\u0037\u002E", - "\u248F", "\u0038\u002E", - "\u2490", "\u0039\u002E", - "\u2491", "\u0031\u0030\u002E", - "\u2492", "\u0031\u0031\u002E", - "\u2493", "\u0031\u0032\u002E", - "\u2494", "\u0031\u0033\u002E", - "\u2495", "\u0031\u0034\u002E", - "\u2496", "\u0031\u0035\u002E", - "\u2497", "\u0031\u0036\u002E", - "\u2498", "\u0031\u0037\u002E", - "\u2499", "\u0031\u0038\u002E", - "\u249A", "\u0031\u0039\u002E", - "\u249B", "\u0032\u0030\u002E", - "\u249C", "\u0028\u0061\u0029", - "\u249D", "\u0028\u0062\u0029", 
- "\u249E", "\u0028\u0063\u0029", - "\u249F", "\u0028\u0064\u0029", - "\u24A0", "\u0028\u0065\u0029", - "\u24A1", "\u0028\u0066\u0029", - "\u24A2", "\u0028\u0067\u0029", - "\u24A3", "\u0028\u0068\u0029", - "\u24A4", "\u0028\u0069\u0029", - "\u24A5", "\u0028\u006A\u0029", - "\u24A6", "\u0028\u006B\u0029", - "\u24A7", "\u0028\u006C\u0029", - "\u24A8", "\u0028\u006D\u0029", - "\u24A9", "\u0028\u006E\u0029", - "\u24AA", "\u0028\u006F\u0029", - "\u24AB", "\u0028\u0070\u0029", - "\u24AC", "\u0028\u0071\u0029", - "\u24AD", "\u0028\u0072\u0029", - "\u24AE", "\u0028\u0073\u0029", - "\u24AF", "\u0028\u0074\u0029", - "\u24B0", "\u0028\u0075\u0029", - "\u24B1", "\u0028\u0076\u0029", - "\u24B2", "\u0028\u0077\u0029", - "\u24B3", "\u0028\u0078\u0029", - "\u24B4", "\u0028\u0079\u0029", - "\u24B5", "\u0028\u007A\u0029", - "\u2A0C", "\u222B\u222B\u222B\u222B", - "\u2A74", "\u003A\u003A\u003D", - "\u2A75", "\u003D\u003D", - "\u2A76", "\u003D\u003D\u003D", - "\u2E9F", "\u6BCD", - "\u2EF3", "\u9F9F", - "\u2F00", "\u4E00", - "\u2F01", "\u4E28", - "\u2F02", "\u4E36", - "\u2F03", "\u4E3F", - "\u2F04", "\u4E59", - "\u2F05", "\u4E85", - "\u2F06", "\u4E8C", - "\u2F07", "\u4EA0", - "\u2F08", "\u4EBA", - "\u2F09", "\u513F", - "\u2F0A", "\u5165", - "\u2F0B", "\u516B", - "\u2F0C", "\u5182", - "\u2F0D", "\u5196", - "\u2F0E", "\u51AB", - "\u2F0F", "\u51E0", - "\u2F10", "\u51F5", - "\u2F11", "\u5200", - "\u2F12", "\u529B", - "\u2F13", "\u52F9", - "\u2F14", "\u5315", - "\u2F15", "\u531A", - "\u2F16", "\u5338", - "\u2F17", "\u5341", - "\u2F18", "\u535C", - "\u2F19", "\u5369", - "\u2F1A", "\u5382", - "\u2F1B", "\u53B6", - "\u2F1C", "\u53C8", - "\u2F1D", "\u53E3", - "\u2F1E", "\u56D7", - "\u2F1F", "\u571F", - "\u2F20", "\u58EB", - "\u2F21", "\u5902", - "\u2F22", "\u590A", - "\u2F23", "\u5915", - "\u2F24", "\u5927", - "\u2F25", "\u5973", - "\u2F26", "\u5B50", - "\u2F27", "\u5B80", - "\u2F28", "\u5BF8", - "\u2F29", "\u5C0F", - "\u2F2A", "\u5C22", - "\u2F2B", "\u5C38", - "\u2F2C", "\u5C6E", - "\u2F2D", 
"\u5C71", - "\u2F2E", "\u5DDB", - "\u2F2F", "\u5DE5", - "\u2F30", "\u5DF1", - "\u2F31", "\u5DFE", - "\u2F32", "\u5E72", - "\u2F33", "\u5E7A", - "\u2F34", "\u5E7F", - "\u2F35", "\u5EF4", - "\u2F36", "\u5EFE", - "\u2F37", "\u5F0B", - "\u2F38", "\u5F13", - "\u2F39", "\u5F50", - "\u2F3A", "\u5F61", - "\u2F3B", "\u5F73", - "\u2F3C", "\u5FC3", - "\u2F3D", "\u6208", - "\u2F3E", "\u6236", - "\u2F3F", "\u624B", - "\u2F40", "\u652F", - "\u2F41", "\u6534", - "\u2F42", "\u6587", - "\u2F43", "\u6597", - "\u2F44", "\u65A4", - "\u2F45", "\u65B9", - "\u2F46", "\u65E0", - "\u2F47", "\u65E5", - "\u2F48", "\u66F0", - "\u2F49", "\u6708", - "\u2F4A", "\u6728", - "\u2F4B", "\u6B20", - "\u2F4C", "\u6B62", - "\u2F4D", "\u6B79", - "\u2F4E", "\u6BB3", - "\u2F4F", "\u6BCB", - "\u2F50", "\u6BD4", - "\u2F51", "\u6BDB", - "\u2F52", "\u6C0F", - "\u2F53", "\u6C14", - "\u2F54", "\u6C34", - "\u2F55", "\u706B", - "\u2F56", "\u722A", - "\u2F57", "\u7236", - "\u2F58", "\u723B", - "\u2F59", "\u723F", - "\u2F5A", "\u7247", - "\u2F5B", "\u7259", - "\u2F5C", "\u725B", - "\u2F5D", "\u72AC", - "\u2F5E", "\u7384", - "\u2F5F", "\u7389", - "\u2F60", "\u74DC", - "\u2F61", "\u74E6", - "\u2F62", "\u7518", - "\u2F63", "\u751F", - "\u2F64", "\u7528", - "\u2F65", "\u7530", - "\u2F66", "\u758B", - "\u2F67", "\u7592", - "\u2F68", "\u7676", - "\u2F69", "\u767D", - "\u2F6A", "\u76AE", - "\u2F6B", "\u76BF", - "\u2F6C", "\u76EE", - "\u2F6D", "\u77DB", - "\u2F6E", "\u77E2", - "\u2F6F", "\u77F3", - "\u2F70", "\u793A", - "\u2F71", "\u79B8", - "\u2F72", "\u79BE", - "\u2F73", "\u7A74", - "\u2F74", "\u7ACB", - "\u2F75", "\u7AF9", - "\u2F76", "\u7C73", - "\u2F77", "\u7CF8", - "\u2F78", "\u7F36", - "\u2F79", "\u7F51", - "\u2F7A", "\u7F8A", - "\u2F7B", "\u7FBD", - "\u2F7C", "\u8001", - "\u2F7D", "\u800C", - "\u2F7E", "\u8012", - "\u2F7F", "\u8033", - "\u2F80", "\u807F", - "\u2F81", "\u8089", - "\u2F82", "\u81E3", - "\u2F83", "\u81EA", - "\u2F84", "\u81F3", - "\u2F85", "\u81FC", - "\u2F86", "\u820C", - "\u2F87", "\u821B", - 
"\u2F88", "\u821F", - "\u2F89", "\u826E", - "\u2F8A", "\u8272", - "\u2F8B", "\u8278", - "\u2F8C", "\u864D", - "\u2F8D", "\u866B", - "\u2F8E", "\u8840", - "\u2F8F", "\u884C", - "\u2F90", "\u8863", - "\u2F91", "\u897E", - "\u2F92", "\u898B", - "\u2F93", "\u89D2", - "\u2F94", "\u8A00", - "\u2F95", "\u8C37", - "\u2F96", "\u8C46", - "\u2F97", "\u8C55", - "\u2F98", "\u8C78", - "\u2F99", "\u8C9D", - "\u2F9A", "\u8D64", - "\u2F9B", "\u8D70", - "\u2F9C", "\u8DB3", - "\u2F9D", "\u8EAB", - "\u2F9E", "\u8ECA", - "\u2F9F", "\u8F9B", - "\u2FA0", "\u8FB0", - "\u2FA1", "\u8FB5", - "\u2FA2", "\u9091", - "\u2FA3", "\u9149", - "\u2FA4", "\u91C6", - "\u2FA5", "\u91CC", - "\u2FA6", "\u91D1", - "\u2FA7", "\u9577", - "\u2FA8", "\u9580", - "\u2FA9", "\u961C", - "\u2FAA", "\u96B6", - "\u2FAB", "\u96B9", - "\u2FAC", "\u96E8", - "\u2FAD", "\u9751", - "\u2FAE", "\u975E", - "\u2FAF", "\u9762", - "\u2FB0", "\u9769", - "\u2FB1", "\u97CB", - "\u2FB2", "\u97ED", - "\u2FB3", "\u97F3", - "\u2FB4", "\u9801", - "\u2FB5", "\u98A8", - "\u2FB6", "\u98DB", - "\u2FB7", "\u98DF", - "\u2FB8", "\u9996", - "\u2FB9", "\u9999", - "\u2FBA", "\u99AC", - "\u2FBB", "\u9AA8", - "\u2FBC", "\u9AD8", - "\u2FBD", "\u9ADF", - "\u2FBE", "\u9B25", - "\u2FBF", "\u9B2F", - "\u2FC0", "\u9B32", - "\u2FC1", "\u9B3C", - "\u2FC2", "\u9B5A", - "\u2FC3", "\u9CE5", - "\u2FC4", "\u9E75", - "\u2FC5", "\u9E7F", - "\u2FC6", "\u9EA5", - "\u2FC7", "\u9EBB", - "\u2FC8", "\u9EC3", - "\u2FC9", "\u9ECD", - "\u2FCA", "\u9ED1", - "\u2FCB", "\u9EF9", - "\u2FCC", "\u9EFD", - "\u2FCD", "\u9F0E", - "\u2FCE", "\u9F13", - "\u2FCF", "\u9F20", - "\u2FD0", "\u9F3B", - "\u2FD1", "\u9F4A", - "\u2FD2", "\u9F52", - "\u2FD3", "\u9F8D", - "\u2FD4", "\u9F9C", - "\u2FD5", "\u9FA0", - "\u3036", "\u3012", - "\u3038", "\u5341", - "\u3039", "\u5344", - "\u303A", "\u5345", - "\u309B", "\u0020\u3099", - "\u309C", "\u0020\u309A", - "\u3131", "\u1100", - "\u3132", "\u1101", - "\u3133", "\u11AA", - "\u3134", "\u1102", - "\u3135", "\u11AC", - "\u3136", "\u11AD", - 
"\u3137", "\u1103", - "\u3138", "\u1104", - "\u3139", "\u1105", - "\u313A", "\u11B0", - "\u313B", "\u11B1", - "\u313C", "\u11B2", - "\u313D", "\u11B3", - "\u313E", "\u11B4", - "\u313F", "\u11B5", - "\u3140", "\u111A", - "\u3141", "\u1106", - "\u3142", "\u1107", - "\u3143", "\u1108", - "\u3144", "\u1121", - "\u3145", "\u1109", - "\u3146", "\u110A", - "\u3147", "\u110B", - "\u3148", "\u110C", - "\u3149", "\u110D", - "\u314A", "\u110E", - "\u314B", "\u110F", - "\u314C", "\u1110", - "\u314D", "\u1111", - "\u314E", "\u1112", - "\u314F", "\u1161", - "\u3150", "\u1162", - "\u3151", "\u1163", - "\u3152", "\u1164", - "\u3153", "\u1165", - "\u3154", "\u1166", - "\u3155", "\u1167", - "\u3156", "\u1168", - "\u3157", "\u1169", - "\u3158", "\u116A", - "\u3159", "\u116B", - "\u315A", "\u116C", - "\u315B", "\u116D", - "\u315C", "\u116E", - "\u315D", "\u116F", - "\u315E", "\u1170", - "\u315F", "\u1171", - "\u3160", "\u1172", - "\u3161", "\u1173", - "\u3162", "\u1174", - "\u3163", "\u1175", - "\u3164", "\u1160", - "\u3165", "\u1114", - "\u3166", "\u1115", - "\u3167", "\u11C7", - "\u3168", "\u11C8", - "\u3169", "\u11CC", - "\u316A", "\u11CE", - "\u316B", "\u11D3", - "\u316C", "\u11D7", - "\u316D", "\u11D9", - "\u316E", "\u111C", - "\u316F", "\u11DD", - "\u3170", "\u11DF", - "\u3171", "\u111D", - "\u3172", "\u111E", - "\u3173", "\u1120", - "\u3174", "\u1122", - "\u3175", "\u1123", - "\u3176", "\u1127", - "\u3177", "\u1129", - "\u3178", "\u112B", - "\u3179", "\u112C", - "\u317A", "\u112D", - "\u317B", "\u112E", - "\u317C", "\u112F", - "\u317D", "\u1132", - "\u317E", "\u1136", - "\u317F", "\u1140", - "\u3180", "\u1147", - "\u3181", "\u114C", - "\u3182", "\u11F1", - "\u3183", "\u11F2", - "\u3184", "\u1157", - "\u3185", "\u1158", - "\u3186", "\u1159", - "\u3187", "\u1184", - "\u3188", "\u1185", - "\u3189", "\u1188", - "\u318A", "\u1191", - "\u318B", "\u1192", - "\u318C", "\u1194", - "\u318D", "\u119E", - "\u318E", "\u11A1", - "\u3200", "\u0028\u1100\u0029", - "\u3201", 
"\u0028\u1102\u0029", - "\u3202", "\u0028\u1103\u0029", - "\u3203", "\u0028\u1105\u0029", - "\u3204", "\u0028\u1106\u0029", - "\u3205", "\u0028\u1107\u0029", - "\u3206", "\u0028\u1109\u0029", - "\u3207", "\u0028\u110B\u0029", - "\u3208", "\u0028\u110C\u0029", - "\u3209", "\u0028\u110E\u0029", - "\u320A", "\u0028\u110F\u0029", - "\u320B", "\u0028\u1110\u0029", - "\u320C", "\u0028\u1111\u0029", - "\u320D", "\u0028\u1112\u0029", - "\u320E", "\u0028\u1100\u1161\u0029", - "\u320F", "\u0028\u1102\u1161\u0029", - "\u3210", "\u0028\u1103\u1161\u0029", - "\u3211", "\u0028\u1105\u1161\u0029", - "\u3212", "\u0028\u1106\u1161\u0029", - "\u3213", "\u0028\u1107\u1161\u0029", - "\u3214", "\u0028\u1109\u1161\u0029", - "\u3215", "\u0028\u110B\u1161\u0029", - "\u3216", "\u0028\u110C\u1161\u0029", - "\u3217", "\u0028\u110E\u1161\u0029", - "\u3218", "\u0028\u110F\u1161\u0029", - "\u3219", "\u0028\u1110\u1161\u0029", - "\u321A", "\u0028\u1111\u1161\u0029", - "\u321B", "\u0028\u1112\u1161\u0029", - "\u321C", "\u0028\u110C\u116E\u0029", - "\u321D", "\u0028\u110B\u1169\u110C\u1165\u11AB\u0029", - "\u321E", "\u0028\u110B\u1169\u1112\u116E\u0029", - "\u3220", "\u0028\u4E00\u0029", - "\u3221", "\u0028\u4E8C\u0029", - "\u3222", "\u0028\u4E09\u0029", - "\u3223", "\u0028\u56DB\u0029", - "\u3224", "\u0028\u4E94\u0029", - "\u3225", "\u0028\u516D\u0029", - "\u3226", "\u0028\u4E03\u0029", - "\u3227", "\u0028\u516B\u0029", - "\u3228", "\u0028\u4E5D\u0029", - "\u3229", "\u0028\u5341\u0029", - "\u322A", "\u0028\u6708\u0029", - "\u322B", "\u0028\u706B\u0029", - "\u322C", "\u0028\u6C34\u0029", - "\u322D", "\u0028\u6728\u0029", - "\u322E", "\u0028\u91D1\u0029", - "\u322F", "\u0028\u571F\u0029", - "\u3230", "\u0028\u65E5\u0029", - "\u3231", "\u0028\u682A\u0029", - "\u3232", "\u0028\u6709\u0029", - "\u3233", "\u0028\u793E\u0029", - "\u3234", "\u0028\u540D\u0029", - "\u3235", "\u0028\u7279\u0029", - "\u3236", "\u0028\u8CA1\u0029", - "\u3237", "\u0028\u795D\u0029", - "\u3238", "\u0028\u52B4\u0029", - 
"\u3239", "\u0028\u4EE3\u0029", - "\u323A", "\u0028\u547C\u0029", - "\u323B", "\u0028\u5B66\u0029", - "\u323C", "\u0028\u76E3\u0029", - "\u323D", "\u0028\u4F01\u0029", - "\u323E", "\u0028\u8CC7\u0029", - "\u323F", "\u0028\u5354\u0029", - "\u3240", "\u0028\u796D\u0029", - "\u3241", "\u0028\u4F11\u0029", - "\u3242", "\u0028\u81EA\u0029", - "\u3243", "\u0028\u81F3\u0029", - "\u32C0", "\u0031\u6708", - "\u32C1", "\u0032\u6708", - "\u32C2", "\u0033\u6708", - "\u32C3", "\u0034\u6708", - "\u32C4", "\u0035\u6708", - "\u32C5", "\u0036\u6708", - "\u32C6", "\u0037\u6708", - "\u32C7", "\u0038\u6708", - "\u32C8", "\u0039\u6708", - "\u32C9", "\u0031\u0030\u6708", - "\u32CA", "\u0031\u0031\u6708", - "\u32CB", "\u0031\u0032\u6708", - "\u3358", "\u0030\u70B9", - "\u3359", "\u0031\u70B9", - "\u335A", "\u0032\u70B9", - "\u335B", "\u0033\u70B9", - "\u335C", "\u0034\u70B9", - "\u335D", "\u0035\u70B9", - "\u335E", "\u0036\u70B9", - "\u335F", "\u0037\u70B9", - "\u3360", "\u0038\u70B9", - "\u3361", "\u0039\u70B9", - "\u3362", "\u0031\u0030\u70B9", - "\u3363", "\u0031\u0031\u70B9", - "\u3364", "\u0031\u0032\u70B9", - "\u3365", "\u0031\u0033\u70B9", - "\u3366", "\u0031\u0034\u70B9", - "\u3367", "\u0031\u0035\u70B9", - "\u3368", "\u0031\u0036\u70B9", - "\u3369", "\u0031\u0037\u70B9", - "\u336A", "\u0031\u0038\u70B9", - "\u336B", "\u0031\u0039\u70B9", - "\u336C", "\u0032\u0030\u70B9", - "\u336D", "\u0032\u0031\u70B9", - "\u336E", "\u0032\u0032\u70B9", - "\u336F", "\u0032\u0033\u70B9", - "\u3370", "\u0032\u0034\u70B9", - "\u33E0", "\u0031\u65E5", - "\u33E1", "\u0032\u65E5", - "\u33E2", "\u0033\u65E5", - "\u33E3", "\u0034\u65E5", - "\u33E4", "\u0035\u65E5", - "\u33E5", "\u0036\u65E5", - "\u33E6", "\u0037\u65E5", - "\u33E7", "\u0038\u65E5", - "\u33E8", "\u0039\u65E5", - "\u33E9", "\u0031\u0030\u65E5", - "\u33EA", "\u0031\u0031\u65E5", - "\u33EB", "\u0031\u0032\u65E5", - "\u33EC", "\u0031\u0033\u65E5", - "\u33ED", "\u0031\u0034\u65E5", - "\u33EE", "\u0031\u0035\u65E5", - "\u33EF", 
"\u0031\u0036\u65E5", - "\u33F0", "\u0031\u0037\u65E5", - "\u33F1", "\u0031\u0038\u65E5", - "\u33F2", "\u0031\u0039\u65E5", - "\u33F3", "\u0032\u0030\u65E5", - "\u33F4", "\u0032\u0031\u65E5", - "\u33F5", "\u0032\u0032\u65E5", - "\u33F6", "\u0032\u0033\u65E5", - "\u33F7", "\u0032\u0034\u65E5", - "\u33F8", "\u0032\u0035\u65E5", - "\u33F9", "\u0032\u0036\u65E5", - "\u33FA", "\u0032\u0037\u65E5", - "\u33FB", "\u0032\u0038\u65E5", - "\u33FC", "\u0032\u0039\u65E5", - "\u33FD", "\u0033\u0030\u65E5", - "\u33FE", "\u0033\u0031\u65E5", - "\uFB00", "\u0066\u0066", - "\uFB01", "\u0066\u0069", - "\uFB02", "\u0066\u006C", - "\uFB03", "\u0066\u0066\u0069", - "\uFB04", "\u0066\u0066\u006C", - "\uFB05", "\u017F\u0074", - "\uFB06", "\u0073\u0074", - "\uFB13", "\u0574\u0576", - "\uFB14", "\u0574\u0565", - "\uFB15", "\u0574\u056B", - "\uFB16", "\u057E\u0576", - "\uFB17", "\u0574\u056D", - "\uFB4F", "\u05D0\u05DC", - "\uFB50", "\u0671", - "\uFB51", "\u0671", - "\uFB52", "\u067B", - "\uFB53", "\u067B", - "\uFB54", "\u067B", - "\uFB55", "\u067B", - "\uFB56", "\u067E", - "\uFB57", "\u067E", - "\uFB58", "\u067E", - "\uFB59", "\u067E", - "\uFB5A", "\u0680", - "\uFB5B", "\u0680", - "\uFB5C", "\u0680", - "\uFB5D", "\u0680", - "\uFB5E", "\u067A", - "\uFB5F", "\u067A", - "\uFB60", "\u067A", - "\uFB61", "\u067A", - "\uFB62", "\u067F", - "\uFB63", "\u067F", - "\uFB64", "\u067F", - "\uFB65", "\u067F", - "\uFB66", "\u0679", - "\uFB67", "\u0679", - "\uFB68", "\u0679", - "\uFB69", "\u0679", - "\uFB6A", "\u06A4", - "\uFB6B", "\u06A4", - "\uFB6C", "\u06A4", - "\uFB6D", "\u06A4", - "\uFB6E", "\u06A6", - "\uFB6F", "\u06A6", - "\uFB70", "\u06A6", - "\uFB71", "\u06A6", - "\uFB72", "\u0684", - "\uFB73", "\u0684", - "\uFB74", "\u0684", - "\uFB75", "\u0684", - "\uFB76", "\u0683", - "\uFB77", "\u0683", - "\uFB78", "\u0683", - "\uFB79", "\u0683", - "\uFB7A", "\u0686", - "\uFB7B", "\u0686", - "\uFB7C", "\u0686", - "\uFB7D", "\u0686", - "\uFB7E", "\u0687", - "\uFB7F", "\u0687", - "\uFB80", "\u0687", - "\uFB81", 
"\u0687", - "\uFB82", "\u068D", - "\uFB83", "\u068D", - "\uFB84", "\u068C", - "\uFB85", "\u068C", - "\uFB86", "\u068E", - "\uFB87", "\u068E", - "\uFB88", "\u0688", - "\uFB89", "\u0688", - "\uFB8A", "\u0698", - "\uFB8B", "\u0698", - "\uFB8C", "\u0691", - "\uFB8D", "\u0691", - "\uFB8E", "\u06A9", - "\uFB8F", "\u06A9", - "\uFB90", "\u06A9", - "\uFB91", "\u06A9", - "\uFB92", "\u06AF", - "\uFB93", "\u06AF", - "\uFB94", "\u06AF", - "\uFB95", "\u06AF", - "\uFB96", "\u06B3", - "\uFB97", "\u06B3", - "\uFB98", "\u06B3", - "\uFB99", "\u06B3", - "\uFB9A", "\u06B1", - "\uFB9B", "\u06B1", - "\uFB9C", "\u06B1", - "\uFB9D", "\u06B1", - "\uFB9E", "\u06BA", - "\uFB9F", "\u06BA", - "\uFBA0", "\u06BB", - "\uFBA1", "\u06BB", - "\uFBA2", "\u06BB", - "\uFBA3", "\u06BB", - "\uFBA4", "\u06C0", - "\uFBA5", "\u06C0", - "\uFBA6", "\u06C1", - "\uFBA7", "\u06C1", - "\uFBA8", "\u06C1", - "\uFBA9", "\u06C1", - "\uFBAA", "\u06BE", - "\uFBAB", "\u06BE", - "\uFBAC", "\u06BE", - "\uFBAD", "\u06BE", - "\uFBAE", "\u06D2", - "\uFBAF", "\u06D2", - "\uFBB0", "\u06D3", - "\uFBB1", "\u06D3", - "\uFBD3", "\u06AD", - "\uFBD4", "\u06AD", - "\uFBD5", "\u06AD", - "\uFBD6", "\u06AD", - "\uFBD7", "\u06C7", - "\uFBD8", "\u06C7", - "\uFBD9", "\u06C6", - "\uFBDA", "\u06C6", - "\uFBDB", "\u06C8", - "\uFBDC", "\u06C8", - "\uFBDD", "\u0677", - "\uFBDE", "\u06CB", - "\uFBDF", "\u06CB", - "\uFBE0", "\u06C5", - "\uFBE1", "\u06C5", - "\uFBE2", "\u06C9", - "\uFBE3", "\u06C9", - "\uFBE4", "\u06D0", - "\uFBE5", "\u06D0", - "\uFBE6", "\u06D0", - "\uFBE7", "\u06D0", - "\uFBE8", "\u0649", - "\uFBE9", "\u0649", - "\uFBEA", "\u0626\u0627", - "\uFBEB", "\u0626\u0627", - "\uFBEC", "\u0626\u06D5", - "\uFBED", "\u0626\u06D5", - "\uFBEE", "\u0626\u0648", - "\uFBEF", "\u0626\u0648", - "\uFBF0", "\u0626\u06C7", - "\uFBF1", "\u0626\u06C7", - "\uFBF2", "\u0626\u06C6", - "\uFBF3", "\u0626\u06C6", - "\uFBF4", "\u0626\u06C8", - "\uFBF5", "\u0626\u06C8", - "\uFBF6", "\u0626\u06D0", - "\uFBF7", "\u0626\u06D0", - "\uFBF8", "\u0626\u06D0", - 
"\uFBF9", "\u0626\u0649", - "\uFBFA", "\u0626\u0649", - "\uFBFB", "\u0626\u0649", - "\uFBFC", "\u06CC", - "\uFBFD", "\u06CC", - "\uFBFE", "\u06CC", - "\uFBFF", "\u06CC", - "\uFC00", "\u0626\u062C", - "\uFC01", "\u0626\u062D", - "\uFC02", "\u0626\u0645", - "\uFC03", "\u0626\u0649", - "\uFC04", "\u0626\u064A", - "\uFC05", "\u0628\u062C", - "\uFC06", "\u0628\u062D", - "\uFC07", "\u0628\u062E", - "\uFC08", "\u0628\u0645", - "\uFC09", "\u0628\u0649", - "\uFC0A", "\u0628\u064A", - "\uFC0B", "\u062A\u062C", - "\uFC0C", "\u062A\u062D", - "\uFC0D", "\u062A\u062E", - "\uFC0E", "\u062A\u0645", - "\uFC0F", "\u062A\u0649", - "\uFC10", "\u062A\u064A", - "\uFC11", "\u062B\u062C", - "\uFC12", "\u062B\u0645", - "\uFC13", "\u062B\u0649", - "\uFC14", "\u062B\u064A", - "\uFC15", "\u062C\u062D", - "\uFC16", "\u062C\u0645", - "\uFC17", "\u062D\u062C", - "\uFC18", "\u062D\u0645", - "\uFC19", "\u062E\u062C", - "\uFC1A", "\u062E\u062D", - "\uFC1B", "\u062E\u0645", - "\uFC1C", "\u0633\u062C", - "\uFC1D", "\u0633\u062D", - "\uFC1E", "\u0633\u062E", - "\uFC1F", "\u0633\u0645", - "\uFC20", "\u0635\u062D", - "\uFC21", "\u0635\u0645", - "\uFC22", "\u0636\u062C", - "\uFC23", "\u0636\u062D", - "\uFC24", "\u0636\u062E", - "\uFC25", "\u0636\u0645", - "\uFC26", "\u0637\u062D", - "\uFC27", "\u0637\u0645", - "\uFC28", "\u0638\u0645", - "\uFC29", "\u0639\u062C", - "\uFC2A", "\u0639\u0645", - "\uFC2B", "\u063A\u062C", - "\uFC2C", "\u063A\u0645", - "\uFC2D", "\u0641\u062C", - "\uFC2E", "\u0641\u062D", - "\uFC2F", "\u0641\u062E", - "\uFC30", "\u0641\u0645", - "\uFC31", "\u0641\u0649", - "\uFC32", "\u0641\u064A", - "\uFC33", "\u0642\u062D", - "\uFC34", "\u0642\u0645", - "\uFC35", "\u0642\u0649", - "\uFC36", "\u0642\u064A", - "\uFC37", "\u0643\u0627", - "\uFC38", "\u0643\u062C", - "\uFC39", "\u0643\u062D", - "\uFC3A", "\u0643\u062E", - "\uFC3B", "\u0643\u0644", - "\uFC3C", "\u0643\u0645", - "\uFC3D", "\u0643\u0649", - "\uFC3E", "\u0643\u064A", - "\uFC3F", "\u0644\u062C", - "\uFC40", "\u0644\u062D", - 
"\uFC41", "\u0644\u062E", - "\uFC42", "\u0644\u0645", - "\uFC43", "\u0644\u0649", - "\uFC44", "\u0644\u064A", - "\uFC45", "\u0645\u062C", - "\uFC46", "\u0645\u062D", - "\uFC47", "\u0645\u062E", - "\uFC48", "\u0645\u0645", - "\uFC49", "\u0645\u0649", - "\uFC4A", "\u0645\u064A", - "\uFC4B", "\u0646\u062C", - "\uFC4C", "\u0646\u062D", - "\uFC4D", "\u0646\u062E", - "\uFC4E", "\u0646\u0645", - "\uFC4F", "\u0646\u0649", - "\uFC50", "\u0646\u064A", - "\uFC51", "\u0647\u062C", - "\uFC52", "\u0647\u0645", - "\uFC53", "\u0647\u0649", - "\uFC54", "\u0647\u064A", - "\uFC55", "\u064A\u062C", - "\uFC56", "\u064A\u062D", - "\uFC57", "\u064A\u062E", - "\uFC58", "\u064A\u0645", - "\uFC59", "\u064A\u0649", - "\uFC5A", "\u064A\u064A", - "\uFC5B", "\u0630\u0670", - "\uFC5C", "\u0631\u0670", - "\uFC5D", "\u0649\u0670", - "\uFC5E", "\u0020\u064C\u0651", - "\uFC5F", "\u0020\u064D\u0651", - "\uFC60", "\u0020\u064E\u0651", - "\uFC61", "\u0020\u064F\u0651", - "\uFC62", "\u0020\u0650\u0651", - "\uFC63", "\u0020\u0651\u0670", - "\uFC64", "\u0626\u0631", - "\uFC65", "\u0626\u0632", - "\uFC66", "\u0626\u0645", - "\uFC67", "\u0626\u0646", - "\uFC68", "\u0626\u0649", - "\uFC69", "\u0626\u064A", - "\uFC6A", "\u0628\u0631", - "\uFC6B", "\u0628\u0632", - "\uFC6C", "\u0628\u0645", - "\uFC6D", "\u0628\u0646", - "\uFC6E", "\u0628\u0649", - "\uFC6F", "\u0628\u064A", - "\uFC70", "\u062A\u0631", - "\uFC71", "\u062A\u0632", - "\uFC72", "\u062A\u0645", - "\uFC73", "\u062A\u0646", - "\uFC74", "\u062A\u0649", - "\uFC75", "\u062A\u064A", - "\uFC76", "\u062B\u0631", - "\uFC77", "\u062B\u0632", - "\uFC78", "\u062B\u0645", - "\uFC79", "\u062B\u0646", - "\uFC7A", "\u062B\u0649", - "\uFC7B", "\u062B\u064A", - "\uFC7C", "\u0641\u0649", - "\uFC7D", "\u0641\u064A", - "\uFC7E", "\u0642\u0649", - "\uFC7F", "\u0642\u064A", - "\uFC80", "\u0643\u0627", - "\uFC81", "\u0643\u0644", - "\uFC82", "\u0643\u0645", - "\uFC83", "\u0643\u0649", - "\uFC84", "\u0643\u064A", - "\uFC85", "\u0644\u0645", - "\uFC86", "\u0644\u0649", - 
"\uFC87", "\u0644\u064A", - "\uFC88", "\u0645\u0627", - "\uFC89", "\u0645\u0645", - "\uFC8A", "\u0646\u0631", - "\uFC8B", "\u0646\u0632", - "\uFC8C", "\u0646\u0645", - "\uFC8D", "\u0646\u0646", - "\uFC8E", "\u0646\u0649", - "\uFC8F", "\u0646\u064A", - "\uFC90", "\u0649\u0670", - "\uFC91", "\u064A\u0631", - "\uFC92", "\u064A\u0632", - "\uFC93", "\u064A\u0645", - "\uFC94", "\u064A\u0646", - "\uFC95", "\u064A\u0649", - "\uFC96", "\u064A\u064A", - "\uFC97", "\u0626\u062C", - "\uFC98", "\u0626\u062D", - "\uFC99", "\u0626\u062E", - "\uFC9A", "\u0626\u0645", - "\uFC9B", "\u0626\u0647", - "\uFC9C", "\u0628\u062C", - "\uFC9D", "\u0628\u062D", - "\uFC9E", "\u0628\u062E", - "\uFC9F", "\u0628\u0645", - "\uFCA0", "\u0628\u0647", - "\uFCA1", "\u062A\u062C", - "\uFCA2", "\u062A\u062D", - "\uFCA3", "\u062A\u062E", - "\uFCA4", "\u062A\u0645", - "\uFCA5", "\u062A\u0647", - "\uFCA6", "\u062B\u0645", - "\uFCA7", "\u062C\u062D", - "\uFCA8", "\u062C\u0645", - "\uFCA9", "\u062D\u062C", - "\uFCAA", "\u062D\u0645", - "\uFCAB", "\u062E\u062C", - "\uFCAC", "\u062E\u0645", - "\uFCAD", "\u0633\u062C", - "\uFCAE", "\u0633\u062D", - "\uFCAF", "\u0633\u062E", - "\uFCB0", "\u0633\u0645", - "\uFCB1", "\u0635\u062D", - "\uFCB2", "\u0635\u062E", - "\uFCB3", "\u0635\u0645", - "\uFCB4", "\u0636\u062C", - "\uFCB5", "\u0636\u062D", - "\uFCB6", "\u0636\u062E", - "\uFCB7", "\u0636\u0645", - "\uFCB8", "\u0637\u062D", - "\uFCB9", "\u0638\u0645", - "\uFCBA", "\u0639\u062C", - "\uFCBB", "\u0639\u0645", - "\uFCBC", "\u063A\u062C", - "\uFCBD", "\u063A\u0645", - "\uFCBE", "\u0641\u062C", - "\uFCBF", "\u0641\u062D", - "\uFCC0", "\u0641\u062E", - "\uFCC1", "\u0641\u0645", - "\uFCC2", "\u0642\u062D", - "\uFCC3", "\u0642\u0645", - "\uFCC4", "\u0643\u062C", - "\uFCC5", "\u0643\u062D", - "\uFCC6", "\u0643\u062E", - "\uFCC7", "\u0643\u0644", - "\uFCC8", "\u0643\u0645", - "\uFCC9", "\u0644\u062C", - "\uFCCA", "\u0644\u062D", - "\uFCCB", "\u0644\u062E", - "\uFCCC", "\u0644\u0645", - "\uFCCD", "\u0644\u0647", - "\uFCCE", 
"\u0645\u062C", - "\uFCCF", "\u0645\u062D", - "\uFCD0", "\u0645\u062E", - "\uFCD1", "\u0645\u0645", - "\uFCD2", "\u0646\u062C", - "\uFCD3", "\u0646\u062D", - "\uFCD4", "\u0646\u062E", - "\uFCD5", "\u0646\u0645", - "\uFCD6", "\u0646\u0647", - "\uFCD7", "\u0647\u062C", - "\uFCD8", "\u0647\u0645", - "\uFCD9", "\u0647\u0670", - "\uFCDA", "\u064A\u062C", - "\uFCDB", "\u064A\u062D", - "\uFCDC", "\u064A\u062E", - "\uFCDD", "\u064A\u0645", - "\uFCDE", "\u064A\u0647", - "\uFCDF", "\u0626\u0645", - "\uFCE0", "\u0626\u0647", - "\uFCE1", "\u0628\u0645", - "\uFCE2", "\u0628\u0647", - "\uFCE3", "\u062A\u0645", - "\uFCE4", "\u062A\u0647", - "\uFCE5", "\u062B\u0645", - "\uFCE6", "\u062B\u0647", - "\uFCE7", "\u0633\u0645", - "\uFCE8", "\u0633\u0647", - "\uFCE9", "\u0634\u0645", - "\uFCEA", "\u0634\u0647", - "\uFCEB", "\u0643\u0644", - "\uFCEC", "\u0643\u0645", - "\uFCED", "\u0644\u0645", - "\uFCEE", "\u0646\u0645", - "\uFCEF", "\u0646\u0647", - "\uFCF0", "\u064A\u0645", - "\uFCF1", "\u064A\u0647", - "\uFCF2", "\u0640\u064E\u0651", - "\uFCF3", "\u0640\u064F\u0651", - "\uFCF4", "\u0640\u0650\u0651", - "\uFCF5", "\u0637\u0649", - "\uFCF6", "\u0637\u064A", - "\uFCF7", "\u0639\u0649", - "\uFCF8", "\u0639\u064A", - "\uFCF9", "\u063A\u0649", - "\uFCFA", "\u063A\u064A", - "\uFCFB", "\u0633\u0649", - "\uFCFC", "\u0633\u064A", - "\uFCFD", "\u0634\u0649", - "\uFCFE", "\u0634\u064A", - "\uFCFF", "\u062D\u0649", - "\uFD00", "\u062D\u064A", - "\uFD01", "\u062C\u0649", - "\uFD02", "\u062C\u064A", - "\uFD03", "\u062E\u0649", - "\uFD04", "\u062E\u064A", - "\uFD05", "\u0635\u0649", - "\uFD06", "\u0635\u064A", - "\uFD07", "\u0636\u0649", - "\uFD08", "\u0636\u064A", - "\uFD09", "\u0634\u062C", - "\uFD0A", "\u0634\u062D", - "\uFD0B", "\u0634\u062E", - "\uFD0C", "\u0634\u0645", - "\uFD0D", "\u0634\u0631", - "\uFD0E", "\u0633\u0631", - "\uFD0F", "\u0635\u0631", - "\uFD10", "\u0636\u0631", - "\uFD11", "\u0637\u0649", - "\uFD12", "\u0637\u064A", - "\uFD13", "\u0639\u0649", - "\uFD14", "\u0639\u064A", - 
"\uFD15", "\u063A\u0649", - "\uFD16", "\u063A\u064A", - "\uFD17", "\u0633\u0649", - "\uFD18", "\u0633\u064A", - "\uFD19", "\u0634\u0649", - "\uFD1A", "\u0634\u064A", - "\uFD1B", "\u062D\u0649", - "\uFD1C", "\u062D\u064A", - "\uFD1D", "\u062C\u0649", - "\uFD1E", "\u062C\u064A", - "\uFD1F", "\u062E\u0649", - "\uFD20", "\u062E\u064A", - "\uFD21", "\u0635\u0649", - "\uFD22", "\u0635\u064A", - "\uFD23", "\u0636\u0649", - "\uFD24", "\u0636\u064A", - "\uFD25", "\u0634\u062C", - "\uFD26", "\u0634\u062D", - "\uFD27", "\u0634\u062E", - "\uFD28", "\u0634\u0645", - "\uFD29", "\u0634\u0631", - "\uFD2A", "\u0633\u0631", - "\uFD2B", "\u0635\u0631", - "\uFD2C", "\u0636\u0631", - "\uFD2D", "\u0634\u062C", - "\uFD2E", "\u0634\u062D", - "\uFD2F", "\u0634\u062E", - "\uFD30", "\u0634\u0645", - "\uFD31", "\u0633\u0647", - "\uFD32", "\u0634\u0647", - "\uFD33", "\u0637\u0645", - "\uFD34", "\u0633\u062C", - "\uFD35", "\u0633\u062D", - "\uFD36", "\u0633\u062E", - "\uFD37", "\u0634\u062C", - "\uFD38", "\u0634\u062D", - "\uFD39", "\u0634\u062E", - "\uFD3A", "\u0637\u0645", - "\uFD3B", "\u0638\u0645", - "\uFD3C", "\u0627\u064B", - "\uFD3D", "\u0627\u064B", - "\uFD50", "\u062A\u062C\u0645", - "\uFD51", "\u062A\u062D\u062C", - "\uFD52", "\u062A\u062D\u062C", - "\uFD53", "\u062A\u062D\u0645", - "\uFD54", "\u062A\u062E\u0645", - "\uFD55", "\u062A\u0645\u062C", - "\uFD56", "\u062A\u0645\u062D", - "\uFD57", "\u062A\u0645\u062E", - "\uFD58", "\u062C\u0645\u062D", - "\uFD59", "\u062C\u0645\u062D", - "\uFD5A", "\u062D\u0645\u064A", - "\uFD5B", "\u062D\u0645\u0649", - "\uFD5C", "\u0633\u062D\u062C", - "\uFD5D", "\u0633\u062C\u062D", - "\uFD5E", "\u0633\u062C\u0649", - "\uFD5F", "\u0633\u0645\u062D", - "\uFD60", "\u0633\u0645\u062D", - "\uFD61", "\u0633\u0645\u062C", - "\uFD62", "\u0633\u0645\u0645", - "\uFD63", "\u0633\u0645\u0645", - "\uFD64", "\u0635\u062D\u062D", - "\uFD65", "\u0635\u062D\u062D", - "\uFD66", "\u0635\u0645\u0645", - "\uFD67", "\u0634\u062D\u0645", - "\uFD68", "\u0634\u062D\u0645", - 
"\uFD69", "\u0634\u062C\u064A", - "\uFD6A", "\u0634\u0645\u062E", - "\uFD6B", "\u0634\u0645\u062E", - "\uFD6C", "\u0634\u0645\u0645", - "\uFD6D", "\u0634\u0645\u0645", - "\uFD6E", "\u0636\u062D\u0649", - "\uFD6F", "\u0636\u062E\u0645", - "\uFD70", "\u0636\u062E\u0645", - "\uFD71", "\u0637\u0645\u062D", - "\uFD72", "\u0637\u0645\u062D", - "\uFD73", "\u0637\u0645\u0645", - "\uFD74", "\u0637\u0645\u064A", - "\uFD75", "\u0639\u062C\u0645", - "\uFD76", "\u0639\u0645\u0645", - "\uFD77", "\u0639\u0645\u0645", - "\uFD78", "\u0639\u0645\u0649", - "\uFD79", "\u063A\u0645\u0645", - "\uFD7A", "\u063A\u0645\u064A", - "\uFD7B", "\u063A\u0645\u0649", - "\uFD7C", "\u0641\u062E\u0645", - "\uFD7D", "\u0641\u062E\u0645", - "\uFD7E", "\u0642\u0645\u062D", - "\uFD7F", "\u0642\u0645\u0645", - "\uFD80", "\u0644\u062D\u0645", - "\uFD81", "\u0644\u062D\u064A", - "\uFD82", "\u0644\u062D\u0649", - "\uFD83", "\u0644\u062C\u062C", - "\uFD84", "\u0644\u062C\u062C", - "\uFD85", "\u0644\u062E\u0645", - "\uFD86", "\u0644\u062E\u0645", - "\uFD87", "\u0644\u0645\u062D", - "\uFD88", "\u0644\u0645\u062D", - "\uFD89", "\u0645\u062D\u062C", - "\uFD8A", "\u0645\u062D\u0645", - "\uFD8B", "\u0645\u062D\u064A", - "\uFD8C", "\u0645\u062C\u062D", - "\uFD8D", "\u0645\u062C\u0645", - "\uFD8E", "\u0645\u062E\u062C", - "\uFD8F", "\u0645\u062E\u0645", - "\uFD92", "\u0645\u062C\u062E", - "\uFD93", "\u0647\u0645\u062C", - "\uFD94", "\u0647\u0645\u0645", - "\uFD95", "\u0646\u062D\u0645", - "\uFD96", "\u0646\u062D\u0649", - "\uFD97", "\u0646\u062C\u0645", - "\uFD98", "\u0646\u062C\u0645", - "\uFD99", "\u0646\u062C\u0649", - "\uFD9A", "\u0646\u0645\u064A", - "\uFD9B", "\u0646\u0645\u0649", - "\uFD9C", "\u064A\u0645\u0645", - "\uFD9D", "\u064A\u0645\u0645", - "\uFD9E", "\u0628\u062E\u064A", - "\uFD9F", "\u062A\u062C\u064A", - "\uFDA0", "\u062A\u062C\u0649", - "\uFDA1", "\u062A\u062E\u064A", - "\uFDA2", "\u062A\u062E\u0649", - "\uFDA3", "\u062A\u0645\u064A", - "\uFDA4", "\u062A\u0645\u0649", - "\uFDA5", 
"\u062C\u0645\u064A", - "\uFDA6", "\u062C\u062D\u0649", - "\uFDA7", "\u062C\u0645\u0649", - "\uFDA8", "\u0633\u062E\u0649", - "\uFDA9", "\u0635\u062D\u064A", - "\uFDAA", "\u0634\u062D\u064A", - "\uFDAB", "\u0636\u062D\u064A", - "\uFDAC", "\u0644\u062C\u064A", - "\uFDAD", "\u0644\u0645\u064A", - "\uFDAE", "\u064A\u062D\u064A", - "\uFDAF", "\u064A\u062C\u064A", - "\uFDB0", "\u064A\u0645\u064A", - "\uFDB1", "\u0645\u0645\u064A", - "\uFDB2", "\u0642\u0645\u064A", - "\uFDB3", "\u0646\u062D\u064A", - "\uFDB4", "\u0642\u0645\u062D", - "\uFDB5", "\u0644\u062D\u0645", - "\uFDB6", "\u0639\u0645\u064A", - "\uFDB7", "\u0643\u0645\u064A", - "\uFDB8", "\u0646\u062C\u062D", - "\uFDB9", "\u0645\u062E\u064A", - "\uFDBA", "\u0644\u062C\u0645", - "\uFDBB", "\u0643\u0645\u0645", - "\uFDBC", "\u0644\u062C\u0645", - "\uFDBD", "\u0646\u062C\u062D", - "\uFDBE", "\u062C\u062D\u064A", - "\uFDBF", "\u062D\u062C\u064A", - "\uFDC0", "\u0645\u062C\u064A", - "\uFDC1", "\u0641\u0645\u064A", - "\uFDC2", "\u0628\u062D\u064A", - "\uFDC3", "\u0643\u0645\u0645", - "\uFDC4", "\u0639\u062C\u0645", - "\uFDC5", "\u0635\u0645\u0645", - "\uFDC6", "\u0633\u062E\u064A", - "\uFDC7", "\u0646\u062C\u064A", - "\uFE49", "\u203E", - "\uFE4A", "\u203E", - "\uFE4B", "\u203E", - "\uFE4C", "\u203E", - "\uFE4D", "\u005F", - "\uFE4E", "\u005F", - "\uFE4F", "\u005F", - "\uFE80", "\u0621", - "\uFE81", "\u0622", - "\uFE82", "\u0622", - "\uFE83", "\u0623", - "\uFE84", "\u0623", - "\uFE85", "\u0624", - "\uFE86", "\u0624", - "\uFE87", "\u0625", - "\uFE88", "\u0625", - "\uFE89", "\u0626", - "\uFE8A", "\u0626", - "\uFE8B", "\u0626", - "\uFE8C", "\u0626", - "\uFE8D", "\u0627", - "\uFE8E", "\u0627", - "\uFE8F", "\u0628", - "\uFE90", "\u0628", - "\uFE91", "\u0628", - "\uFE92", "\u0628", - "\uFE93", "\u0629", - "\uFE94", "\u0629", - "\uFE95", "\u062A", - "\uFE96", "\u062A", - "\uFE97", "\u062A", - "\uFE98", "\u062A", - "\uFE99", "\u062B", - "\uFE9A", "\u062B", - "\uFE9B", "\u062B", - "\uFE9C", "\u062B", - "\uFE9D", "\u062C", - 
"\uFE9E", "\u062C", - "\uFE9F", "\u062C", - "\uFEA0", "\u062C", - "\uFEA1", "\u062D", - "\uFEA2", "\u062D", - "\uFEA3", "\u062D", - "\uFEA4", "\u062D", - "\uFEA5", "\u062E", - "\uFEA6", "\u062E", - "\uFEA7", "\u062E", - "\uFEA8", "\u062E", - "\uFEA9", "\u062F", - "\uFEAA", "\u062F", - "\uFEAB", "\u0630", - "\uFEAC", "\u0630", - "\uFEAD", "\u0631", - "\uFEAE", "\u0631", - "\uFEAF", "\u0632", - "\uFEB0", "\u0632", - "\uFEB1", "\u0633", - "\uFEB2", "\u0633", - "\uFEB3", "\u0633", - "\uFEB4", "\u0633", - "\uFEB5", "\u0634", - "\uFEB6", "\u0634", - "\uFEB7", "\u0634", - "\uFEB8", "\u0634", - "\uFEB9", "\u0635", - "\uFEBA", "\u0635", - "\uFEBB", "\u0635", - "\uFEBC", "\u0635", - "\uFEBD", "\u0636", - "\uFEBE", "\u0636", - "\uFEBF", "\u0636", - "\uFEC0", "\u0636", - "\uFEC1", "\u0637", - "\uFEC2", "\u0637", - "\uFEC3", "\u0637", - "\uFEC4", "\u0637", - "\uFEC5", "\u0638", - "\uFEC6", "\u0638", - "\uFEC7", "\u0638", - "\uFEC8", "\u0638", - "\uFEC9", "\u0639", - "\uFECA", "\u0639", - "\uFECB", "\u0639", - "\uFECC", "\u0639", - "\uFECD", "\u063A", - "\uFECE", "\u063A", - "\uFECF", "\u063A", - "\uFED0", "\u063A", - "\uFED1", "\u0641", - "\uFED2", "\u0641", - "\uFED3", "\u0641", - "\uFED4", "\u0641", - "\uFED5", "\u0642", - "\uFED6", "\u0642", - "\uFED7", "\u0642", - "\uFED8", "\u0642", - "\uFED9", "\u0643", - "\uFEDA", "\u0643", - "\uFEDB", "\u0643", - "\uFEDC", "\u0643", - "\uFEDD", "\u0644", - "\uFEDE", "\u0644", - "\uFEDF", "\u0644", - "\uFEE0", "\u0644", - "\uFEE1", "\u0645", - "\uFEE2", "\u0645", - "\uFEE3", "\u0645", - "\uFEE4", "\u0645", - "\uFEE5", "\u0646", - "\uFEE6", "\u0646", - "\uFEE7", "\u0646", - "\uFEE8", "\u0646", - "\uFEE9", "\u0647", - "\uFEEA", "\u0647", - "\uFEEB", "\u0647", - "\uFEEC", "\u0647", - "\uFEED", "\u0648", - "\uFEEE", "\u0648", - "\uFEEF", "\u0649", - "\uFEF0", "\u0649", - "\uFEF1", "\u064A", - "\uFEF2", "\u064A", - "\uFEF3", "\u064A", - "\uFEF4", "\u064A", - "\uFEF5", "\u0644\u0622", - "\uFEF6", "\u0644\u0622", - "\uFEF7", "\u0644\u0623", - 
"\uFEF8", "\u0644\u0623", - "\uFEF9", "\u0644\u0625", - "\uFEFA", "\u0644\u0625", - "\uFEFB", "\u0644\u0627", - "\uFEFC", "\u0644\u0627", - ]; -}); - -function reverseIfRtl(chars) { - const charsLength = chars.length; - // Reverse an arabic ligature. - if (charsLength <= 1 || !isRTLRangeFor(chars.charCodeAt(0))) { - return chars; - } - const buf = []; - for (let ii = charsLength - 1; ii >= 0; ii--) { - buf.push(chars[ii]); - } - return buf.join(""); -} - const SpecialCharRegExp = new RegExp("^(\\s)|(\\p{Mn})|(\\p{Cf})$", "u"); const CategoryCache = new Map(); @@ -1665,9 +268,7 @@ function clearUnicodeCaches() { export { clearUnicodeCaches, getCharUnicodeCategory, - getNormalizedUnicodes, getUnicodeForGlyph, getUnicodeRangeFor, mapSpecialUnicodeValues, - reverseIfRtl, }; diff --git a/src/core/worker.js b/src/core/worker.js index 13a1af4e8..1056c9690 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -745,7 +745,7 @@ class WorkerMessageHandler { }); handler.on("GetTextContent", function (data, sink) { - const { pageIndex, includeMarkedContent } = data; + const { pageIndex, includeMarkedContent, disableNormalization } = data; pdfManager.getPage(pageIndex).then(function (page) { const task = new WorkerTask("GetTextContent: page " + pageIndex); @@ -760,6 +760,7 @@ class WorkerMessageHandler { task, sink, includeMarkedContent, + disableNormalization, }) .then( function () { diff --git a/src/display/api.js b/src/display/api.js index de5ad9e6c..dba9fc826 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -1122,6 +1122,8 @@ class PDFDocumentProxy { * @typedef {Object} getTextContentParameters * @property {boolean} [includeMarkedContent] - When true include marked * content items in the items array of TextContent. The default is `false`. + * @property {boolean} [disableNormalization] - When true the text is *not* + * normalized in the worker-thread. The default is `false`. 
*/ /** @@ -1598,7 +1600,10 @@ class PDFPageProxy { * @param {getTextContentParameters} params - getTextContent parameters. * @returns {ReadableStream} Stream for reading text content chunks. */ - streamTextContent({ includeMarkedContent = false } = {}) { + streamTextContent({ + includeMarkedContent = false, + disableNormalization = false, + } = {}) { const TEXT_CONTENT_CHUNK_SIZE = 100; return this._transport.messageHandler.sendWithStream( @@ -1606,6 +1611,7 @@ class PDFPageProxy { { pageIndex: this._pageIndex, includeMarkedContent: includeMarkedContent === true, + disableNormalization: disableNormalization === true, }, { highWaterMark: TEXT_CONTENT_CHUNK_SIZE, diff --git a/src/pdf.js b/src/pdf.js index dcad1a231..e2162ee11 100644 --- a/src/pdf.js +++ b/src/pdf.js @@ -35,6 +35,7 @@ import { FeatureTest, InvalidPDFException, MissingPDFException, + normalizeUnicode, OPS, PasswordResponses, PermissionFlag, @@ -100,6 +101,7 @@ export { isPdfFile, loadScript, MissingPDFException, + normalizeUnicode, OPS, PasswordResponses, PDFDataRangeTransport, diff --git a/src/shared/util.js b/src/shared/util.js index 873e26e0c..756cff9aa 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -1026,6 +1026,25 @@ function createPromiseCapability() { return capability; } +let NormalizeRegex = null; +let NormalizationMap = null; +function normalizeUnicode(str) { + if (!NormalizeRegex) { + // In order to generate the following regex: + // - create a PDF containing all the chars in the range 0000-FFFF with + // a NFKC which is different of the char. + // - copy and paste all those chars and get the ones where NFKC is + // required. + // It appears that most the chars here contain some ligatures. 
+ NormalizeRegex = + /([\u00a0\u00b5\u037e\u0eb3\u2000-\u200a\u202f\u2126\ufb00-\ufb04\ufb06\ufb20-\ufb36\ufb38-\ufb3c\ufb3e\ufb40-\ufb41\ufb43-\ufb44\ufb46-\ufba1\ufba4-\ufba9\ufbae-\ufbb1\ufbd3-\ufbdc\ufbde-\ufbe7\ufbea-\ufbf8\ufbfc-\ufbfd\ufc00-\ufc5d\ufc64-\ufcf1\ufcf5-\ufd3d\ufd88\ufdf4\ufdfa-\ufdfb\ufe71\ufe77\ufe79\ufe7b\ufe7d]+)|(\ufb05+)/gu; + NormalizationMap = new Map([["\ufb05", "ſt"]]); + } + return str.replaceAll(NormalizeRegex, (_, p1, p2) => { + return p1 ? p1.normalize("NFKC") : NormalizationMap.get(p2); + }); +} + export { AbortException, AnnotationActionEventType, @@ -1064,6 +1083,7 @@ export { LINE_FACTOR, MAX_IMAGE_SIZE_TO_CACHE, MissingPDFException, + normalizeUnicode, objectFromMap, objectSize, OPS, diff --git a/test/driver.js b/test/driver.js index 56d507dde..124bdf1e9 100644 --- a/test/driver.js +++ b/test/driver.js @@ -693,6 +693,7 @@ class Driver { initPromise = page .getTextContent({ includeMarkedContent: true, + disableNormalization: true, }) .then(function (textContent) { return Rasterize.textLayer( diff --git a/test/integration/copy_paste_spec.js b/test/integration/copy_paste_spec.js index 7de6f34eb..4f7d29bcf 100644 --- a/test/integration/copy_paste_spec.js +++ b/test/integration/copy_paste_spec.js @@ -28,7 +28,7 @@ describe("Copy and paste", () => { await closePages(pages); }); - it("must check that we've all the contents", async () => { + it("must check that we've all the contents on copy/paste", async () => { await Promise.all( pages.map(async ([browserName, page]) => { await page.keyboard.down("Control"); @@ -117,4 +117,47 @@ describe("Copy and paste", () => { ); }); }); + describe("all text", () => { + let pages; + + beforeAll(async () => { + pages = await loadAndWait("copy_paste_ligatures.pdf", ".textLayer"); + await mockClipboard(pages); + }); + + afterAll(async () => { + await closePages(pages); + }); + + it("must check that we've all the contents on copy/paste", async () => { + await Promise.all( + pages.map(async ([browserName, 
page]) => { + await page.keyboard.down("Control"); + await page.keyboard.press("a"); + await page.keyboard.up("Control"); + + await page.waitForTimeout(100); + + await page.keyboard.down("Control"); + await page.keyboard.press("c"); + await page.keyboard.up("Control"); + + await page.waitForTimeout(100); + + await page.waitForFunction( + `document.querySelector('#viewerContainer').style.cursor !== "wait"` + ); + + const text = await page.evaluate(() => + navigator.clipboard.readText() + ); + + expect(!!text).withContext(`In ${browserName}`).toEqual(true); + expect(text) + .withContext(`In ${browserName}`) + .toEqual("abcdeffffiflffifflſtstghijklmno"); + }) + ); + }); + }); }); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index b9e823515..f8d8573f5 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -585,3 +585,4 @@ !issue16221.pdf !issue16224.pdf !issue16278.pdf +!copy_paste_ligatures.pdf diff --git a/test/pdfs/copy_paste_ligatures.pdf b/test/pdfs/copy_paste_ligatures.pdf new file mode 100755 index 0000000000000000000000000000000000000000..97359312935b962e2e5ca6f9f1e6fb553ba8f1b9 GIT binary patch literal 32408 zcmaI8V{~rK)-4*_cAhv{v2EM7ZQC|hY}>Z&tk}ki?c8^NXYYN_ZRfT#TeZR%r9MXe zDfCIEARcE; zsg0qFsfekuy@@FdA0Ldfi<7CLEsRGtYYZ=9kN{lR%Xd^9gO0^burmV!Ih`H(b_^&n zWMLWdkG~PR%!iA|!?PhN{*ag2j#1V=c+75wS2fPyH+-$wwgK;`1cz&SfSy{fo>bT`Y^GuiNd=bE=fx7GQO*-iJyc|BHODCRASG(Y2YUKA;|0XkJGZ%bbI&B+ zOE836;4)IG78p}IlYh_bpWMGN{R_yy(tn^ZvM~Qk|F07Be-Zx=Mo|wJab=f(@Bw&u z{w4V5qc>#urvRWA76dQ?=#35kiT^?UkK%vD^kVjQF8^C?Gg<~(1_0ZCtoE-C{v~1r zu>VK&PrHA@zsKkJ@Av?E6;B6K0KL4Cm9mTN|IFs!F5vu6OL{o~C34}>>)l=kLCqoV zsSj7s9fxWdTnVI9U(EB1Ap#E5YsR>Y-$TS`4gZCMg*EqW&HC7QyWs5wvMLz(l&bi1 zbG&nrwi5q0v!yg0b>6;hKI>D1YvBuxB%JPG>&y>0Cw?_)xb05NCwN#X)5rhmn|~qv zuWva1V;e@s|IIl6wgn^8e~t0~w;umx?*CPj5x~U4$;R^kS)o_mur9dE9Sof>(-vlr z1QV^K+KnuYq?`9^srrqy;v$EyiNkJ2r~AWF42nBEGREBTAn?OzO8o`H;4q=Bq|NXS z0c?$jVBi*xoK&BGm15y7udErEL3JWi5t)g$dD3#F8;P{ 
zBjKBDD_jRPa@CuDX@wB(Kp%G%YFFGxXE@@g2JuRYy@I1#8Syr8O94-mb$Hx)S2ypQMA`zE>rczyYtQ)b076{SB zSb*U20nQsD}^$Xt@N_Xtj8cP-u+5hYRr6YEoLAzrc`zn*RhrJQT@4y)FbM%uD*)n#PNaPB<8LWA0f z7jtLFv$RFLpL{xf^G0UznncV}XD5rUPu7;&pXK7w>tATDk9Sx;RpZGY`nv(h&0nizA}GO?18tQB>enKTq%@Z5+&nTpHeEJ(tXe1`>A^ybl0 z;dmj$LMaUmOfxST9Ky;;e)}oIUyy9avBuFNupO-(8{oBhkjkj(psN>- z(hnR70M9kn3P5{UR7TB}(1Wp8+n!Y@)>4o~);vQN15-n}#Rh(X(y++byypxDFV#p4GmL{hfW(|d&QiufgxPRIuABQieEWSCOY z1z=gQGE@auaPn9kZ{?il(!s$_SXav9=QdzK;W!n*&IvTfoQ@Kk465oymx&%{#Ob5?~A3-Tx64HRKBa^aQ`(X{4Xnf4c6>T>rj-(=}r^)pWFB)4~e>1n&u^ zXB5chqemBz{-*!Ds^4$|d%Y*muh&6?L1`v55~$#`D;s^{GP ztShil2>74-oc2|aJI@Yc&Y!V<+97w(zfX6lXgJ2N2E03aR2ndl9U=bz_ z4cx1#QRv5D4V_HLUdWDCWSTn|BiNo$djz|*2z$PoQsSdhK zxeHNb2>SFO$6+G}p?6bKBfZ4bCC#CzcJKA~$eeZIFl!L!%is&=FPK)%q;K*n&@kI( zqMPT`q1-KCZx^BT?c)80;GQjo#mmypz{5zGU+tvV0YSQjty>w5n3x)ztd!n=76WKbY zm`rteL|#?(nc$)zQbOnP_*tdHE}vKh#8vT7e``$9UB|_9>$lsLhT5G{ou^o)UXO+H zn&!xCRxamb@{wMpPDl3ZgwU%ZodsBWNdsBW<97P=~Ki?Or(ZD1|HHFmO4JCKvgV^^SPlx}>N zjr+I5dLTxTk`&OD6hYt%8B;`*&(Ic8fP!dd7my`ou(D#DS*veO!dBnPhr+m=s z?s0lc%8w}@&_yEcJuDVnes1UA3citzL_J<~bYx*%tMOKG_vdF~t)L_xG(;>A?23;di$d9C>qNSM*nAu5a*5mrK+I$av zV<>K3^<3ZZ+C*<2X0{zhlEiPc+S&^c^gUm98U62NZ8o~fIaIRSh7PYaUe4V4tSh<` z*Q9ICm$D&Dav@b2hSJugM~HsU8z#6bprA1yNl2njgtGktvGvM;;({&UTNfBtDqREm z%94WCa7(~w{HyuQUY>x3sPOAiBa$J-Nu?(8d}cxv*kk(mn5HEtf(A~oU$g2|x8Adq zk}7zcnHnigs{lReiAFlJ69TqDE1l8=hS%#%%)UrzVH6n|ig}u>Vmf06iMd!TRrt4H zlrZMl%e_x|(&WUUl$g#hId%M1aCc-bM zIsHfb-QvZUCU?S|o5t5Ly;1vC1hLM#?+NO@$yXR%KzLe%WJ)y5`h>JapGtFJEo*w9 zMq(s6`L76BPt{+8Oc8hx-3$>1(iL#z*vm!@0tn|hz|jFgAF`mqCIPmW7NOkrO74 z9QEz^D97ZHU<-dV{QH!IPhptJVhT>QS2^AvP$?5XAgU(PbPY^8iP@IX?-Df6wYV-s z5EXd|q+&{3(YmXv8)qj@tv{S<8*3WsoT}-N7^;rJYb@rEXpe4;u1{_lVx+oY`(}8K zIkF$P9#Y?I&uJdi*~T)FP!0gq*K}8ip?T?ZQ2B!3hgWb_WOyT>;;w(JH#6Qg?B<-z zI^6J?r?-APtA7h~?k@O=r=cKPCwCGu`y0q%&t5o}L~t~D3;!?HVf zluxFLmu4ErYOEezO>U;SGsRC$Ie#Y&-Ga#n7#&0e<_IUTFN3c@tshe7r1K!}t1hTr z#H8DLMBAp>*2T8u%*@~;JO1V%XHq@TyDJ`-D*QDlHC*f=bu>aD>xtJHJ&&$kt5qda 
zSur+DoNSULdx?$8mY88-NH0r!om#yNhA>_DAUte>a}jQW(~TBMS#Kz$G|5nobUA-JgBO?6OUSgjlo^BX+@TY|c^NAvk7+7rSgBYTP*eC>mBCCdj z%mkfCjyfwdkx?;~>XMN}r@XkBWVI-EU;HW2YZHk@q)_0P5syr( zX4W3~?`6>haCH8uQlYt#fWIR1EEhUwcTTz~{Bw0&GYUTTZIBY7 z4z)h_?LuBW&*$PB_bn<8mA~&-<7PT%o!w^Zci2{!U+kSWq2Yd+JloS7?y@%Wn)x`{^3}N?*ZQx$s$RrNvPU^U~dhTb`b;J0?RqZ4eemE z9nnZs<=E-Yso%g%JMDJ>J~i*&z4~wP8Vo%&7Ze6D7<%4j%&kwfhjTDGWhD)z*b+e- zR_$W%8M8|Y5kJbr@*+!RW%Ae&ri#AI(#o0R8eC?>K^$wD;8Ln3XjJ=2djHg8O(Nrk zC-Vr;1(pP!r_-W@&)%O$hO-W=@~(%Wg*DIHQ$v#SuqFs8C)<(0?t_4uKcP}iUv$~a z)*=~1Py+dfZvIlQ8p{an+ZY~U7!CpaTueyKw>XNhIT=14>u6fr^oU#kN5Z?T@R0%SUjOS9h;uNcnr zLTKi5sPC$M7A;Ra8lMWh(Z7n9r{fy&r?6B;)$gTW#&xO-yq{9Tu112ZUYxvH+t%&f zFX7{7dunX$Pt!X4tbQsF3$D@JRhTXFT)K5$oKF8-pjOG(@HK5{0#28u_P4y|tY!|4 z#C@kDZESklh9d8^t1%{Qm~bAsYS<_!)T!gZ>RAQkma6k#K#mwsaEwS*pVWM9^!E(* zLV<84_Qeris7UrSKq{?B+gE&df_E^xtJychO6P}R+bfN$LEX!Nt`va(D9UNn7ExfH zme!Y_*dX80Av0l|iNQS}w(O;%Y1iP~bGsDyY$fw2%^msYy@4MDQehTQs+@Ze`9td| z>DlQ$g0{juR^c8%X$`AY5LCd}S8Qmv)J4@eA^GZ=0;*Ne<5U_+rtX{?*+?8XP4qHS zoGedxy_{OI@tV$J`U9pwbCqynZDh z`3OMNu)Jly6sH0Au=u#)OpAsJfKY$}VUxp2$5DR?k+>mh4g57u(pVP3$|CoS;cB`=_7ro%p|&^Q&8Ogdq=ac$_f zx0#f{)@f+29f7%=;{8$GCOVvrbLg3D9?u6oaZ<$8!_zttnhKIO`D149xL_PQ9k?|T zAr%R$jtv!~eJ-fWuUIRr^1HHA3fh8&Mss3fE5nic2~;pUqJwH*nUw*k7zV6MYio??bz~2`fzlcXqL&Y9kUomqg+{p{Rc*m;#WKRFf zJF3atIv?ZeQ6>20tx=AYSEFf(+j~b*2=R2Uw8jA$u*MutKsJ)z*9>-id#^h7vYYCm z$k@;w#!8)M&*PTw>en%*eT?(Dj{c=x`!#CLzqMV_781(pDg}1xYGYCJ_grgLjr*ZZ zL3JtYMxIYq`{lOo?qJXNjyt{rN6(sm-trYZm_*zunxq3mb5A37g`?fY;;V<+`T{?$ zpXbU;zGciE-FYl7i$Igp{LjPDrIlXcIIqiEFlT1;F5=9N{5ROcfT1~lRw1`TjRe>i#ezsSnQI$Y06u`V(b0&&67K9?vr?RB$K8vHWG zSPjJpTx7ahQZ9dZ<`t6##AhIMpC1!~8c!qRMjc{1Kyxj?3RnGMcF@DJMRoJEvR(-&P8r!*#(d(@54jhfPyqTkROC(V(jk zclmAbp-GcOx4XX@ckZARKGP|<&9`mMB_|$Ym=EIl^*c8zH%es00GwTkQEh;fn!#c; zsv-&%T}QNVzd`qC1dTBza>R!(IcQsegJ1=vMTE&b$>gCzVv^RuQiN}h#?A6xR|5$) zM97HGM4s^6kr#qUlGn(im#db6?4mzr56n3~rA4FS%AlY%xQF~NmN6_N3RCz&f1xI5 zAutWp1=$10QzEd|k^(<0*eW*t#{J02NboFPa;;QOMpoI%nTEWz>Qf88XX0~4DYZ`r 
zLQ@%l-egd1hWB)J`%mkE)YC-9srn>B+OED54ee+Rq7HiH-&{vs)$_X#XO~#&HEJe1 zmr?ULAyOd!b4BE)W8!dpQ9lKR(2&m>u}K0kdftyaE+`5QXS`|*7TjgV1Wr+GQR!|& zCHRaye4;h2B4agF!4vjm&1W9gsa$)!(k-fFP*U$U_h@&_bVyO{fE!evlL}M0IZg>k zALKmHl`GtQo46P0n`iU66sDV{I8fL?8ud8j%EZPKHJW8cb73F(@Z8)l_%My3$im=yok+Qgr~wsnNEDn;3KfuqbTd9UnxxQut^EnGU-5_S_V z4Twf&iYY-SsS^QAEj4c@f0ReVJ2pBwk%p5KmGY4Z{DsB<%YNh~H06RkC+1mRlK%wF z7v$8*G!=F9YzJ9ir)OzN$zlci7I>`^>g@8C@UKEsQ)jd8(_?gGz-y2Nz23*N6X)(F zp@@vvmkjq#)jDi5{;)aS0>S$t-o^W|$G9>dx8yM2j7j9{9~$JoiW8G^jv-(!5H>HR zbLYSKOg=d|;S%;Z^GJ0?vk7%i zXr5M*UCMqRKDF_O2-c9;NiZl?)pNzs6bc!6v}<(_Y#0(4YT%=gL5zqJGAHNAu4dLe zn#h|Os`=Do?&EyH`jUD{Rw*cKFs$`%ddSlR&=CvGFswgshOJ5so3H^eOwF?KDicyf z&)z)7Ol(seL^o-3Ko0We?r^oJQ*#kzUW!hDn*K55*Ekh zpX;uxo-sIRt$X66Hbtd2rK_e=&SHG*mc+|W-+Wlb3{4G=)ziBORk~9)B5I5TBbWnd zYz=<=pj)WG|A0-DHOL?m(6WrOKtL5>;zkbedFCmhO_WQGDpb%{JnmIT4l3?XDHoL_ zZ2*3=2}S*GYLz;!u2GUKWgR`$+WO z7>cp5XYCp8HTe#B>*uk3OID51G>S_vQ;ZYcm>KH4GGiQ0k#rKELlDJrEFwLU?D42* z);aN6lJiX8OtRENEm}^jK&uEJbgqhznNgx+TU`N#CH(U^H$xIg)tG1#$NyOP3}S z9^pg^t4S^iuM;BV zlIpWz+-t~wN+33iOyY}!+Xo+{8=40(2gnKfeYjA$ zQl*TVoN8|QpGTF0#K{J!Ql>8;F8w@J0S}n1(f#rAP4Zw8_j0oX_nTJP2ZwdY| z#yKQx9*OrbCBW8cfc+sQC=I$m0ep~}RUE7E;}Mxxk_OqCe8CLm{(OpKoY+exlWd&> zLa^wffwYON-W~$Kz`y?1#V5aiG&M!&d|tp0dcmya&uCR-B8B)B&w+F3-ze$99iCjw z5WI_I%vx|Hp;S!e9R1TN$ZF=I7Id_tcm( zDy<*)k7mg_3F>A8Nhi6^!gXz0@L^j*VyPLEoc1%H7qB}ClBX|+-FV)#J3P+BJ7@iV zeWLa+bcUx_SvSoA6X6T!1~(akV#oVs^exYq2A2xf^sD{GMBeX`CGe$-5V#S^313_X z!WV?tX7F5~j2~xX$Q6&?;pZxi)TC7sC1CSx1}n^*c&JmNOjX*R=ZjwEixve` zrWv%b&)JuH%%ntQQX#XtA15tznr*tySk=_7BLE%{S@ZVX&L!A&VQg#XPozMAIX zs>&;vl56`ToTYWcwjGCCZD1e?0_OC0s!*~O=&!H07rUy`u5<_H>fFF4pM;uqlHU+0 zMIKH3`LWzj%f?x&$munF(@dLe1mue2Bw=Lr3%{pBSHZ2&I>UFKg2Xg-x`Z55=GqkO z>c=0C+p85`^a>bC3NlhNEh{=;SMK(iGggm%;g14B0&tlq$XFgQwKnJHS4C+cK`t|o z>Bc`}SKM4jJ{o0AJCLLqoN)}GBWm9~Xv9Q#72ForI=XMb|K$1+wg^HAmP_^dxA!OB zIB?)FV?t@E=Qt35Q{ZxgxDZgBLPa6Rk2KIs3bv7qWM|P>L3LIU6Bsp z<{HT+^O2p^N;Qm1>O0SGBVfDM*0q@^J~+ACS7F&`6pICw5Tk%Rs8^FBk}KHFpM&Qv 
zK|O3%RpGhm;?WpP9K5JacqqtC#$*o~E^i@Kt9-_)!h6&ct@pDarQ0q{n)$1znJdl_ zo0%-E5kt@>VK7DqoW>t*TwsvBA5fX0TMX*Pq3zUXWsYF>25?Es;mMgNof#Ps7Ge2H z5*GPKmunS=q1n)NI$$t*<0u}nr^f>8H|*}Y8YXv)qqRS>7-2n8r}xe(xKf9-W-9c` zGUq9rGlMS2y>$kVXQH@xy$ut%lNH9bxd4y<%S0MeZ5Z z-q9u0Q%ls}=47T&Qx=H5Mq!>+O^;_!kDg+y?kH$#YO_3@jze#pIegh9%PWD5M^LNW z2lp~9O(w7o$kc|Zw`!^HHYB%K?OD2UpFJN@=het%o&D!6#qc+Mf`1E*d&MWL@|(J- z?SmCgmPpf#IG7|E)f>#0U?L1tq7?E!VL>b&*Y>e}L98#OVFreE^D=yt&FNlWX5O|u zfsL*hAAOMyCitR06sZHtAH7AmvO-={*Z32-TmxE2 z@#Ir+`F-Xary!r_fy{_f1QD0)o!98diTMkqf&{sN#1sLa9ww_wi0xf1q}RQ?)RQal zLgvS;PvWUT;y7BuG}fYs@Mw*4oE#-#2BTIu6s;)%1jf>oNSE@<%v^08EE0+sYrvrW z+TZIH%FRhxgBCX0keIb{kau6igMkNQMa4m*dBgoGA z1i*yl^1&T7oWJ>JpBk>yHzRobBB}A@m%D2f?3WpNEj(hA{7nO5^A@ZRq!iTtR*(**!@d+Aufr$=Q3RB2%DMbn<> zV51*^U~e`j{DXXoQHsf}5$X8+ifM9HAm!=Joc=wQd!9n!m)*HC?PkW+r)+1I47;X% zy{)^Ad*qj+!_l}lYmIcHNH)^_xHPNpNePapbUf+1BpHf`x74eJk-)SchY+IYnf1%% zX6?Hdx93By^-S-o(R`$K?dIJ^?W)~|&qLMJ;Oflg%ScO`4s#yw>o`7SpM%{|f9}oB z{mUHt7{U5ma>mQ^&&PTf>K`RDey*;%Hvty_c>!Gs`?)}<7hx`-T?oV%nEeU97Jpc3H95U2Q(H)yn{w?FXWsT1(XQW zo_U{X@&u$6ARS>c9BTw7z67n+7cD;CJ!UDqlfUCXmyIoejDHb)PN3{X)&ZzHF?y2t zAeoOVxmY0a#eM?0iQ1sH%RQC7vET)&?2Wl|rSfEcfWHEU zEJz+7MO`SSJwRj`f7?Wx@!u*lc7o7~Sr&p%tbps62Pjn1wa zwp~(2coo>ua{(O^3Q;&v#2r$73O@&wT9K#D%AS_|0P*OAy&Hsm%p)4cG_}Jx7=X1# zY>#$y*k)!vv<L-T{}8@Y{1!~W;UC;3u--M!r_mV6tGcU@r8BS1>6uxcNA3#7}J zzWN3Fk>rtSc4(v>MC}ox`6ne~sCn?q_Js9>*Nwd^jl1k^ak?TS-i%K=9^|DBWcz8r zjt{!)mck~O6YQ?VgZi6x+9xK%M7DbPeT`vrn$G<2GUxK^QhqJZacE0&>&1*_WF*DKy;kcfONKI8J~c8!)RbZ`I7v#3KF)00e$rQt@#%(Pwm zjkz3yJ;O~Cof~ggG=AU*@-x&2MmR^Dw6)cqUjBNKXZBBp!Y>-6mB_l>EIxfVKELlC zY!+pPCa;sg48qw@q8?&jl4<^cUkYT?j|l0K|@GEXWKe&7# zFU+r+?LVM-!+m3f2Yn0z!*H;F@C>BVkx&DFw=z6r*qKpnWo6JhATkY2BX|hKGl!=a zP2<`i;2Kb`wOk^7Abs#Yi$1$P-{1}Xb|#{mtl498ZgZ@2hIUXC~14yQ-;B>HCUR!yGT?#~A6b*K-{ zvadZ@QY^U8D5?e*mE~;>{#?L+!+UyjGsB 
z%V?S=Z0G6DegjH#(jS;(&|bp`D|32E(;;xrI`L&7^$*QV&$>fk_A1x~;jbA&uKuQeimrAlZ&ZNQa1Bw~r^i&FW-^TRAd8kMh)KcpK4lBb@)as#}TjZrMC%7VPcpzuLGkq`X*7%)+f5cy8J*|V{e(|M8F=VLxh(Okk68&o>;kT?pV{-&1$}WfIJO1uvVeU0 z=0kj*r!`274XAq%txvcP@J8!i_wK*(2cjv09PryfS5t9 zby`c#egQ5+Gogr9Lz%Ye<8<#)B`>$HD*X=lIabrxk&|2cB+eu(@+3Lx z(X#-wan{=XPUawLBinS>;oV5J2)!PC>cr5W64Mps#92KjstVUxyvD^Fd+ta#!}8W% z;s_QYk@=;$XKgEe@}V^Wmt>P~dlhCfm(<&;R;M=5`h`qb+xd6bpLLX8IAGH;mV zBdwG_G`^a!oX>mj@%HCzn+m!{nlcQ&7v8$=Qo7Ol&;%|To$TrT}L1xO0V6PDq$*vF}ytrF0 zEaUObKsJ_iV=Y2#l^p~}#}Te333ty^C;3k+saOelfON615^93a{TT&RZlC@8;S`Z| z6A~Z5H%KB?VFFwz_~GayIn z40KVuvoaoNPaf6crA0Sc1E?CpS>~kqu|bYJcY;wa)eZd7`#3aLvS3c&{Q!igI<3bDMx8WSSs(GNgK)w*(w!ZU7bA%9Met++|F?%#yF$JPCMdmhl$RJ zJDv8jyNZxRue0YaSa3$_aQACF;l+F0Bvb|3BoZZXQjiSH-U%qZECca~JAo=PPC*ME z`277lniv}S>>z_yo)HrR6|({(e#9r!*gINUg`#-_XFw$Ongm1_OEj%etcX>vXg(`a z&9u%Tghlx+z1&@@sR^|vE zunY)p2T*VZYO^#~&>a)(O;3B!j;e3?3GrFpXaHlpj)>kMDcv#HFFhzs1MaMumlq)r7ZBj3#@Xv|o@jx(NMxo?A9kkw0G5P|zf5O^42fS4L6{l1 zG5bg&K09Ix+5n@w7!gZV&I<6GJ+Strh74{oSyLWy{FlT^99?iL6oJTA2AY78tyb59 z_!_q)fsNS;je&k9#3P~A%LBE`aSWS_W2^wV=VR{m^ULP?q9F4S7 zmki;P7Db8yDn^nL9@iwm3KR@^W2x`p=xNfU52+ohz+XEBu2I6fgo}QiIhuH$e;kfV za&!mkx5-`l_+g9-c~%)TO~{pMU}bi3OSVa>Ihkqcm(b!L&X0bk86Y9ed zxe^+GQ>HTKb1MoP8}Np_$Xeu(o1P2(thQ0SFOg}?3eB&YU5s9dMR#{T=GXPs2X|@p zNa_4?aG#jQE3g|GTrH}Cxz?{Q%tckr*AeLhlC=>(Y}zA*cV>xA&JZo4E2CVI?Swv% zpv1_0N>s_Z@NV#X^;=|0U*QTZ$)6A;IlqVTAf|p!B?4p<&LYVdeGGaLEs>V0g{Ri}8FxiP$A`WX zb0BySxp@aLy_7D;!q=GOt2yI&V^J<4EeJojT~I*6NdqJjq6M)+d~kxuc}KqA6|}Wu zGlNtriqAzX>rW^v(kUI5ee%9oA6R}7)jh5!qq0%dc-kB7bla5nGA47Xs3|%(YL4}W ze}e@Y?i$M%F3MC1m;7cm&x{4?zoZ?HVZ6`(y6mEa4ix-JPtF(JxDOSg2!jl_$U9Q=2nFvGw2m8L>XyTU~eAd*|8aRP!9@{ z$YC6=0a6G`h&`}gzCnI~2T{PAd6)=F8*mxgKQPM&efw8pnr}caIx7Ea`|^ttagq1Z zm*oqg7(BrbcF#_NFqBF1#dJzPJmn96b|^oXDQ>a;R4F3J7W6sj<5hL|msM+b@V4Bh z?NsTw{GZD(FKi)nFbt;`;cLIO&W{Pi4uP2U%{5@IMUGV zF$>`sbLFx}WAOrtNXTlk)GVkgam9|Om=)=4R2V!o1FgQ+nyumQ0w#cg64Qu3gbS($ ziNozHg#XAhRX3&wDE(844<-ir7M$_96MA~++NwL%>DBFPCE zBtjk_gyfHp>kF7}CZ6mh;icyy$4 
z$Jl4WSm&IP%_C+;>=P<%VSxvy%kq+x@q4PcV>zaIZoe;I?F?F8B$%My!Xju1b-_%+ zjJ)uHh?5`L7c9wNM&#y!C;lg{qP0Iy?p5A9^Wj#rJAvf5&uoeRF!YQ{VosnFdvQzw z4HED*EApro^kZxB!;sP!8YL3+LwQkf^}HX7_B~_MWiOLb7p&OFr|3r#^UmS7&PHB2Py$tj~kixfkk^feu@dqofn3|gj{zW8B<^mG+}m*#V(7I zqDE^1h-!;zzbdRaWcuFoQkd6;V=g3~t`X_Y0+a5jDA}zLNLF@DhQeM%lm(r_r%j0d z(`2vt)L%4@1XeuH;9-~~cb5V-6jr;)8fVFWQ?z|#__wL~RZA|VthqKN68 zh)IYU(>||*glM{S^a=?*9Y{|s^o*DYQxu2d2Z%wplBW&1GiFU692PesxzgsO*(h_U zhJiN5 z5ekTd!lG`8qEALcyTV&P1b{IhWE1!vcD}BtR@;aNjKT{YGWy z0~nZcGU%_S1L`x}%-8@M8PR}PNRR*}0s!S9Jg@*3BFp?m8J*e(NXGfFVn7^fzatmI zD5j$nGlu}2CY~4=-to_b^vXOPsy2wNr4NDuI62cb-Ze#j>?;&7p*QO|!HIXsz%A0&8x8#$tNwpl48U^-W#wSKDwbMn<_NFp>9g&9v zT6MEU;TSzgn?#@=k6c-tY-vxExK+pK2Tg>)??vjn;sJJg%MgLeUzyaIlIRRn6-t7BW^(z!_#%CEH5nnPnkJw2N@k6wevhdsl_Kbhl3aqU@34F_V;kByUgt z&fG)x1k=j<6^ws8{?_=@Pfyy7Nk_c`F#KI*EA$Q*!?;r z4j0_v7r|Gy5Mi)EwR-e_Hn&<61jFdWlK3_=1(go6#xRR5Sxo1Rsu|6;j^wdys~muw zh~3lu$denbu^N=OCDOY_9h{pujA1_y3d8 z{XabI|4_PF*%{dXOX~iA8TbF8YX8H>|G$#D|6$Aj_a_6o3o~{@063sMZuxy<_wxHC za@!Kh#Xq`EYw@X9z)@ycTdPx&{xv z*Pa!9oHH24v5;4FfHBGMD?;o#=w>hU9}0H#VC=7I@FU?i`iZK}&_=-`6%)JFR1 zySl(ox&~T?zr>VbE)LFq0Pb@M07e9GF2nOfz!_%e34lU=Nz2Q-q5&2Fej#B|06!EY z41jR19EsE~MrKb9R8sq3oQI0ML1joF^QK39%q5_9OPe*^{1?`5|E8^L7Q8 zn<~=5&fe=>&3To97t+NTAo`>KSwZz@;cTp!zyU}I;5@p3{%@fe&SEy4S?phd`JvC- z#_K#dgQ15v@(i@|T6vf!@;v&(e*@B4um%Ntn8!~XG+}>~!Qel_Is9b;0N_7kZYZHJ zVKy94b^xG>14<6%>F~4S`~gfH5MK747&t-y&p(2KPytav5n&-=fDi}-_^*plC{zFh z5)lPM&jrtfAtE9IqEHA3iiz3q=jA`Ni2SwXto7VrFh=ZZgmO5GMgqX-JPTIxXLJql z?{oSyg2b;t9Df_pR9js$2_IGa^4T2vt|EIubSJD+!xhTxXoGUQ5sm|B!BY!fIn)SNYSVvRMyK2!}o$r~??d1kG))Odj1!aRcU z8dG&k+QmB93u~BjKv-c{vE6(e=@NZwHTi#Pr3Qd1!!AW6y z*lMItUzKC3zx*iSDpNgWO#u)G=O#I;OtZ22$n9`!7zN)QxmQr#FV|n$@tD2#f8t%7 zR8>W*uJ4e=A>o-HRGP7fZ;SNFHRyivpn&yZp?+9kbIHq!)W%xXQRb3fjqiFhEa1 zH7-7gL?8MY%Z-!_&>tlwQ@S!~W!~51Z~}Mxin2RFv~++P5B2B1_E&gmgjZ)6PaX(7 zSG$Cq-M)Fn>5+9W@sN%5c!$28wN8=YwUZ^KPTc+1Tg5qE;MSbpk{s&!L$*fq90N`c$f{nMI74i->PzD`)>@Tl0O?K!*+#=?G% zZ%QzoJf{+VArUDsj{)TK^VNN;+&FhQ(U9L_~mh#qiE5)~c0`%q5GD|Eb 
zM+61S%0$`Ts(K>Gl*)rn?$Nztfg2x}${)OI zyWKa8rL5HW%m%@Rv{B9vgzQ9Q-@6y?8wVk$J=qe4e_n|pbN00q_%!f^FYS4{M@^X_ zE>q^|0MDt}soBexhK-B8s$wqNc-Po6RA(uqWQryG(8U}39lN}hOGW~C{|XSiyTW%WW{U>QC&OMAk{XU&h=pX$7RFf*`ic3DFA&hB^B{hLJS2GWlfH&0&JiR2FqP+BsV zfN}{;mag$TyylFb+PwUF>DcqwaMojQ6#L>)xHqf&#fL@f2TSeACEb0Ogr%&l$(77q z^u}@yn&1jb@p|zx1=0+Ca`MD&{O(n>=6S@bTX1U>9u0($x#3&bpa z3mJ?(gsK$ryx#HBmyfmJOINF?9EgjfkbVnqey*it>!cRboMB)p{3yA5#X##5`C&o# zT!)2<^n&`wd`)I1DyV^nbCr_Fhn}id9hV~Fd5h>=jtsBk4^jInGro}*R|evI7K1%D z=wA$|O^-2F55Zd+yEIjmEn@PX5kW`3dKl@!#3SkTRvMsz$jkY+14}IRC@Ag9~p-ucc zTV94AaFY-@iKarYv;9)jiplLV9=YbKYz~O1jr2%*)<1aaS$pqlcb5+h4Sh5Tms-%1 z;)VH?Z&-}}Pk}tWI^ZXAjsu@@BKVzd?Ibv-K8|5DPS&&7C_|OTF0OvCGcr68sX~ua z1$L1J+I?Tuy!$2VyS_vh-%ef+&vr6e%33>VjyuC38Fzf(Y5Ila;YvS^VVXW#+T?t@ z+t0LssfMSs!@A;>p-dX~QL_X|9f_?DR5S(ymZ+%9E*Htb*^5+ZMfphUY@5ue5MyH_ zR;lnSPY_1XC5?G1TY}Boo3>OWj^-_`gO62Ap0CtC2h`;`^`HKoSBDEE6`HF9THXM!ZCuOY&9$GejIM z`Y)1{n)d7KQPFC_3uBd}^P5W1rOo)Bm1)aPLk)w+PM;j7$Cs9QwydKLqiXq^ZzUaP z;}(5KzkW^n5t!uH7cJFkztA#WvOK^MtC_o3ZWHUrUP)4k_-+j0pVOQ1+x0$ql&uoY zym4!KihI+orK3aRs-+*&2r`>VUgDY%)m307=JP`OQ(|V4l^)_F=Tciwsvl2@qNhu{ zW>Qhh(-&P11u~BT2fUR~wZ!LL#cxMf#A;VZ467>IpO;93J+~m4b{#`Gc0|>)pY~ql zuu3=0X!mM-vK3X={NQb3xH0NwBbhVlF;(b^y{l^D(tct6#br8Ka#S~$)y4k1o%^yO0Rr7jO~iKV5q%LE24 zRXrOd7Af?p8mVe_o_3oJ!>N_HMGrNqym6gM?2Ae*x~hiW_^-;llpGfpMLl-7awQB{ z@{C)f;&PL$T^YivPz(Qi_jV>f? 
z0=4ycKDphD`%YK3*v!~Sl#^lLvpQY%$@2xnO|w~z=_^@EY_@hIUY;JF-B-r1-3O&3 zbh^FN6UUHIt}(`$$#C;GHZ?gO#shS&y&CR&me6FJ>p`Ep4%jW=vzq#T*;c;$%1bWZ zyrtIft5R#zzB=j0cq&%&l;;Jgl!clm3SQpL5vM6G{w`SivF5Ag$J@Oo1Hd-BFPbl7 zhX`K74y_ml%T4bAn{3pYO) zYUel2=s9VjDfWB)^@^Gk#;%(=+e|0T zx%mx2eIWBNeE;nednRTeHcqES00Km#hP5@dzkMHQ~& zpa-KbR|{C!R_4!tMdWhb&NTZvVrKLtH|a`G-T=3WM&a!~@2uPE>S|FruY%qLq-5Nq zqj+YdNuipk-|=Yf(T9%BB+9I(Z=&$Ylq1+sv8TBBA4rFA>zbz52cM(dYZN_qt$;qI zBcH_(!nW)dd^OP${-FZC1-@&Km6W1sU37NmztUBfG_zfxuOBx!?0w)|XsSQrZB!FK zrAd9ih7Ee^@2eC(W^Fj@Mccb{dJ?O2Ve{n(VtnPDAAV12@^qr#rjwI#ET8$KoRV=L zqY0c(r+&<-Lnx~+h<7fs$Dw7ET|{D|(je^j<4S1eDGKQGa$p#*Tr`0di1D5+57o(*Yc1)`sJC2Gbbd zAwoBTwGv@;M2?M&99;*@q_+LXC*z!=*@`HxO~3Y72lH&{ro<^=)iZ@S$%|Y@he~BZiFxmH$B`;7*5h9 zY@Ia8A=8MSq@Vgk+@D%Qp9nwq_9 z%Sx-CrbO6d9(aV91{H&SWb~Os;tHardkfr|1SDI>+!!PyKcgAPBXrbQ3jh;r5uiHt z6V@a8`5^*}ev`Gv6q$Q26ix2>>gojv^hpc-#GMJ%m&vF1sMkpx*kBN)j5%@=4(Q2n zx~c#2ZaE!|Jdxci=8P7wGz$lJD4Uxx>R#_mE8g1H_Nzwho4s#wN~a~qoiKUCE&d9xEAS#8v6#)!7@DKDZfzylcrsFlgtpiDYgwUE!p;m`}1K=;hyESy4B?N z?`x{Fw;;;_6s!6)oZ!0HB25<7lBcPBoEq)o6%R*4r&e6sT%SdmFW4TiaCrhfT)!vX zrYvgCfB-03lWuSnw=%vWuME6gva5$Cw+4KBFBj#eR0==rwP$o`*`f?gsc%}D+u68I zB+{Lpvj4GbzMSYunnLY`Ea6*W=6k&D4Pv70$`JMOrj!Y}IFI)9ko&JlNRyTa-1?}^1>gf|idHiLJ zB@slUu{$1@uLdi;TD`xHb3`(JM`nf`t9&wDWO5;`Z_(%e7x0)4`ThKM&#(9!0{izk z9GeYgc?&Xy`)OatuC~04tCzo?e{0g?RS=4+`oc%tQTV7UdeTpt6AkXZyh9ZC%-Zns zv-zS!L13V}&ucev$>Sc-*t56mBlK5e((KFzf;JslLSG!DxBKWV9jG6xeEFuYxwHVv zyZAaB)x_u@rrkHQ5_vz?b1ZMF(c{kQcVF=p*=SOE#4Fl^wNa3noDLPQ z9j#+cr5o@2HkJc{BI#%E%#S>Dk(2wXs*SuYu?97j3UKC+_9#fvf!TzUN>p}y*Qgz`p z9&f)g2UCdNJ&8$Y+ns#w9`$lV`1PHkcm?!~_I*iF4dEuDeOkOW>aQ-4yRln(s~0Qc zIj2zVhAM?b4Lm*aLG2DK#jkDP>0e7oLu@|q;} zp}q9DgWMY}Yc}h)uz>INr-(T15qoqBR|o`v^9)D(iT%a{3qTt;<&iqI_W@u?K7D(^FE{W8sa>6jhXSLy;+$^?R>JCDH>VKxbxhnATJRii`u^fw#HIy4 z#Re+9GE!vI*7^Hb%06X-7xf!e?&SutPcvd`lk+p(0XoiXC2qI}&%zg|uLqNr2V@cN zq8yg*3^OgnO3_)6E!{9%)_ZfSy7{RtvB(A3Yk9Ztv*^}(*$kyyTNV|BYb*QfoZ~7! 
ziNwq{2gE&Zte7l_GjbCP1s3Z1ThE+V*RmCr3O{KXUqSv_)loa|V7Y=JgUd8QQ+*>^S z{d2n3*?@SD=B3W2qppWNrgNv6?hI?*WxD6m0p5Am_|Sr^1?WvT-_^{WcX1|z_8up# zP5NN0!nYD>axyw9dbC8|POiQQb-$RlKmB1EyrZW>(6&oW zLkqlc-&^hOXS^75j`gb?ljQ3BQcsr`68s_rZv|Z}(x_8is0-_3i&k~4u~eoFpaFmP zXvb17IVNnEwS>6ICDJJ}??sK;=u*uZbSH|c8S@jv_N#1p_&V&8Hdu3SM(7X0Mr_^@ z*nm6(UvS@N0&`b8TQC#v2*Q15?tbPhaL~2*ddJ;`m6p1kogp{D-KLmJJ%m2Aa)w4w zJpDmBzYyUpUqMQ@?A-FZaqJqn8uzesg=wP_Dn)QxBM_n*Q!$wbFkhT4LDN%-s(XotlhKtm@7pRoRt(QuXK`|^cVH&xDBciDvJq0f2hU(4~d zaZ52MiOD%W7*Y}5nQTF*AWZ#ZI8q-oiZV&yj>v&OTuJ1sm|zbNNb_u&mqmONqBg?@ zFL`Ju1dPU0nJl3zf~w+kYF~oTA^8tz+>ISA{N+l+x>e1C=K#^itUc`PQ>le!!lq;i zCyq2tK+Uow$>76sIc@vJyL65Ts2eNDcc}RHZTZ26z{kg%vk%72qB>A}G^Pn=?f41` zC;Ju49_``bU9tmOud+p3{d2ALzK#p4AsyZWSmn3n2p6kFESKbFE%=-Rb~Oo}Q1EV) zaAcva!lVsoaF|R?^Q7)Uy)*9Dr@Q++p0~6gXUk$_#@~t4{(y{`Y1DCTQ@v19nxxip ztBL8}=9gqO7Y*a^_cwCvjH_K9KOCIKoI8CpLUk9aI|Xr7(c+Kq+e$7W0ppzD1w_+~ z_CcBB=zH|Eok-j{>0=*0yKC@T{?IBg%g&gXqr4tnt6qK@qE<;jW|~D=t|YwG6*mui z-n=2>EO3BKc>i41vZ4IJsJ3aANK{3?1 zx`Qvd_^N9g6G!J0`Sk5639pofvuoJtP$zQ>#%g>`=9t0u=&UfxnjygYp4_ARfa>$W zM>?iz7ND0U9uc=;$FqBF*I0`tq0GJd4;*RyK7Pk)5D4$&#mgO+QIpMb==n&rrSn0w zo07RLiZ)`Par(o#guJ^SeVTJG*yakgfk8dQvZdH0(32S4!`^Ld7o#V_x|g5j z2Sw4ta=7)ipExsy-1VR4%5z`#ERfG&uWebv$(vEv(Xiw2&0^19c`e(N0xj1NZ~Q)e z;d+oeey2$|=OXr@iN^zb1amFWo$6r*-mE_?U$#c$us`IvQdg{7Tv)!jBV&HF6$vLv z>`V1`*bi<5Gd;zzLtZ3o)Ks?_dgrs3Ns^!uHe8qQ#RMLBT$(UKZ@hjv;S(lC&#kP_ zjaZC@b)rGCeHoX&yli98+Rz>Lmdi02ALLT*i*!oi_;f919=z}_R7z)jaH6wEl<6Rd znE1IWr9vR52Au>Vh@)}z<}IM`4C7_x#8MBdAr5+(Z|}1T9`mb=53=lpH8V-_A=HQ{ z3b`*tV)n+Da;<%2#iy6PHPn*mxgxGq;g3d^sTt42R95-w+O%5OGBA7!Izl-n!OT<% z4A@zf5Vs9pRzCsxti>C};*(CfzUSB`uK*$VqvZ)^n5goL>?5{qNz{`;QM$EHH4Lws z8!~quu<^BhTXfIj)MqWYSk8Gnc`Q64(RZtK;T8RU7r~)mB?+^2g7~)-d^I+VZ`W zefr7+F5ir!p?9ddH3O|=W#Xc@r`D@b(zkWT-xGRU^IV0B`}weUfE;%}$UEd$b>YEs zv89<04}qjkw%;zx#1!87y8af2ONPXZNO@4_U4cK%(r&2Uvz7(%ld*_hS+T_v{$+O- zay_#6M`7#uha$=9rS$%!Y`Z3MG0`CITM2kmaR;?>Sf8JSYp<4WwZ>ntT?LMsJ%D=J zWfxj*vhU6lm-H>Ws?Lu`Fs;UwWgcQ_v#sBdjl$Ll3cJYHN*Ojxv%9JYS|HP8^|{0= 
zYCgHeWOvWrCwBMxE8lOhHbN+swhB~}Wd5-|e8xGeLfrZhH{V7Es?TGs;rd1HU2QcwKJ60SpK{671D(;i(dbmsH>zwTA%qrG=fh>E}zF(#8bOWLcuYcvY&qB4Jy!KlY^LQa&qt+IRo+C@TD+5yO^?|-RP9wb z0t{Ulm$5h{w`n=u;X4j0wgc3=(oRHsQBMavIDPFnS1)8S(c)tCIi`CWchp|6*V(>J zp*~Xgb_2|TIe8~*aR>V_g89%bPsB~3Lv<#yE>ff!TactFpU%9*o37zUc9CjB>|{L@^LNVxUySBj1*e^0)K4vdI%> zLUR!FXd=Wdc<@z`UYvINph!(jKMs_BkPU6Mi%JLKnOcpMSD&Wqs$`j(`_w@iKPhdeACXmiG zIbEJ3&!{dx>)^`V>v*I42HLC=?buv3O=>vOw?aj3H1i1wVc2)!Dzq7vdmHlg0aNRZ zk1x~|whs>|4j4@vl@0c+m)jdxRjW_vs>Drpy#`OjxzUH^t z^4276wLT*M*px2ugjY*ML78Cupn!*7AvA<@DsQnZTRjL9{l=jESRHkNy?RmFX;5mg zu(rVHVI00W7FpYH{?}w#o6uGJ(2_d2dFMWut8!UzY#wh-)a+Ls^?K9xu@EuZu?hay zZ_T`z&AcXFEC^e8dGZ%qkuzOje4PFDE+5w}w_j1B1kPRCu}#}C^%o@_#ZG+*gha9A zG3pulUpbTS$HgtCZO5`{jsz zUqbH<{yv{$z{F_0wlvEHwDIW|zLQA4V1>wD7VC&T*!o3@X=cU1Ptg5{vg7LVlfW{^ zM@ON3Sueln&~k6Bbyppt2F4`SUnS|S#k`(SSsc^b6}dQe$(w@ZGehMI zV${6@l5R2{1miIG{S`t9B-Bh)V*2K?AJO+W=$RV>-R43susPKX46i9Y=(XKvPk5qm zrRQqW1Esm!IbI34i4X_s;Y;b7TOD7$Zy1$vUbBj;dlvh7LgdtSg4%g%ou^-g8T59S zy~GvY_%wzuk_qzGKfQq0F=#|eZ8OIG-L9+WK%yM+`xSn;>H^*r1aK$t(wbWOpJr6t0t(iBT(dSdz?S&i_|y?yxm9YyNmR=-eegN?tNkofhOw!<$-q)^IbK<|^xc}YP+E#f2`q7J)-m(6%oJR0IcBe!^^BCpq59{=W*xl*v!zsbhO17Sc1hdQz z>wNP`6(JSrEx01;f;2jD=`EH0Rmd1cZI{rh9_5;jLE(Z}{-h!+mx{cZs6Nb5orrSI zj~44LBUj|9kvU_WJHEv;vXOSXE}VC&*XsGNa=L67%DmjZyJ6Sq#$kRl?t(s?6XCpR zcVMt&;Ao!umVGIu+v@UL%}=@4ZBf}bb-q;SzMNK(>2M+Kw9dU64Xe@0Xg#6yNk`>1 zy_=iy3d?d^5Uoi@kqaQbJX}2YZqu25BZ>-%W8oCWHs&NShGa$Z8@}(ee*SLGy+}x4 zi`1+ktS>f0FKh@Ck79~)*o07lTY0k~Zws02r%6i2HlA|wp(inTA*p9e?TJ@%Q&`K2 z4~_emls0q{BYw+@5y@enQWYXjhl|t)c~yd#cZSkKFApFP+5+NL5aOOWxV29vd~%)W z{@w9vf`gGvwt$>TfPCB=yDtZtJBF893CB~9C33U#OK(Xv?U@E>&^sp+=GeNwsXo;h zKE5}`D-@@2s|+eQ&P?YVyF=g~s_*KFo}PNFb&{>%k|y{g;yO=O~*+G?*%pDUsa^k9bg?IKrH6XO{A5&`gu7E&Bx@ZlXbsvYVW(o5E)J z#KOYDFn4Da${QwKC1k)7J2h3w=XBpI8L-ql0jrJS0vVx~_){xAM zJ1bi&+bg?_--+El7S)zIjX%3ER;|8Vtz4~UDlepidoPp+yT08nHAeB8vtDTe4i{_s z^4c7gFAf`5#p|U%e$EABhne8msZvs_Q{fOZ5;uaX?|K~-@)%p>xn*4vWG#!kVH5~z^>~GXo@LUBFWL<{V^g67EKU|$@@$AP!T~WW0=)4 
zXZP1P)Et+_7uDX^InAI)3Ps5UV&T_AMX22euIIV*!JlbQ;?2n@psA-6!cJu=MS=51 z;uG{Tp;LNv-WM%tZy@g>vrE9!jX1C>sPLi(Hm;9lf5z}Bj=X?Ov#fG;;{x{+Zx(r$ zaPf{*7v$bi1gY-VQ|dvJ=-Yq8@;YO){K2ym6@~uFYXkm0vCL2A%b$s4{w{aQUubp- zofw)OrPA)rX1@DMipy_=lXJO8(ESmMJGdZH0ev!}nefy42E~Gb1X=Me%SRIH7QSo6 zcN-=`?@KB7wG;-wu(+*A=O&nJB*e`&6WSrbHFZU2I_NQ*_T#}=A+q(j2kCHAn5Pk{ zfgcy^gqDT?t}=Xc%wK%*^4cvoc?MYTXIh#?ud(R$QXBb6r8i549ciyt6PHV;J-3Ox zr=rXgwWpTD-vgE-i%0V9v-)|=?-(Ce-{+SvDDl{iR}6Q28nqy6NX>YcT#@T3 zTYt)eq8E$!Lvkk9S?i6-4CPrW`3=)Ni!P)^%k(Dhn@uyLAs|g~D9hhK{9AS!h|n)0 z9psF3_0N2<|B^^&s-U5(s{Ox5r28Y|4~8!G{}GSwN3NqkNmr2H)AszyF#Ic#PC-ra zj5>GL{DU0$Czk;OB{9^r@k2UC&gWvGw$kb`BVZD29Y6hj6t)o>LFccw9Oj z2&BgudyZQTfuRgKc=`QmGVpeDb4I$H5ed&(f|$oY>v8r>f6gF=oOn(sJ8LlkSb#80 zyWePN!XQBmlMO0{Au$RG2@8OQAW%WU-!CzII03M*h?uY#07GvR!0_83LI5EVK>-XM z_KfT%ECdmN3PMGM09IDNk=A~`(f^W%h{5d#yYYXB+fUvg0QeIMe%134|DaPMU>I5? zX6$EV!t?pl|G~!mjazr-Q2xTL`)B*t`KQ!XkUyEd+5qSeh|hCZfkpl({OjaZT5;eD zjUdW!Oir=t5mrIpf~OBY-_?~r-#(aQ8n*ZtezIR(i5hWgSLnA zgJYWd4>2T#zMRwLSbOkeF@Nl2kaYJyyO47j{_xUZ(O*s)V-El5LQM6Mw&%$F^x2?4 zxU+wgeC^CE4IKQD0LTv_EXG7J3`zjx$JFV&x_SX1KNyi`l4mar0P-V!*6*$!e&fd8 zY3ALK=#Y@RA)z552?-ZfLF=KBpJW?T2vrd}*lL0}#Cbk|1UQ*ZH$D}40^li;s^~bu z#mwNa&Oksy5T)WM9#zUU08S1YFCK#MvMz7@4Vx}4!l>CHFbE6Fo4DN*V0@z@@0ZNK>t@bS|zyTq}Y9gcs(sENk+B>NFdms(`wGH9^PH-^< ztBf>>Bw8Hp;^uVX7^ z34r+FAPf&xL`(naNO~Y_#q|}H z|F|EfB*kj)<>e+01p4~=3iv_=Ts`c7U@Fjk2nscWB6%ANgw>-vYlKlIhY5VURm{^tHrkT_K6_xt}Q`HwpOFz5e~mfvN6 z(}S53aa|8rgf|@NAq|H)c(?*ENQ(#v0=UoY%M)W@=e2eoFgJS#xaZlt@JIsx((N~O z2)MYdtA`8BOB#mxP3ZuKd0~v~Oj;87XVY);=d&lS=nD5f`=zR;DDCa-fDji&2*X81 zM1}ZmAutfXAXpT{FD3{@V%{4OgdhR|5)-sVo+GTK^?Toc(yNBTI1CgV`BN_hWFrh0 z7O~~G1%sjdg2FaXej7m%IKME=R!9s978VAH2>#UjTi-wQN&?Sbmfv2UA8+P)E#}q4 zxCP)Zzi{64>s|W4b@Atb|8Ly^qxU~I`M1FR4_yC&>)%4)-xB_hb^Qmfe+z+sOZY$5 z^?wYm-&PiwoXt|KzL<3b*`EsiZNd={zS2Rr$`9? 
z$hd_;)j;6lU$wv-=q>6jybfYF?|=)C^@4L++?6 zhs`K5#o(IMe<4z8;*;5?BLkOR#|B#i)&Z$x1$VhBK1EyM_Oti)%U70Vl|Mf#JLV&*;`X5_` zXG8yyAzc@S`HKgpq~Pl8?Sk?I2m*CIkO&7j=AbRc8lfFbhAU?DLvsGOKERQkV{u)-|)5lFNJ=BVr0x*ziq1_+*2{@MrlIR}EE zv#0XG923SY#eu(V9Q@c`z|@`X(*e)+CV(dAdkkQ!->ucg#wH$i!{){w1uGq0%E^4v tk)6%t7}-~dM@ZxZDHb*Xmem3N=-=S=YZriw1atmVM2zgmJ^&fX{{nA+9QObK literal 0 HcmV?d00001 diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index ce46d4ef7..cf817b328 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -2340,7 +2340,9 @@ page 1 / 3`); ); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items, styles } = await pdfPage.getTextContent(); + const { items, styles } = await pdfPage.getTextContent({ + disableNormalization: true, + }); expect(items.length).toEqual(1); // Font name will be a random object id. const fontName = items[0].fontName; @@ -2376,7 +2378,9 @@ page 1 / 3`); const loadingTask = getDocument(buildGetDocumentParams("issue13226.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const text = mergeText(items); expect(text).toEqual( @@ -2394,7 +2398,9 @@ page 1 / 3`); const loadingTask = getDocument(buildGetDocumentParams("issue16119.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const text = mergeText(items); expect( @@ -2410,7 +2416,9 @@ page 1 / 3`); const loadingTask = getDocument(buildGetDocumentParams("issue13201.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const 
text = mergeText(items); expect( @@ -2436,7 +2444,9 @@ page 1 / 3`); const loadingTask = getDocument(buildGetDocumentParams("issue11913.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const text = mergeText(items); expect( @@ -2456,7 +2466,9 @@ page 1 / 3`); const loadingTask = getDocument(buildGetDocumentParams("issue10900.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const text = mergeText(items); expect( @@ -2475,11 +2487,27 @@ page 1 / 3`); const loadingTask = getDocument(buildGetDocumentParams("issue10640.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); - const text = mergeText(items); + let { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); + let text = mergeText(items); + let expected = `Open Sans is a humanist sans serif typeface designed by Steve Matteson. +Open Sans was designed with an upright stress, open forms and a neu- +tral, yet friendly appearance. It was optimized for print, web, and mobile +interfaces, and has excellent legibility characteristics in its letterforms (see +figure \x81 on the following page). This font is available from the Google Font +Directory [\x81] as TrueType files licensed under the Apache License version \x82.\x80. +This package provides support for this font in LATEX. It includes Type \x81 +versions of the fonts, converted for this package using FontForge from its +sources, for full support with Dvips.`; - expect( - text.includes(`Open Sans is a humanist sans serif typeface designed by Steve Matteson. 
+ expect(text.includes(expected)).toEqual(true); + + ({ items } = await pdfPage.getTextContent({ + disableNormalization: false, + })); + text = mergeText(items); + expected = `Open Sans is a humanist sans serif typeface designed by Steve Matteson. Open Sans was designed with an upright stress, open forms and a neu- tral, yet friendly appearance. It was optimized for print, web, and mobile interfaces, and has excellent legibility characteristics in its letterforms (see @@ -2487,8 +2515,8 @@ figure \x81 on the following page). This font is available from the Google Font Directory [\x81] as TrueType files licensed under the Apache License version \x82.\x80. This package provides support for this font in LATEX. It includes Type \x81 versions of the fonts, converted for this package using FontForge from its -sources, for full support with Dvips.`) - ).toEqual(true); +sources, for full support with Dvips.`; + expect(text.includes(expected)).toEqual(true); await loadingTask.destroy(); }); @@ -2501,7 +2529,9 @@ sources, for full support with Dvips.`) const loadingTask = getDocument(buildGetDocumentParams("bug931481.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const text = mergeText(items); expect( @@ -2529,7 +2559,9 @@ sozialökonomische Gerechtigkeit.`) const loadingTask = getDocument(buildGetDocumentParams("issue9186.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const text = mergeText(items); expect( @@ -2550,7 +2582,9 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) ); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await 
pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const text = mergeText(items); expect(text).toEqual( @@ -2568,7 +2602,9 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) const loadingTask = getDocument(buildGetDocumentParams("bug1755201.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(6); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const text = mergeText(items); expect(/win aisle/.test(text)).toEqual(false); @@ -2586,10 +2622,12 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) const pdfPage = await pdfDoc.getPage(568); let { items } = await pdfPage.getTextContent({ includeMarkedContent: false, + disableNormalization: true, }); const textWithoutMC = mergeText(items); ({ items } = await pdfPage.getTextContent({ includeMarkedContent: true, + disableNormalization: true, })); const textWithMC = mergeText(items); @@ -2607,7 +2645,9 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) ); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); const text = mergeText(items); expect(text).toEqual("𠮷"); @@ -2619,7 +2659,9 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) const loadingTask = getDocument(buildGetDocumentParams("issue16221.pdf")); const pdfDoc = await loadingTask.promise; const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); expect(items.map(i => i.str)).toEqual(["Hello ", "World"]); diff --git a/test/unit/pdf_find_controller_spec.js b/test/unit/pdf_find_controller_spec.js index a371ecbe5..c0100bd8b 100644 --- 
a/test/unit/pdf_find_controller_spec.js +++ b/test/unit/pdf_find_controller_spec.js @@ -542,7 +542,7 @@ describe("pdf_find_controller", function () { pageIndex: 0, matchIndex: 0, }, - pageMatches: [[2743]], + pageMatches: [[2734]], pageMatchesLength: [[14]], }); }); @@ -561,7 +561,7 @@ describe("pdf_find_controller", function () { pageIndex: 1, matchIndex: 0, }, - pageMatches: [[], [1493]], + pageMatches: [[], [1486]], pageMatchesLength: [[], [11]], }); }); @@ -594,7 +594,7 @@ describe("pdf_find_controller", function () { [], [], [], - [2087], + [2081], ], pageMatchesLength: [ [24], @@ -629,7 +629,7 @@ describe("pdf_find_controller", function () { pageIndex: 0, matchIndex: 0, }, - pageMatches: [[1501]], + pageMatches: [[1497]], pageMatchesLength: [[25]], }); }); @@ -670,7 +670,7 @@ describe("pdf_find_controller", function () { pageIndex: 0, matchIndex: 0, }, - pageMatches: [[1946]], + pageMatches: [[1941]], pageMatchesLength: [[21]], }); }); @@ -692,7 +692,7 @@ describe("pdf_find_controller", function () { pageIndex: 0, matchIndex: 0, }, - pageMatches: [[1946]], + pageMatches: [[1941]], pageMatchesLength: [[23]], }); }); @@ -712,7 +712,7 @@ describe("pdf_find_controller", function () { pageIndex: 0, matchIndex: 0, }, - pageMatches: [[1946]], + pageMatches: [[1941]], pageMatchesLength: [[23]], }); }); @@ -976,4 +976,61 @@ describe("pdf_find_controller", function () { pageMatchesLength: [[5, 5]], }); }); + + it("performs a search in a text with some arabic chars in different unicode ranges but with same normalized form", async function () { + const { eventBus, pdfFindController } = await initPdfFindController( + "ArabicCIDTrueType.pdf" + ); + + await testSearch({ + eventBus, + pdfFindController, + state: { + query: "\u0629", + }, + matchesPerPage: [4], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[6, 25, 44, 63]], + pageMatchesLength: [[1, 1, 1, 1]], + }); + + await testSearch({ + eventBus, + pdfFindController, + state: { + query: 
"\ufe94", + }, + matchesPerPage: [4], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[6, 25, 44, 63]], + pageMatchesLength: [[1, 1, 1, 1]], + }); + }); + + it("performs a search in a text with some f ligatures", async function () { + const { eventBus, pdfFindController } = await initPdfFindController( + "copy_paste_ligatures.pdf" + ); + + await testSearch({ + eventBus, + pdfFindController, + state: { + query: "f", + }, + matchesPerPage: [9], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[5, 6, 6, 7, 8, 9, 9, 10, 10]], + pageMatchesLength: [[1, 1, 1, 1, 1, 1, 1, 1, 1]], + }); + }); }); diff --git a/test/unit/unicode_spec.js b/test/unit/unicode_spec.js index 1753ef26b..1f7de5a1d 100644 --- a/test/unit/unicode_spec.js +++ b/test/unit/unicode_spec.js @@ -15,11 +15,9 @@ import { getCharUnicodeCategory, - getNormalizedUnicodes, getUnicodeForGlyph, getUnicodeRangeFor, mapSpecialUnicodeValues, - reverseIfRtl, } from "../../src/core/unicode.js"; import { getDingbatsGlyphsUnicode, @@ -152,69 +150,12 @@ describe("unicode", function () { expect(getUnicodeRangeFor(0x0041)).toEqual(0); // fi (Alphabetic Presentation Forms) expect(getUnicodeRangeFor(0xfb01)).toEqual(62); + // Combining diacritic (Cyrillic Extended-A) + expect(getUnicodeRangeFor(0x2dff)).toEqual(9); }); it("should not get a Unicode range", function () { - expect(getUnicodeRangeFor(0x05ff)).toEqual(-1); - }); - }); - - describe("getNormalizedUnicodes", function () { - let NormalizedUnicodes; - - beforeAll(function () { - NormalizedUnicodes = getNormalizedUnicodes(); - }); - - afterAll(function () { - NormalizedUnicodes = null; - }); - - it("should get normalized Unicode values for ligatures", function () { - // fi => f + i - expect(NormalizedUnicodes["\uFB01"]).toEqual("fi"); - // Arabic - expect(NormalizedUnicodes["\u0675"]).toEqual("\u0627\u0674"); - }); - - it("should not normalize standard characters", function () { - 
expect(NormalizedUnicodes.A).toEqual(undefined); - }); - }); - - describe("reverseIfRtl", function () { - let NormalizedUnicodes; - - function getGlyphUnicode(char) { - if (NormalizedUnicodes[char] !== undefined) { - return NormalizedUnicodes[char]; - } - return char; - } - - beforeAll(function () { - NormalizedUnicodes = getNormalizedUnicodes(); - }); - - afterAll(function () { - NormalizedUnicodes = null; - }); - - it("should not reverse LTR characters", function () { - const A = getGlyphUnicode("A"); - expect(reverseIfRtl(A)).toEqual("A"); - - const fi = getGlyphUnicode("\uFB01"); - expect(reverseIfRtl(fi)).toEqual("fi"); - }); - - it("should reverse RTL characters", function () { - // Hebrew (no-op, since it's not a combined character) - const heAlef = getGlyphUnicode("\u05D0"); - expect(reverseIfRtl(heAlef)).toEqual("\u05D0"); - // Arabic - const arAlef = getGlyphUnicode("\u0675"); - expect(reverseIfRtl(arAlef)).toEqual("\u0674\u0627"); + expect(getUnicodeRangeFor(0xaa60)).toEqual(-1); }); }); }); diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 3e5fb4d45..f5f241407 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -18,8 +18,8 @@ /** @typedef {import("./interfaces").IPDFLinkService} IPDFLinkService */ import { binarySearchFirstItem, scrollIntoView } from "./ui_utils.js"; +import { getCharacterType, getNormalizeWithNFKC } from "./pdf_find_utils.js"; import { createPromiseCapability } from "pdfjs-lib"; -import { getCharacterType } from "./pdf_find_utils.js"; const FindState = { FOUND: 0, @@ -126,12 +126,7 @@ function normalize(text) { } else { // Compile the regular expression for text normalization once. const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join(""); - const toNormalizeWithNFKC = - "\u2460-\u2473" + // Circled numbers. - "\u24b6-\u24ff" + // Circled letters/numbers. - "\u3244-\u32bf" + // Circled ideograms/numbers. - "\u32d0-\u32fe" + // Circled ideograms. 
- "\uff00-\uffef"; // Halfwidth, fullwidth forms. + const toNormalizeWithNFKC = getNormalizeWithNFKC(); // 3040-309F: Hiragana // 30A0-30FF: Katakana @@ -840,6 +835,7 @@ class PDFFindController { } let promise = Promise.resolve(); + const textOptions = { disableNormalization: true }; for (let i = 0, ii = this._linkService.pagesCount; i < ii; i++) { const extractTextCapability = createPromiseCapability(); this._extractTextPromises[i] = extractTextCapability.promise; @@ -848,7 +844,7 @@ class PDFFindController { return this._pdfDocument .getPage(i + 1) .then(pdfPage => { - return pdfPage.getTextContent(); + return pdfPage.getTextContent(textOptions); }) .then( textContent => { diff --git a/web/pdf_find_utils.js b/web/pdf_find_utils.js index 24ec4c575..78b747706 100644 --- a/web/pdf_find_utils.js +++ b/web/pdf_find_utils.js @@ -112,4 +112,46 @@ function getCharacterType(charCode) { return CharacterType.ALPHA_LETTER; } -export { CharacterType, getCharacterType }; +let NormalizeWithNFKC; +function getNormalizeWithNFKC() { + /* eslint-disable no-irregular-whitespace */ + NormalizeWithNFKC ||= ` ¨ª¯²-µ¸-º¼-¾IJ-ijĿ-ŀʼnſDŽ-njDZ-dzʰ-ʸ˘-˝ˠ-ˤʹͺ;΄-΅·ϐ-ϖϰ-ϲϴ-ϵϹևٵ-ٸक़-य़ড়-ঢ়য়ਲ਼ਸ਼ਖ਼-ਜ਼ਫ਼ଡ଼-ଢ଼ำຳໜ-ໝ༌གྷཌྷདྷབྷཛྷཀྵჼᴬ-ᴮᴰ-ᴺᴼ-ᵍᵏ-ᵪᵸᶛ-ᶿẚ-ẛάέήίόύώΆ᾽-῁ΈΉ῍-῏ΐΊ῝-῟ΰΎ῭-`ΌΏ´-῾ - ‑‗․-… ″-‴‶-‷‼‾⁇-⁉⁗ ⁰-ⁱ⁴-₎ₐ-ₜ₨℀-℃℅-ℇ℉-ℓℕ-№ℙ-ℝ℠-™ℤΩℨK-ℭℯ-ℱℳ-ℹ℻-⅀ⅅ-ⅉ⅐-ⅿ↉∬-∭∯-∰〈-〉①-⓪⨌⩴-⩶⫝̸ⱼ-ⱽⵯ⺟⻳⼀-⿕ 〶〸-〺゛-゜ゟヿㄱ-ㆎ㆒-㆟㈀-㈞㈠-㉇㉐-㉾㊀-㏿ꚜ-ꚝꝰꟲ-ꟴꟸ-ꟹꭜ-ꭟꭩ豈-嗀塚晴凞-羽蘒諸逸-都飯-舘並-龎ff-stﬓ-ﬗיִײַ-זּטּ-לּמּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-﷼︐-︙︰-﹄﹇-﹒﹔-﹦﹨-﹫ﹰ-ﹲﹴﹶ-ﻼ!-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ¢-₩`; + + if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { + const ranges = []; + const range = []; + const diacriticsRegex = /^\p{M}$/u; + // Some chars must be replaced by their NFKC counterpart during a search. 
+ for (let i = 0; i < 65536; i++) { + const c = String.fromCharCode(i); + if (c.normalize("NFKC") !== c && !diacriticsRegex.test(c)) { + if (range.length !== 2) { + range[0] = range[1] = i; + continue; + } + if (range[1] + 1 !== i) { + if (range[0] === range[1]) { + ranges.push(String.fromCharCode(range[0])); + } else { + ranges.push( + `${String.fromCharCode(range[0])}-${String.fromCharCode( + range[1] + )}` + ); + } + range[0] = range[1] = i; + } else { + range[1] = i; + } + } + } + if (ranges.join("") !== NormalizeWithNFKC) { + throw new Error( + "getNormalizeWithNFKC - update the `NormalizeWithNFKC` string." + ); + } + } + return NormalizeWithNFKC; +} + +export { CharacterType, getCharacterType, getNormalizeWithNFKC }; diff --git a/web/pdf_page_view.js b/web/pdf_page_view.js index e0b0d5632..ed3f751b5 100644 --- a/web/pdf_page_view.js +++ b/web/pdf_page_view.js @@ -368,6 +368,7 @@ class PDFPageView { if (!textLayer.renderingDone) { const readableStream = pdfPage.streamTextContent({ includeMarkedContent: true, + disableNormalization: true, }); textLayer.setTextContentSource(readableStream); } diff --git a/web/pdf_viewer.js b/web/pdf_viewer.js index 9bd14f39f..e3bee5c15 100644 --- a/web/pdf_viewer.js +++ b/web/pdf_viewer.js @@ -665,6 +665,8 @@ class PDFViewer { } buffer.length = 0; const page = await this.pdfDocument.getPage(pageNum); + // By default, getTextContent is called with disableNormalization set to + // false, which is fine because we want a normalized string. 
+ const { items } = await page.getTextContent(); for (const item of items) { if (item.str) { diff --git a/web/text_highlighter.js b/web/text_highlighter.js index c1b828c40..41721554d 100644 --- a/web/text_highlighter.js +++ b/web/text_highlighter.js @@ -208,9 +208,20 @@ class TextHighlighter { return; } + let lastDivIdx = -1; + let lastOffset = -1; for (let i = i0; i < i1; i++) { const match = matches[i]; const begin = match.begin; + if (begin.divIdx === lastDivIdx && begin.offset === lastOffset) { + // It's possible to be in this situation if we searched for an 'f' and we + // have a ligature 'ff' in the text: both matches begin at the same + // position, so the 'ff' must be highlighted only once. + continue; + } + lastDivIdx = begin.divIdx; + lastOffset = begin.offset; + const end = match.end; const isSelected = isSelectedPage && i === selectedMatchIdx; const highlightSuffix = isSelected ? " selected" : ""; diff --git a/web/text_layer_builder.js b/web/text_layer_builder.js index 20fc7f67d..81a77918c 100644 --- a/web/text_layer_builder.js +++ b/web/text_layer_builder.js @@ -20,7 +20,8 @@ // eslint-disable-next-line max-len /** @typedef {import("./text_accessibility.js").TextAccessibilityManager} TextAccessibilityManager */ -import { renderTextLayer, updateTextLayer } from "pdfjs-lib"; +import { normalizeUnicode, renderTextLayer, updateTextLayer } from "pdfjs-lib"; +import { removeNullCharacters } from "./ui_utils.js"; /** * @typedef {Object} TextLayerBuilderOptions @@ -212,6 +213,16 @@ class TextLayerBuilder { } end.classList.remove("active"); }); + + div.addEventListener("copy", event => { + const selection = document.getSelection(); + event.clipboardData.setData( + "text/plain", + removeNullCharacters(normalizeUnicode(selection.toString())) + ); + event.preventDefault(); + event.stopPropagation(); + }); } }