diff --git a/src/core/fonts.js b/src/core/fonts.js index 996dbf98d..246d1d630 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -2396,6 +2396,32 @@ var OpenTypeFileBuilder = (function OpenTypeFileBuilderClosure() { return OpenTypeFileBuilder; })(); +// Problematic Unicode characters in the fonts that need to be moved to avoid +// issues when they are painted on the canvas, e.g. complex-script shaping or +// control/whitespace characters. The ranges are listed in pairs: the first item +// is the code of the first problematic char, the second one is the next +// non-problematic code. The ranges must be in sorted order. +var ProblematicCharRanges = new Int32Array([ + // Control characters. + 0x0000, 0x0020, + 0x007F, 0x00A1, + 0x00AD, 0x00AE, + // Chars that are used in complex-script shaping. + 0x0600, 0x0780, + 0x08A0, 0x10A0, + 0x1780, 0x1800, + // General punctuation chars. + 0x2000, 0x2010, + 0x2011, 0x2012, + 0x2028, 0x2030, + 0x205F, 0x2070, + 0x25CC, 0x25CD, + // Chars that are used in complex-script shaping. + 0xAA60, 0xAA80, + // Specials Unicode block. + 0xFFF0, 0x10000 +]); + /** * 'Font' is the class the outside world should use, it encapsulate all the font * decoding logics whatever type it is (assuming the font type is supported). @@ -2679,33 +2705,18 @@ var Font = (function FontClosure() { * @return {boolean} */ function isProblematicUnicodeLocation(code) { - if (code <= 0x1F) { // Control chars - return true; + // Using binary search to find a range start. 
+ var i = 0, j = ProblematicCharRanges.length - 1; + while (i < j) { + var c = (i + j + 1) >> 1; + if (code < ProblematicCharRanges[c]) { + j = c - 1; + } else { + i = c; + } } - if (code >= 0x80 && code <= 0x9F) { // Control chars - return true; - } - if ((code >= 0x2000 && code <= 0x200F) || // General punctuation chars - (code >= 0x2028 && code <= 0x202F) || - (code >= 0x2060 && code <= 0x206F)) { - return true; - } - if (code >= 0xFFF0 && code <= 0xFFFF) { // Specials Unicode block - return true; - } - switch (code) { - case 0x7F: // Control char - case 0xA0: // Non breaking space - case 0xAD: // Soft hyphen - case 0x2011: // Non breaking hyphen - case 0x205F: // Medium mathematical space - case 0x25CC: // Dotted circle (combining mark) - return true; - } - if ((code & ~0xFF) === 0x0E00) { // Thai/Lao chars (with combining mark) - return true; - } - return false; + // Even index means code in problematic range. + return !(i & 1); } /** diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 70e09390b..ca614c71c 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -130,6 +130,7 @@ !issue4800.pdf !issue4801.pdf !issue5334.pdf +!issue5540.pdf !issue5549.pdf !issue5475.pdf !annotation-border-styles.pdf diff --git a/test/pdfs/issue5540.pdf b/test/pdfs/issue5540.pdf new file mode 100644 index 000000000..3c62879a7 Binary files /dev/null and b/test/pdfs/issue5540.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index f6137dc61..855d03273 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1549,6 +1549,12 @@ "rounds": 1, "type": "load" }, + { "id": "issue5540", + "file": "pdfs/issue5540.pdf", + "md5": "12b69b19e366232422812ad8b2534f37", + "rounds": 1, + "type": "eq" + }, { "id": "issue2176", "file": "pdfs/issue2176.pdf", "md5": "ca5cbbc7e2b717997f0b24ffa485eac6",