Bidi: skip invalid Unicode character to make indexing work
For Arabic characters, Unicode character codes are mapped to Unicode character types by using the character code as an index into a lookup table. However, the character code 0x061D is undefined (and therefore invalid) in the Unicode standard. The imported list does not contain an entry for it, and its absence shifts the index of every item after it. Therefore, put an empty string at its position to make indexing work properly, and issue a warning in the unlikely event that we encounter this character.
This commit is contained in:
parent
11839f018f
commit
995be19378
@ -17,13 +17,14 @@
|
|||||||
|
|
||||||
(function (root, factory) {
|
(function (root, factory) {
|
||||||
if (typeof define === 'function' && define.amd) {
|
if (typeof define === 'function' && define.amd) {
|
||||||
define('pdfjs/core/bidi', ['exports'], factory);
|
define('pdfjs/core/bidi', ['exports', 'pdfjs/shared/util'], factory);
|
||||||
} else if (typeof exports !== 'undefined') {
|
} else if (typeof exports !== 'undefined') {
|
||||||
factory(exports);
|
factory(exports, require('../shared/util.js'));
|
||||||
} else {
|
} else {
|
||||||
factory((root.pdfjsCoreBidi = {}));
|
factory((root.pdfjsCoreBidi = {}), root.pdfjsSharedUtil);
|
||||||
}
|
}
|
||||||
}(this, function (exports) {
|
}(this, function (exports, sharedUtil) {
|
||||||
|
var warn = sharedUtil.warn;
|
||||||
|
|
||||||
// Character types for symbols from 0000 to 00FF.
|
// Character types for symbols from 0000 to 00FF.
|
||||||
// Source: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
|
// Source: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
|
||||||
@ -52,10 +53,14 @@
|
|||||||
|
|
||||||
// Character types for symbols from 0600 to 06FF.
|
// Character types for symbols from 0600 to 06FF.
|
||||||
// Source: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
|
// Source: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
|
||||||
|
// Note that 061D does not exist in the Unicode standard (see
|
||||||
|
// http://unicode.org/charts/PDF/U0600.pdf), so we replace it with an
|
||||||
|
// empty string and issue a warning if we encounter this character. The
|
||||||
|
// empty string is required to properly index the items after it.
|
||||||
var arabicTypes = [
|
var arabicTypes = [
|
||||||
'AN', 'AN', 'AN', 'AN', 'AN', 'AN', 'ON', 'ON', 'AL', 'ET', 'ET', 'AL',
|
'AN', 'AN', 'AN', 'AN', 'AN', 'AN', 'ON', 'ON', 'AL', 'ET', 'ET', 'AL',
|
||||||
'CS', 'AL', 'ON', 'ON', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM',
|
'CS', 'AL', 'ON', 'ON', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM',
|
||||||
'NSM', 'NSM', 'NSM', 'NSM', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
'NSM', 'NSM', 'NSM', 'NSM', 'AL', 'AL', '', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||||
@ -144,6 +149,9 @@
|
|||||||
charType = 'R';
|
charType = 'R';
|
||||||
} else if (0x0600 <= charCode && charCode <= 0x06ff) {
|
} else if (0x0600 <= charCode && charCode <= 0x06ff) {
|
||||||
charType = arabicTypes[charCode & 0xff];
|
charType = arabicTypes[charCode & 0xff];
|
||||||
|
if (!charType) {
|
||||||
|
warn('Bidi: invalid Unicode character ' + charCode.toString(16));
|
||||||
|
}
|
||||||
} else if (0x0700 <= charCode && charCode <= 0x08AC) {
|
} else if (0x0700 <= charCode && charCode <= 0x08AC) {
|
||||||
charType = 'AL';
|
charType = 'AL';
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user