create bidi closure
This commit is contained in:
parent
e8ca7b44d9
commit
3b29736882
630
src/bidi.js
630
src/bidi.js
@ -3,55 +3,56 @@
|
|||||||
|
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
// Character types for symbols from 0000 to 00FF.
|
var bidi = (function bidiClosure() {
|
||||||
// Expands a flat list of (type, run length) pairs into an array holding one
// type string per code point, in order.
function expandTypeRuns(runs) {
  var expanded = [];
  for (var r = 0; r < runs.length; r += 2) {
    for (var remaining = runs[r + 1]; remaining > 0; --remaining)
      expanded.push(runs[r]);
  }
  return expanded;
}

// Character types for symbols from 0000 to 00FF.
var baseTypes = expandTypeRuns([
  // 00-1E
  'BN', 9, 'S', 1, 'B', 1, 'S', 1, 'WS', 1, 'B', 1, 'BN', 14, 'B', 3,
  // 1F-2F
  'S', 1, 'WS', 1, 'ON', 2, 'ET', 3, 'ON', 6, 'CS', 1, 'ON', 1, 'CS', 1,
  'ON', 1,
  // 30-40
  'EN', 10, 'ON', 7,
  // 41-60
  'L', 26, 'ON', 6,
  // 61-7E
  'L', 26, 'ON', 4,
  // 7F-9F
  'BN', 6, 'B', 1, 'BN', 26,
  // A0-AF
  'CS', 1, 'ON', 1, 'ET', 4, 'ON', 4, 'L', 1, 'ON', 5,
  // B0-BF
  'ET', 2, 'EN', 2, 'ON', 1, 'L', 1, 'ON', 3, 'EN', 1, 'L', 1, 'ON', 5,
  // C0-FF
  'L', 23, 'ON', 1, 'L', 31, 'ON', 1, 'L', 8
]);

// Character types for symbols from 0600 to 06FF
var arabicTypes = expandTypeRuns([
  // 0600-060F
  'AL', 12, 'CS', 1, 'AL', 1, 'ON', 2,
  // 0610-064A
  'NSM', 6, 'AL', 53,
  // 064B-065F
  'NSM', 14, 'AL', 7,
  // 0660-066F
  'AN', 10, 'ET', 1, 'AN', 2, 'AL', 3,
  // 0670-06D5
  'NSM', 1, 'AL', 101,
  // 06D6-06ED
  'NSM', 19, 'ON', 1, 'NSM', 4,
  // 06EE-06FF
  'AL', 18
]);
|
||||||
|
|
||||||
function bidi(text, startLevel) {
|
|
||||||
/**
 * Tells whether an integer is odd by testing its lowest bit.
 *
 * @param {number} i Value to test; the bitwise AND coerces it to a 32-bit
 *   integer first, so negative odd numbers also report true.
 * @return {boolean} True when the lowest bit of i is set.
 */
function isOdd(i) {
  return (i & 1) !== 0;
}
|
||||||
@ -123,307 +124,310 @@ function bidi(text, startLevel) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var str = text.str;
|
return (function bidi(text, startLevel) {
|
||||||
var strLength = str.length;
|
var str = text.str;
|
||||||
if (strLength == 0)
|
var strLength = str.length;
|
||||||
return str;
|
if (strLength == 0)
|
||||||
|
return str;
|
||||||
|
|
||||||
// get types, fill arrays
|
// get types, fill arrays
|
||||||
|
|
||||||
var chars = new Array(strLength);
|
var chars = new Array(strLength);
|
||||||
var types = new Array(strLength);
|
var types = new Array(strLength);
|
||||||
var oldtypes = new Array(strLength);
|
var oldtypes = new Array(strLength);
|
||||||
var numBidi = 0;
|
var numBidi = 0;
|
||||||
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
for (var i = 0; i < strLength; ++i) {
|
||||||
chars[i] = str.charAt(i);
|
chars[i] = str.charAt(i);
|
||||||
|
|
||||||
var charCode = str.charCodeAt(i);
|
var charCode = str.charCodeAt(i);
|
||||||
var charType = 'L';
|
var charType = 'L';
|
||||||
if (charCode <= 0x00ff)
|
if (charCode <= 0x00ff)
|
||||||
charType = baseTypes[charCode];
|
charType = baseTypes[charCode];
|
||||||
else if (0x0590 <= charCode && charCode <= 0x05f4)
|
else if (0x0590 <= charCode && charCode <= 0x05f4)
|
||||||
charType = 'R';
|
charType = 'R';
|
||||||
else if (0x0600 <= charCode && charCode <= 0x06ff)
|
else if (0x0600 <= charCode && charCode <= 0x06ff)
|
||||||
charType = arabicTypes[charCode & 0xff];
|
charType = arabicTypes[charCode & 0xff];
|
||||||
else if (0x0700 <= charCode && charCode <= 0x08AC)
|
else if (0x0700 <= charCode && charCode <= 0x08AC)
|
||||||
charType = 'AL';
|
charType = 'AL';
|
||||||
|
|
||||||
if (charType == 'R' || charType == 'AL' || charType == 'AN')
|
if (charType == 'R' || charType == 'AL' || charType == 'AN')
|
||||||
numBidi++;
|
numBidi++;
|
||||||
|
|
||||||
oldtypes[i] = types[i] = charType;
|
oldtypes[i] = types[i] = charType;
|
||||||
}
|
}
|
||||||
|
|
||||||
// detect the bidi method
|
// detect the bidi method
|
||||||
// if there are no rtl characters then no bidi needed
|
// if there are no rtl characters then no bidi needed
|
||||||
// if less than 30% chars are rtl then string is primarily ltr
|
// if less than 30% chars are rtl then string is primarily ltr
|
||||||
// if more than 30% chars are rtl then string is primarily rtl
|
// if more than 30% chars are rtl then string is primarily rtl
|
||||||
if (numBidi == 0) {
|
if (numBidi == 0) {
|
||||||
text.direction = 'ltr';
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (startLevel == -1) {
|
|
||||||
if ((strLength / numBidi) < 0.3) {
|
|
||||||
text.direction = 'ltr';
|
text.direction = 'ltr';
|
||||||
startLevel = 0;
|
return str;
|
||||||
} else {
|
|
||||||
text.direction = 'rtl';
|
|
||||||
startLevel = 1;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
var levels = new Array(strLength);
|
if (startLevel == -1) {
|
||||||
|
if ((strLength / numBidi) < 0.3) {
|
||||||
for (var i = 0; i < strLength; ++i) {
|
text.direction = 'ltr';
|
||||||
levels[i] = startLevel;
|
startLevel = 0;
|
||||||
}
|
} else {
|
||||||
|
text.direction = 'rtl';
|
||||||
var diffChars = new Array(strLength);
|
startLevel = 1;
|
||||||
var diffLevels = new Array(strLength);
|
|
||||||
var diffTypes = new Array(strLength);
|
|
||||||
|
|
||||||
/*
|
|
||||||
X1-X10: skip most of this, since we are NOT doing the embeddings.
|
|
||||||
*/
|
|
||||||
|
|
||||||
var e = isOdd(startLevel) ? 'R' : 'L';
|
|
||||||
var sor = e;
|
|
||||||
var eor = sor;
|
|
||||||
|
|
||||||
/*
|
|
||||||
W1. Examine each non-spacing mark (NSM) in the level run, and change the type
|
|
||||||
of the NSM to the type of the previous character. If the NSM is at the start
|
|
||||||
of the level run, it will get the type of sor.
|
|
||||||
*/
|
|
||||||
|
|
||||||
var lastType = sor;
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
|
||||||
if (types[i] == 'NSM')
|
|
||||||
types[i] = lastType;
|
|
||||||
else
|
|
||||||
lastType = types[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
W2. Search backwards from each instance of a European number until the first
|
|
||||||
strong type (R, L, AL, or sor) is found. If an AL is found, change the type
|
|
||||||
of the European number to Arabic number.
|
|
||||||
*/
|
|
||||||
|
|
||||||
var lastType = sor;
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
|
||||||
var t = types[i];
|
|
||||||
if (t == 'EN')
|
|
||||||
types[i] = (lastType == 'AL') ? 'AN' : 'EN';
|
|
||||||
else if (t == 'R' || t == 'L' || t == 'AL')
|
|
||||||
lastType = t;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
W3. Change all ALs to R.
|
|
||||||
*/
|
|
||||||
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
|
||||||
var t = types[i];
|
|
||||||
if (t == 'AL')
|
|
||||||
types[i] = 'R';
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
W4. A single European separator between two European numbers changes to a
|
|
||||||
European number. A single common separator between two numbers of the same
|
|
||||||
type changes to that type:
|
|
||||||
*/
|
|
||||||
|
|
||||||
for (var i = 1; i < strLength - 1; ++i) {
|
|
||||||
if (types[i] == 'ES' && types[i - 1] == 'EN' && types[i + 1] == 'EN')
|
|
||||||
types[i] = 'EN';
|
|
||||||
if (types[i] == 'CS' && (types[i - 1] == 'EN' || types[i - 1] == 'AN') &&
|
|
||||||
types[i + 1] == types[i - 1])
|
|
||||||
types[i] = types[i - 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
W5. A sequence of European terminators adjacent to European numbers changes
|
|
||||||
to all European numbers:
|
|
||||||
*/
|
|
||||||
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
|
||||||
if (types[i] == 'EN') {
|
|
||||||
// do before
|
|
||||||
for (var j = i - 1; j >= 0; --j) {
|
|
||||||
if (types[j] != 'ET')
|
|
||||||
break;
|
|
||||||
types[j] = 'EN';
|
|
||||||
}
|
|
||||||
// do after
|
|
||||||
for (var j = i + 1; j < strLength; --j) {
|
|
||||||
if (types[j] != 'ET')
|
|
||||||
break;
|
|
||||||
types[j] = 'EN';
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
var levels = new Array(strLength);
|
||||||
W6. Otherwise, separators and terminators change to Other Neutral:
|
|
||||||
*/
|
|
||||||
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
for (var i = 0; i < strLength; ++i) {
|
||||||
var t = types[i];
|
levels[i] = startLevel;
|
||||||
if (t == 'WS' || t == 'ES' || t == 'ET' || t == 'CS')
|
|
||||||
types[i] = 'ON';
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
W7. Search backwards from each instance of a European number until the first
|
|
||||||
strong type (R, L, or sor) is found. If an L is found, then change the type
|
|
||||||
of the European number to L.
|
|
||||||
*/
|
|
||||||
|
|
||||||
var lastType = sor;
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
|
||||||
var t = types[i];
|
|
||||||
if (t == 'EN')
|
|
||||||
types[i] = (lastType == 'L') ? 'L' : 'EN';
|
|
||||||
else if (t == 'R' || t == 'L')
|
|
||||||
lastType = t;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
N1. A sequence of neutrals takes the direction of the surrounding strong text
|
|
||||||
if the text on both sides has the same direction. European and Arabic numbers
|
|
||||||
are treated as though they were R. Start-of-level-run (sor) and
|
|
||||||
end-of-level-run (eor) are used at level run boundaries.
|
|
||||||
*/
|
|
||||||
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
|
||||||
if (types[i] == 'ON') {
|
|
||||||
var end = findUnequal(types, i + 1, 'ON');
|
|
||||||
var before = sor;
|
|
||||||
if (i > 0)
|
|
||||||
before = types[i - 1];
|
|
||||||
var after = eor;
|
|
||||||
if (end + 1 < strLength)
|
|
||||||
after = types[end + 1];
|
|
||||||
if (before != 'L')
|
|
||||||
before = 'R';
|
|
||||||
if (after != 'L')
|
|
||||||
after = 'R';
|
|
||||||
if (before == after)
|
|
||||||
setValues(types, i, end, before);
|
|
||||||
i = end - 1; // reset to end (-1 so next iteration is ok)
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
var diffChars = new Array(strLength);
|
||||||
N2. Any remaining neutrals take the embedding direction.
|
var diffLevels = new Array(strLength);
|
||||||
*/
|
var diffTypes = new Array(strLength);
|
||||||
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
/*
|
||||||
if (types[i] == 'ON')
|
X1-X10: skip most of this, since we are NOT doing the embeddings.
|
||||||
types[i] = e;
|
*/
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
var e = isOdd(startLevel) ? 'R' : 'L';
|
||||||
I1. For all characters with an even (left-to-right) embedding direction,
|
var sor = e;
|
||||||
those of type R go up one level and those of type AN or EN go up two levels.
|
var eor = sor;
|
||||||
I2. For all characters with an odd (right-to-left) embedding direction, those
|
|
||||||
of type L, EN or AN go up one level.
|
|
||||||
*/
|
|
||||||
|
|
||||||
for (var i = 0; i < strLength; ++i) {
|
/*
|
||||||
var t = types[i];
|
W1. Examine each non-spacing mark (NSM) in the level run, and change the
|
||||||
if (isEven(levels[i])) {
|
type of the NSM to the type of the previous character. If the NSM is at the
|
||||||
if (t == 'R') {
|
start of the level run, it will get the type of sor.
|
||||||
levels[i] += 1;
|
*/
|
||||||
} else if (t == 'AN' || t == 'EN') {
|
|
||||||
levels[i] += 2;
|
var lastType = sor;
|
||||||
}
|
for (var i = 0; i < strLength; ++i) {
|
||||||
} else { // isOdd, so
|
if (types[i] == 'NSM')
|
||||||
if (t == 'L' || t == 'AN' || t == 'EN') {
|
types[i] = lastType;
|
||||||
levels[i] += 1;
|
else
|
||||||
}
|
lastType = types[i];
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
L1. On each line, reset the embedding level of the following characters to
|
W2. Search backwards from each instance of a European number until the
|
||||||
the paragraph embedding level:
|
first strong type (R, L, AL, or sor) is found. If an AL is found, change
|
||||||
|
the type of the European number to Arabic number.
|
||||||
|
*/
|
||||||
|
|
||||||
segment separators,
|
var lastType = sor;
|
||||||
paragraph separators,
|
for (var i = 0; i < strLength; ++i) {
|
||||||
any sequence of whitespace characters preceding a segment separator or
|
var t = types[i];
|
||||||
paragraph separator, and any sequence of white space characters at the end
|
if (t == 'EN')
|
||||||
of the line.
|
types[i] = (lastType == 'AL') ? 'AN' : 'EN';
|
||||||
*/
|
else if (t == 'R' || t == 'L' || t == 'AL')
|
||||||
|
lastType = t;
|
||||||
|
}
|
||||||
|
|
||||||
// don't bother as text is only single line
|
/*
|
||||||
|
W3. Change all ALs to R.
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
for (var i = 0; i < strLength; ++i) {
|
||||||
L2. From the highest level found in the text to the lowest odd level on each
|
var t = types[i];
|
||||||
line, reverse any contiguous sequence of characters that are at that level or
|
if (t == 'AL')
|
||||||
higher.
|
types[i] = 'R';
|
||||||
*/
|
}
|
||||||
|
|
||||||
// find highest level & lowest odd level
|
/*
|
||||||
|
W4. A single European separator between two European numbers changes to a
|
||||||
|
European number. A single common separator between two numbers of the same
|
||||||
|
type changes to that type:
|
||||||
|
*/
|
||||||
|
|
||||||
var highestLevel = -1;
|
for (var i = 1; i < strLength - 1; ++i) {
|
||||||
var lowestOddLevel = 99;
|
if (types[i] == 'ES' && types[i - 1] == 'EN' && types[i + 1] == 'EN')
|
||||||
for (var i = 0, ii = levels.length; i < ii; ++i) {
|
types[i] = 'EN';
|
||||||
var level = levels[i];
|
if (types[i] == 'CS' && (types[i - 1] == 'EN' || types[i - 1] == 'AN') &&
|
||||||
if (highestLevel < level)
|
types[i + 1] == types[i - 1])
|
||||||
highestLevel = level;
|
types[i] = types[i - 1];
|
||||||
if (lowestOddLevel > level && isOdd(level))
|
}
|
||||||
lowestOddLevel = level;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now reverse between those limits
|
/*
|
||||||
|
W5. A sequence of European terminators adjacent to European numbers changes
|
||||||
|
to all European numbers:
|
||||||
|
*/
|
||||||
|
|
||||||
for (var level = highestLevel; level >= lowestOddLevel; --level) {
|
for (var i = 0; i < strLength; ++i) {
|
||||||
// find segments to reverse
|
if (types[i] == 'EN') {
|
||||||
var start = -1;
|
// do before
|
||||||
for (var i = 0, ii = levels.length; i < ii; ++i) {
|
for (var j = i - 1; j >= 0; --j) {
|
||||||
if (levels[i] < level) {
|
if (types[j] != 'ET')
|
||||||
if (start >= 0) {
|
break;
|
||||||
reverseValues(chars, start, i);
|
types[j] = 'EN';
|
||||||
start = -1;
|
}
|
||||||
|
// do after
|
||||||
|
for (var j = i + 1; j < strLength; --j) {
|
||||||
|
if (types[j] != 'ET')
|
||||||
|
break;
|
||||||
|
types[j] = 'EN';
|
||||||
}
|
}
|
||||||
} else if (start < 0) {
|
|
||||||
start = i;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (start >= 0) {
|
|
||||||
reverseValues(chars, start, levels.length);
|
/*
|
||||||
|
W6. Otherwise, separators and terminators change to Other Neutral:
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (var i = 0; i < strLength; ++i) {
|
||||||
|
var t = types[i];
|
||||||
|
if (t == 'WS' || t == 'ES' || t == 'ET' || t == 'CS')
|
||||||
|
types[i] = 'ON';
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
L3. Combining marks applied to a right-to-left base character will at this
|
W7. Search backwards from each instance of a European number until the
|
||||||
point precede their base character. If the rendering engine expects them to
|
first strong type (R, L, or sor) is found. If an L is found, then change
|
||||||
follow the base characters in the final display process, then the ordering of
|
the type of the European number to L.
|
||||||
the marks and the base character must be reversed.
|
*/
|
||||||
*/
|
|
||||||
|
|
||||||
// don't bother for now
|
var lastType = sor;
|
||||||
|
for (var i = 0; i < strLength; ++i) {
|
||||||
|
var t = types[i];
|
||||||
|
if (t == 'EN')
|
||||||
|
types[i] = (lastType == 'L') ? 'L' : 'EN';
|
||||||
|
else if (t == 'R' || t == 'L')
|
||||||
|
lastType = t;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
L4. A character that possesses the mirrored property as specified by
|
N1. A sequence of neutrals takes the direction of the surrounding strong
|
||||||
Section 4.7, Mirrored, must be depicted by a mirrored glyph if the resolved
|
text if the text on both sides has the same direction. European and Arabic
|
||||||
directionality of that character is R.
|
numbers are treated as though they were R. Start-of-level-run (sor) and
|
||||||
*/
|
end-of-level-run (eor) are used at level run boundaries.
|
||||||
|
*/
|
||||||
|
|
||||||
// don't mirror as characters are already mirrored in the pdf
|
for (var i = 0; i < strLength; ++i) {
|
||||||
|
if (types[i] == 'ON') {
|
||||||
|
var end = findUnequal(types, i + 1, 'ON');
|
||||||
|
var before = sor;
|
||||||
|
if (i > 0)
|
||||||
|
before = types[i - 1];
|
||||||
|
var after = eor;
|
||||||
|
if (end + 1 < strLength)
|
||||||
|
after = types[end + 1];
|
||||||
|
if (before != 'L')
|
||||||
|
before = 'R';
|
||||||
|
if (after != 'L')
|
||||||
|
after = 'R';
|
||||||
|
if (before == after)
|
||||||
|
setValues(types, i, end, before);
|
||||||
|
i = end - 1; // reset to end (-1 so next iteration is ok)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Finally, return string
|
/*
|
||||||
|
N2. Any remaining neutrals take the embedding direction.
|
||||||
|
*/
|
||||||
|
|
||||||
var result = '';
|
for (var i = 0; i < strLength; ++i) {
|
||||||
for (var i = 0, ii = chars.length; i < ii; ++i) {
|
if (types[i] == 'ON')
|
||||||
var ch = chars[i];
|
types[i] = e;
|
||||||
if (ch != '<' && ch != '>')
|
}
|
||||||
result += ch;
|
|
||||||
}
|
/*
|
||||||
return result;
|
I1. For all characters with an even (left-to-right) embedding direction,
|
||||||
}
|
those of type R go up one level and those of type AN or EN go up two
|
||||||
|
levels.
|
||||||
|
I2. For all characters with an odd (right-to-left) embedding direction,
|
||||||
|
those of type L, EN or AN go up one level.
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (var i = 0; i < strLength; ++i) {
|
||||||
|
var t = types[i];
|
||||||
|
if (isEven(levels[i])) {
|
||||||
|
if (t == 'R') {
|
||||||
|
levels[i] += 1;
|
||||||
|
} else if (t == 'AN' || t == 'EN') {
|
||||||
|
levels[i] += 2;
|
||||||
|
}
|
||||||
|
} else { // isOdd, so
|
||||||
|
if (t == 'L' || t == 'AN' || t == 'EN') {
|
||||||
|
levels[i] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
L1. On each line, reset the embedding level of the following characters to
|
||||||
|
the paragraph embedding level:
|
||||||
|
|
||||||
|
segment separators,
|
||||||
|
paragraph separators,
|
||||||
|
any sequence of whitespace characters preceding a segment separator or
|
||||||
|
paragraph separator, and any sequence of white space characters at the end
|
||||||
|
of the line.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// don't bother as text is only single line
|
||||||
|
|
||||||
|
/*
|
||||||
|
L2. From the highest level found in the text to the lowest odd level on
|
||||||
|
each line, reverse any contiguous sequence of characters that are at that
|
||||||
|
level or higher.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// find highest level & lowest odd level
|
||||||
|
|
||||||
|
var highestLevel = -1;
|
||||||
|
var lowestOddLevel = 99;
|
||||||
|
for (var i = 0, ii = levels.length; i < ii; ++i) {
|
||||||
|
var level = levels[i];
|
||||||
|
if (highestLevel < level)
|
||||||
|
highestLevel = level;
|
||||||
|
if (lowestOddLevel > level && isOdd(level))
|
||||||
|
lowestOddLevel = level;
|
||||||
|
}
|
||||||
|
|
||||||
|
// now reverse between those limits
|
||||||
|
|
||||||
|
for (var level = highestLevel; level >= lowestOddLevel; --level) {
|
||||||
|
// find segments to reverse
|
||||||
|
var start = -1;
|
||||||
|
for (var i = 0, ii = levels.length; i < ii; ++i) {
|
||||||
|
if (levels[i] < level) {
|
||||||
|
if (start >= 0) {
|
||||||
|
reverseValues(chars, start, i);
|
||||||
|
start = -1;
|
||||||
|
}
|
||||||
|
} else if (start < 0) {
|
||||||
|
start = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (start >= 0) {
|
||||||
|
reverseValues(chars, start, levels.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
L3. Combining marks applied to a right-to-left base character will at this
|
||||||
|
point precede their base character. If the rendering engine expects them to
|
||||||
|
follow the base characters in the final display process, then the ordering
|
||||||
|
of the marks and the base character must be reversed.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// don't bother for now
|
||||||
|
|
||||||
|
/*
|
||||||
|
L4. A character that possesses the mirrored property as specified by
|
||||||
|
Section 4.7, Mirrored, must be depicted by a mirrored glyph if the resolved
|
||||||
|
directionality of that character is R.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// don't mirror as characters are already mirrored in the pdf
|
||||||
|
|
||||||
|
// Finally, return string
|
||||||
|
|
||||||
|
var result = '';
|
||||||
|
for (var i = 0, ii = chars.length; i < ii; ++i) {
|
||||||
|
var ch = chars[i];
|
||||||
|
if (ch != '<' && ch != '>')
|
||||||
|
result += ch;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
});
|
||||||
|
})();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user