Merge pull request #1218 from mozilla/bidi
bidi characters algorithm; separation of the toFontChar and toUnicode
This commit is contained in:
commit
676e575a52
1
Makefile
1
Makefile
@ -38,6 +38,7 @@ PDF_JS_FILES = \
|
||||
worker.js \
|
||||
../external/jpgjs/jpg.js \
|
||||
jpx.js \
|
||||
bidi.js \
|
||||
$(NULL)
|
||||
|
||||
# make server
|
||||
|
433
src/bidi.js
Normal file
433
src/bidi.js
Normal file
@ -0,0 +1,433 @@
|
||||
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
|
||||
|
||||
'use strict';
|
||||
|
||||
var bidi = (function bidiClosure() {
|
||||
// Character types for symbols from 0000 to 00FF.
|
||||
var baseTypes = [
|
||||
'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'S', 'B', 'S', 'WS',
|
||||
'B', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN',
|
||||
'BN', 'BN', 'B', 'B', 'B', 'S', 'WS', 'ON', 'ON', 'ET', 'ET', 'ET', 'ON',
|
||||
'ON', 'ON', 'ON', 'ON', 'ON', 'CS', 'ON', 'CS', 'ON', 'EN', 'EN', 'EN',
|
||||
'EN', 'EN', 'EN', 'EN', 'EN', 'EN', 'EN', 'ON', 'ON', 'ON', 'ON', 'ON',
|
||||
'ON', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
|
||||
'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'ON', 'ON',
|
||||
'ON', 'ON', 'ON', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
|
||||
'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
|
||||
'L', 'ON', 'ON', 'ON', 'ON', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'B', 'BN',
|
||||
'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN',
|
||||
'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN',
|
||||
'BN', 'CS', 'ON', 'ET', 'ET', 'ET', 'ET', 'ON', 'ON', 'ON', 'ON', 'L', 'ON',
|
||||
'ON', 'ON', 'ON', 'ON', 'ET', 'ET', 'EN', 'EN', 'ON', 'L', 'ON', 'ON', 'ON',
|
||||
'EN', 'L', 'ON', 'ON', 'ON', 'ON', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
|
||||
'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
|
||||
'L', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
|
||||
'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
|
||||
'L', 'L', 'L', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L'
|
||||
];
|
||||
|
||||
// Character types for symbols from 0600 to 06FF
|
||||
var arabicTypes = [
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'CS', 'AL', 'ON', 'ON', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM',
|
||||
'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AN', 'AN', 'AN', 'AN', 'AN', 'AN', 'AN', 'AN', 'AN',
|
||||
'AN', 'ET', 'AN', 'AN', 'AL', 'AL', 'AL', 'NSM', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM',
|
||||
'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'ON', 'NSM',
|
||||
'NSM', 'NSM', 'NSM', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL',
|
||||
'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL'
|
||||
];
|
||||
|
||||
function isOdd(i) {
|
||||
return (i & 1) != 0;
|
||||
}
|
||||
|
||||
function isEven(i) {
|
||||
return (i & 1) == 0;
|
||||
}
|
||||
|
||||
function findUnequal(arr, start, value) {
|
||||
var j;
|
||||
for (var j = start, jj = arr.length; j < jj; ++j) {
|
||||
if (arr[j] != value)
|
||||
return j;
|
||||
}
|
||||
return j;
|
||||
}
|
||||
|
||||
function setValues(arr, start, end, value) {
|
||||
for (var j = start; j < end; ++j) {
|
||||
arr[j] = value;
|
||||
}
|
||||
}
|
||||
|
||||
function reverseValues(arr, start, end) {
|
||||
for (var i = start, j = end - 1; i < j; ++i, --j) {
|
||||
var temp = arr[i];
|
||||
arr[i] = arr[j];
|
||||
arr[j] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
function mirrorGlyphs(c) {
|
||||
/*
|
||||
# BidiMirroring-1.txt
|
||||
0028; 0029 # LEFT PARENTHESIS
|
||||
0029; 0028 # RIGHT PARENTHESIS
|
||||
003C; 003E # LESS-THAN SIGN
|
||||
003E; 003C # GREATER-THAN SIGN
|
||||
005B; 005D # LEFT SQUARE BRACKET
|
||||
005D; 005B # RIGHT SQUARE BRACKET
|
||||
007B; 007D # LEFT CURLY BRACKET
|
||||
007D; 007B # RIGHT CURLY BRACKET
|
||||
00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
*/
|
||||
switch (c) {
|
||||
case '(':
|
||||
return ')';
|
||||
case ')':
|
||||
return '(';
|
||||
case '<':
|
||||
return '>';
|
||||
case '>':
|
||||
return '<';
|
||||
case ']':
|
||||
return '[';
|
||||
case '[':
|
||||
return ']';
|
||||
case '}':
|
||||
return '{';
|
||||
case '{':
|
||||
return '}';
|
||||
case '\u00AB':
|
||||
return '\u00BB';
|
||||
case '\u00BB':
|
||||
return '\u00AB';
|
||||
default:
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
return (function bidi(text, startLevel) {
|
||||
var str = text.str;
|
||||
var strLength = str.length;
|
||||
if (strLength == 0)
|
||||
return str;
|
||||
|
||||
// get types, fill arrays
|
||||
|
||||
var chars = new Array(strLength);
|
||||
var types = new Array(strLength);
|
||||
var oldtypes = new Array(strLength);
|
||||
var numBidi = 0;
|
||||
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
chars[i] = str.charAt(i);
|
||||
|
||||
var charCode = str.charCodeAt(i);
|
||||
var charType = 'L';
|
||||
if (charCode <= 0x00ff)
|
||||
charType = baseTypes[charCode];
|
||||
else if (0x0590 <= charCode && charCode <= 0x05f4)
|
||||
charType = 'R';
|
||||
else if (0x0600 <= charCode && charCode <= 0x06ff)
|
||||
charType = arabicTypes[charCode & 0xff];
|
||||
else if (0x0700 <= charCode && charCode <= 0x08AC)
|
||||
charType = 'AL';
|
||||
|
||||
if (charType == 'R' || charType == 'AL' || charType == 'AN')
|
||||
numBidi++;
|
||||
|
||||
oldtypes[i] = types[i] = charType;
|
||||
}
|
||||
|
||||
// detect the bidi method
|
||||
// if there are no rtl characters then no bidi needed
|
||||
// if less than 30% chars are rtl then string is primarily ltr
|
||||
// if more than 30% chars are rtl then string is primarily rtl
|
||||
if (numBidi == 0) {
|
||||
text.direction = 'ltr';
|
||||
return str;
|
||||
}
|
||||
|
||||
if (startLevel == -1) {
|
||||
if ((strLength / numBidi) < 0.3) {
|
||||
text.direction = 'ltr';
|
||||
startLevel = 0;
|
||||
} else {
|
||||
text.direction = 'rtl';
|
||||
startLevel = 1;
|
||||
}
|
||||
}
|
||||
|
||||
var levels = new Array(strLength);
|
||||
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
levels[i] = startLevel;
|
||||
}
|
||||
|
||||
var diffChars = new Array(strLength);
|
||||
var diffLevels = new Array(strLength);
|
||||
var diffTypes = new Array(strLength);
|
||||
|
||||
/*
|
||||
X1-X10: skip most of this, since we are NOT doing the embeddings.
|
||||
*/
|
||||
|
||||
var e = isOdd(startLevel) ? 'R' : 'L';
|
||||
var sor = e;
|
||||
var eor = sor;
|
||||
|
||||
/*
|
||||
W1. Examine each non-spacing mark (NSM) in the level run, and change the
|
||||
type of the NSM to the type of the previous character. If the NSM is at the
|
||||
start of the level run, it will get the type of sor.
|
||||
*/
|
||||
|
||||
var lastType = sor;
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
if (types[i] == 'NSM')
|
||||
types[i] = lastType;
|
||||
else
|
||||
lastType = types[i];
|
||||
}
|
||||
|
||||
/*
|
||||
W2. Search backwards from each instance of a European number until the
|
||||
first strong type (R, L, AL, or sor) is found. If an AL is found, change
|
||||
the type of the European number to Arabic number.
|
||||
*/
|
||||
|
||||
var lastType = sor;
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
var t = types[i];
|
||||
if (t == 'EN')
|
||||
types[i] = (lastType == 'AL') ? 'AN' : 'EN';
|
||||
else if (t == 'R' || t == 'L' || t == 'AL')
|
||||
lastType = t;
|
||||
}
|
||||
|
||||
/*
|
||||
W3. Change all ALs to R.
|
||||
*/
|
||||
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
var t = types[i];
|
||||
if (t == 'AL')
|
||||
types[i] = 'R';
|
||||
}
|
||||
|
||||
/*
|
||||
W4. A single European separator between two European numbers changes to a
|
||||
European number. A single common separator between two numbers of the same
|
||||
type changes to that type:
|
||||
*/
|
||||
|
||||
for (var i = 1; i < strLength - 1; ++i) {
|
||||
if (types[i] == 'ES' && types[i - 1] == 'EN' && types[i + 1] == 'EN')
|
||||
types[i] = 'EN';
|
||||
if (types[i] == 'CS' && (types[i - 1] == 'EN' || types[i - 1] == 'AN') &&
|
||||
types[i + 1] == types[i - 1])
|
||||
types[i] = types[i - 1];
|
||||
}
|
||||
|
||||
/*
|
||||
W5. A sequence of European terminators adjacent to European numbers changes
|
||||
to all European numbers:
|
||||
*/
|
||||
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
if (types[i] == 'EN') {
|
||||
// do before
|
||||
for (var j = i - 1; j >= 0; --j) {
|
||||
if (types[j] != 'ET')
|
||||
break;
|
||||
types[j] = 'EN';
|
||||
}
|
||||
// do after
|
||||
for (var j = i + 1; j < strLength; --j) {
|
||||
if (types[j] != 'ET')
|
||||
break;
|
||||
types[j] = 'EN';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
W6. Otherwise, separators and terminators change to Other Neutral:
|
||||
*/
|
||||
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
var t = types[i];
|
||||
if (t == 'WS' || t == 'ES' || t == 'ET' || t == 'CS')
|
||||
types[i] = 'ON';
|
||||
}
|
||||
|
||||
/*
|
||||
W7. Search backwards from each instance of a European number until the
|
||||
first strong type (R, L, or sor) is found. If an L is found, then change
|
||||
the type of the European number to L.
|
||||
*/
|
||||
|
||||
var lastType = sor;
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
var t = types[i];
|
||||
if (t == 'EN')
|
||||
types[i] = (lastType == 'L') ? 'L' : 'EN';
|
||||
else if (t == 'R' || t == 'L')
|
||||
lastType = t;
|
||||
}
|
||||
|
||||
/*
|
||||
N1. A sequence of neutrals takes the direction of the surrounding strong
|
||||
text if the text on both sides has the same direction. European and Arabic
|
||||
numbers are treated as though they were R. Start-of-level-run (sor) and
|
||||
end-of-level-run (eor) are used at level run boundaries.
|
||||
*/
|
||||
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
if (types[i] == 'ON') {
|
||||
var end = findUnequal(types, i + 1, 'ON');
|
||||
var before = sor;
|
||||
if (i > 0)
|
||||
before = types[i - 1];
|
||||
var after = eor;
|
||||
if (end + 1 < strLength)
|
||||
after = types[end + 1];
|
||||
if (before != 'L')
|
||||
before = 'R';
|
||||
if (after != 'L')
|
||||
after = 'R';
|
||||
if (before == after)
|
||||
setValues(types, i, end, before);
|
||||
i = end - 1; // reset to end (-1 so next iteration is ok)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
N2. Any remaining neutrals take the embedding direction.
|
||||
*/
|
||||
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
if (types[i] == 'ON')
|
||||
types[i] = e;
|
||||
}
|
||||
|
||||
/*
|
||||
I1. For all characters with an even (left-to-right) embedding direction,
|
||||
those of type R go up one level and those of type AN or EN go up two
|
||||
levels.
|
||||
I2. For all characters with an odd (right-to-left) embedding direction,
|
||||
those of type L, EN or AN go up one level.
|
||||
*/
|
||||
|
||||
for (var i = 0; i < strLength; ++i) {
|
||||
var t = types[i];
|
||||
if (isEven(levels[i])) {
|
||||
if (t == 'R') {
|
||||
levels[i] += 1;
|
||||
} else if (t == 'AN' || t == 'EN') {
|
||||
levels[i] += 2;
|
||||
}
|
||||
} else { // isOdd, so
|
||||
if (t == 'L' || t == 'AN' || t == 'EN') {
|
||||
levels[i] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
L1. On each line, reset the embedding level of the following characters to
|
||||
the paragraph embedding level:
|
||||
|
||||
segment separators,
|
||||
paragraph separators,
|
||||
any sequence of whitespace characters preceding a segment separator or
|
||||
paragraph separator, and any sequence of white space characters at the end
|
||||
of the line.
|
||||
*/
|
||||
|
||||
// don't bother as text is only single line
|
||||
|
||||
/*
|
||||
L2. From the highest level found in the text to the lowest odd level on
|
||||
each line, reverse any contiguous sequence of characters that are at that
|
||||
level or higher.
|
||||
*/
|
||||
|
||||
// find highest level & lowest odd level
|
||||
|
||||
var highestLevel = -1;
|
||||
var lowestOddLevel = 99;
|
||||
for (var i = 0, ii = levels.length; i < ii; ++i) {
|
||||
var level = levels[i];
|
||||
if (highestLevel < level)
|
||||
highestLevel = level;
|
||||
if (lowestOddLevel > level && isOdd(level))
|
||||
lowestOddLevel = level;
|
||||
}
|
||||
|
||||
// now reverse between those limits
|
||||
|
||||
for (var level = highestLevel; level >= lowestOddLevel; --level) {
|
||||
// find segments to reverse
|
||||
var start = -1;
|
||||
for (var i = 0, ii = levels.length; i < ii; ++i) {
|
||||
if (levels[i] < level) {
|
||||
if (start >= 0) {
|
||||
reverseValues(chars, start, i);
|
||||
start = -1;
|
||||
}
|
||||
} else if (start < 0) {
|
||||
start = i;
|
||||
}
|
||||
}
|
||||
if (start >= 0) {
|
||||
reverseValues(chars, start, levels.length);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
L3. Combining marks applied to a right-to-left base character will at this
|
||||
point precede their base character. If the rendering engine expects them to
|
||||
follow the base characters in the final display process, then the ordering
|
||||
of the marks and the base character must be reversed.
|
||||
*/
|
||||
|
||||
// don't bother for now
|
||||
|
||||
/*
|
||||
L4. A character that possesses the mirrored property as specified by
|
||||
Section 4.7, Mirrored, must be depicted by a mirrored glyph if the resolved
|
||||
directionality of that character is R.
|
||||
*/
|
||||
|
||||
// don't mirror as characters are already mirrored in the pdf
|
||||
|
||||
// Finally, return string
|
||||
|
||||
var result = '';
|
||||
for (var i = 0, ii = chars.length; i < ii; ++i) {
|
||||
var ch = chars[i];
|
||||
if (ch != '<' && ch != '>')
|
||||
result += ch;
|
||||
}
|
||||
return result;
|
||||
});
|
||||
})();
|
@ -778,8 +778,16 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
|
||||
|
||||
x += charWidth;
|
||||
|
||||
text.str += glyph.unicode === ' ' ? '\u00A0' : glyph.unicode;
|
||||
text.length++;
|
||||
var glyphUnicode = glyph.unicode === ' ' ? '\u00A0' : glyph.unicode;
|
||||
var glyphUnicodeLength = glyphUnicode.length;
|
||||
//reverse an arabic ligature
|
||||
if (glyphUnicodeLength > 1 &&
|
||||
isRTLRangeFor(glyphUnicode.charCodeAt(0))) {
|
||||
for (var ii = glyphUnicodeLength - 1; ii >= 0; ii--)
|
||||
text.str += glyphUnicode[ii];
|
||||
} else
|
||||
text.str += glyphUnicode;
|
||||
text.length += glyphUnicodeLength;
|
||||
text.canvasWidth += charWidth;
|
||||
}
|
||||
current.x += x * textHScale2;
|
||||
@ -845,7 +853,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
|
||||
text.str += shownText.str;
|
||||
}
|
||||
text.canvasWidth += shownText.canvasWidth;
|
||||
text.length += e.length;
|
||||
text.length += shownText.length;
|
||||
}
|
||||
} else {
|
||||
malformed('TJ array element ' + e + ' is not string or num');
|
||||
|
@ -620,8 +620,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
} else {
|
||||
// parsing hex UTF-16BE numbers
|
||||
var str = [];
|
||||
for (var i = 0, ii = token.length; i < ii; i += 4)
|
||||
str.push(parseInt(token.substr(i, 4), 16));
|
||||
for (var k = 0, kk = token.length; k < kk; k += 4) {
|
||||
var b = parseInt(token.substr(k, 4), 16);
|
||||
if (b <= 0x10) {
|
||||
k += 4;
|
||||
b = (b << 16) | parseInt(token.substr(k, 4), 16);
|
||||
b -= 0x10000;
|
||||
str.push(0xD800 | (b >> 10));
|
||||
str.push(0xDC00 | (b & 0x3FF));
|
||||
break;
|
||||
}
|
||||
str.push(b);
|
||||
}
|
||||
tokens.push(String.fromCharCode.apply(String, str));
|
||||
token = '';
|
||||
}
|
||||
|
93
src/fonts.js
93
src/fonts.js
@ -736,6 +736,16 @@ function getUnicodeRangeFor(value) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
function isRTLRangeFor(value) {
|
||||
var range = UnicodeRanges[13];
|
||||
if (value >= range.begin && value < range.end)
|
||||
return true;
|
||||
range = UnicodeRanges[11];
|
||||
if (value >= range.begin && value < range.end)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
function isSpecialUnicode(unicode) {
|
||||
return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ||
|
||||
(unicode >= kCmapGlyphOffset &&
|
||||
@ -796,6 +806,8 @@ var Font = (function FontClosure() {
|
||||
else
|
||||
this.rebuildToUnicode(properties);
|
||||
|
||||
this.toFontChar = this.buildToFontChar(this.toUnicode);
|
||||
|
||||
if (!file) {
|
||||
// The file data is not specified. Trying to fix the font name
|
||||
// to be used with the canvas.font.
|
||||
@ -1817,9 +1829,9 @@ var Font = (function FontClosure() {
|
||||
var unassignedUnicodeItems = [];
|
||||
for (var i = 1; i < numGlyphs; i++) {
|
||||
var cid = gidToCidMap[i] || i;
|
||||
var unicode = this.toUnicode[cid];
|
||||
if (!unicode || isSpecialUnicode(unicode) ||
|
||||
unicode in usedUnicodes) {
|
||||
var unicode = this.toFontChar[cid];
|
||||
if (!unicode || typeof unicode !== 'number' ||
|
||||
isSpecialUnicode(unicode) || unicode in usedUnicodes) {
|
||||
unassignedUnicodeItems.push(i);
|
||||
continue;
|
||||
}
|
||||
@ -1838,7 +1850,7 @@ var Font = (function FontClosure() {
|
||||
if (unusedUnicode >= kCmapGlyphOffset + kSizeOfGlyphArea)
|
||||
break;
|
||||
var unicode = unusedUnicode++;
|
||||
this.toUnicode[cid] = unicode;
|
||||
this.toFontChar[cid] = unicode;
|
||||
usedUnicodes[unicode] = true;
|
||||
glyphs.push({ unicode: unicode, code: cid });
|
||||
ids.push(i);
|
||||
@ -1849,9 +1861,9 @@ var Font = (function FontClosure() {
|
||||
var glyphs = cmapTable.glyphs;
|
||||
var ids = cmapTable.ids;
|
||||
var hasShortCmap = !!cmapTable.hasShortCmap;
|
||||
var toUnicode = this.toUnicode;
|
||||
var toFontChar = this.toFontChar;
|
||||
|
||||
if (toUnicode && toUnicode.length > 0) {
|
||||
if (toFontChar && toFontChar.length > 0) {
|
||||
// checking if cmap is just identity map
|
||||
var isIdentity = true;
|
||||
for (var i = 0, ii = glyphs.length; i < ii; i++) {
|
||||
@ -1864,8 +1876,9 @@ var Font = (function FontClosure() {
|
||||
if (isIdentity && !this.isSymbolicFont) {
|
||||
var usedUnicodes = [], unassignedUnicodeItems = [];
|
||||
for (var i = 0, ii = glyphs.length; i < ii; i++) {
|
||||
var unicode = toUnicode[i + 1];
|
||||
if (!unicode || unicode in usedUnicodes) {
|
||||
var unicode = toFontChar[i + 1];
|
||||
if (!unicode || typeof unicode !== 'number' ||
|
||||
unicode in usedUnicodes) {
|
||||
unassignedUnicodeItems.push(i);
|
||||
continue;
|
||||
}
|
||||
@ -1879,11 +1892,11 @@ var Font = (function FontClosure() {
|
||||
unusedUnicode++;
|
||||
var cid = i + 1;
|
||||
// override only if unicode mapping is not specified
|
||||
if (!(cid in toUnicode))
|
||||
toUnicode[cid] = unusedUnicode;
|
||||
if (!(cid in toFontChar))
|
||||
toFontChar[cid] = unusedUnicode;
|
||||
glyphs[i].unicode = unusedUnicode++;
|
||||
}
|
||||
this.useToUnicode = true;
|
||||
this.useToFontChar = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1914,13 +1927,13 @@ var Font = (function FontClosure() {
|
||||
}
|
||||
|
||||
// Moving all symbolic font glyphs into 0xF000 - 0xF0FF range.
|
||||
this.symbolicGlyphsOffset = 0;
|
||||
if (this.isSymbolicFont) {
|
||||
for (var i = 0, ii = glyphs.length; i < ii; i++) {
|
||||
var code = glyphs[i].unicode;
|
||||
glyphs[i].unicode = kSymbolicFontGlyphOffset | (code & 0xFF);
|
||||
var code = glyphs[i].unicode & 0xFF;
|
||||
var fontCharCode = kSymbolicFontGlyphOffset | code;
|
||||
glyphs[i].unicode = toFontChar[code] = fontCharCode;
|
||||
}
|
||||
this.symbolicGlyphsOffset = kSymbolicFontGlyphOffset;
|
||||
this.useToFontChar = true;
|
||||
}
|
||||
|
||||
// remove glyph references outside range of avaialable glyphs
|
||||
@ -2023,12 +2036,12 @@ var Font = (function FontClosure() {
|
||||
properties.baseEncoding = encoding;
|
||||
}
|
||||
if (properties.subtype == 'CIDFontType0C') {
|
||||
var toUnicode = [];
|
||||
var toFontChar = [];
|
||||
for (var i = 0; i < charstrings.length; ++i) {
|
||||
var charstring = charstrings[i];
|
||||
toUnicode[charstring.code] = charstring.unicode;
|
||||
toFontChar[charstring.code] = charstring.unicode;
|
||||
}
|
||||
this.toUnicode = toUnicode;
|
||||
this.toFontChar = toFontChar;
|
||||
}
|
||||
|
||||
var fields = {
|
||||
@ -2123,6 +2136,19 @@ var Font = (function FontClosure() {
|
||||
return stringToArray(otf.file);
|
||||
},
|
||||
|
||||
buildToFontChar: function font_buildToFontChar(toUnicode) {
|
||||
var result = [];
|
||||
var unusedUnicode = kCmapGlyphOffset;
|
||||
for (var i = 0, ii = toUnicode.length; i < ii; i++) {
|
||||
var unicode = toUnicode[i];
|
||||
var fontCharCode = typeof unicode === 'object' ? unusedUnicode++ :
|
||||
unicode;
|
||||
if (typeof unicode !== 'undefined')
|
||||
result[i] = fontCharCode;
|
||||
}
|
||||
return result;
|
||||
},
|
||||
|
||||
rebuildToUnicode: function font_rebuildToUnicode(properties) {
|
||||
var firstChar = properties.firstChar, lastChar = properties.lastChar;
|
||||
var map = [];
|
||||
@ -2258,7 +2284,7 @@ var Font = (function FontClosure() {
|
||||
},
|
||||
|
||||
charToGlyph: function fonts_charToGlyph(charcode) {
|
||||
var unicode, width, codeIRQueue;
|
||||
var fontCharCode, width, codeIRQueue;
|
||||
|
||||
var width = this.widths[charcode];
|
||||
|
||||
@ -2266,38 +2292,39 @@ var Font = (function FontClosure() {
|
||||
case 'CIDFontType0':
|
||||
if (this.noUnicodeAdaptation) {
|
||||
width = this.widths[this.unicodeToCID[charcode] || charcode];
|
||||
unicode = mapPrivateUseChars(charcode);
|
||||
fontCharCode = mapPrivateUseChars(charcode);
|
||||
break;
|
||||
}
|
||||
unicode = this.toUnicode[charcode] || charcode;
|
||||
fontCharCode = this.toFontChar[charcode] || charcode;
|
||||
break;
|
||||
case 'CIDFontType2':
|
||||
if (this.noUnicodeAdaptation) {
|
||||
width = this.widths[this.unicodeToCID[charcode] || charcode];
|
||||
unicode = mapPrivateUseChars(charcode);
|
||||
fontCharCode = mapPrivateUseChars(charcode);
|
||||
break;
|
||||
}
|
||||
unicode = this.toUnicode[charcode] || charcode;
|
||||
fontCharCode = this.toFontChar[charcode] || charcode;
|
||||
break;
|
||||
case 'Type1':
|
||||
var glyphName = this.differences[charcode] || this.encoding[charcode];
|
||||
if (!isNum(width))
|
||||
width = this.widths[glyphName];
|
||||
if (this.noUnicodeAdaptation) {
|
||||
unicode = mapPrivateUseChars(GlyphsUnicode[glyphName] || charcode);
|
||||
fontCharCode = mapPrivateUseChars(GlyphsUnicode[glyphName] ||
|
||||
charcode);
|
||||
break;
|
||||
}
|
||||
unicode = this.glyphNameMap[glyphName] ||
|
||||
fontCharCode = this.glyphNameMap[glyphName] ||
|
||||
GlyphsUnicode[glyphName] || charcode;
|
||||
break;
|
||||
case 'Type3':
|
||||
var glyphName = this.differences[charcode] || this.encoding[charcode];
|
||||
codeIRQueue = this.charProcIRQueues[glyphName];
|
||||
unicode = charcode;
|
||||
fontCharCode = charcode;
|
||||
break;
|
||||
case 'TrueType':
|
||||
if (this.useToUnicode) {
|
||||
unicode = this.toUnicode[charcode] || charcode;
|
||||
if (this.useToFontChar) {
|
||||
fontCharCode = this.toFontChar[charcode] || charcode;
|
||||
break;
|
||||
}
|
||||
var glyphName = this.differences[charcode] || this.encoding[charcode];
|
||||
@ -2306,17 +2333,17 @@ var Font = (function FontClosure() {
|
||||
if (!isNum(width))
|
||||
width = this.widths[glyphName];
|
||||
if (this.noUnicodeAdaptation) {
|
||||
unicode = GlyphsUnicode[glyphName] || charcode;
|
||||
fontCharCode = GlyphsUnicode[glyphName] || charcode;
|
||||
break;
|
||||
}
|
||||
if (!this.hasEncoding || this.isSymbolicFont) {
|
||||
unicode = this.useToUnicode ? this.toUnicode[charcode] :
|
||||
(this.symbolicGlyphsOffset + charcode);
|
||||
fontCharCode = this.useToFontChar ? this.toFontChar[charcode] :
|
||||
charcode;
|
||||
break;
|
||||
}
|
||||
|
||||
// MacRoman encoding address by re-encoding the cmap table
|
||||
unicode = glyphName in this.glyphNameMap ?
|
||||
fontCharCode = glyphName in this.glyphNameMap ?
|
||||
this.glyphNameMap[glyphName] : GlyphsUnicode[glyphName];
|
||||
break;
|
||||
default:
|
||||
@ -2332,7 +2359,7 @@ var Font = (function FontClosure() {
|
||||
width = (isNum(width) ? width : this.defaultWidth) * this.widthMultiplier;
|
||||
|
||||
return {
|
||||
fontChar: String.fromCharCode(unicode),
|
||||
fontChar: String.fromCharCode(fontCharCode),
|
||||
unicode: unicodeChars,
|
||||
width: width,
|
||||
codeIRQueue: codeIRQueue
|
||||
|
51
src/util.js
51
src/util.js
@ -174,6 +174,57 @@ var Util = (function UtilClosure() {
|
||||
return Util;
|
||||
})();
|
||||
|
||||
// optimised CSS custom property getter/setter
|
||||
var CustomStyle = (function CustomStyleClosure() {
|
||||
|
||||
// As noted on: http://www.zachstronaut.com/posts/2009/02/17/
|
||||
// animate-css-transforms-firefox-webkit.html
|
||||
// in some versions of IE9 it is critical that ms appear in this list
|
||||
// before Moz
|
||||
var prefixes = ['ms', 'Moz', 'Webkit', 'O'];
|
||||
var _cache = { };
|
||||
|
||||
function CustomStyle() {
|
||||
}
|
||||
|
||||
CustomStyle.getProp = function get(propName, element) {
|
||||
// check cache only when no element is given
|
||||
if (arguments.length == 1 && typeof _cache[propName] == 'string') {
|
||||
return _cache[propName];
|
||||
}
|
||||
|
||||
element = element || document.documentElement;
|
||||
var style = element.style, prefixed, uPropName;
|
||||
|
||||
// test standard property first
|
||||
if (typeof style[propName] == 'string') {
|
||||
return (_cache[propName] = propName);
|
||||
}
|
||||
|
||||
// capitalize
|
||||
uPropName = propName.charAt(0).toUpperCase() + propName.slice(1);
|
||||
|
||||
// test vendor specific properties
|
||||
for (var i = 0, l = prefixes.length; i < l; i++) {
|
||||
prefixed = prefixes[i] + uPropName;
|
||||
if (typeof style[prefixed] == 'string') {
|
||||
return (_cache[propName] = prefixed);
|
||||
}
|
||||
}
|
||||
|
||||
//if all fails then set to undefined
|
||||
return (_cache[propName] = 'undefined');
|
||||
}
|
||||
|
||||
CustomStyle.setProp = function set(propName, element, str) {
|
||||
var prop = this.getProp(propName);
|
||||
if (prop != 'undefined')
|
||||
element.style[prop] = str;
|
||||
}
|
||||
|
||||
return CustomStyle;
|
||||
})();
|
||||
|
||||
var PDFStringTranslateTable = [
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0x2D8, 0x2C7, 0x2C6, 0x2D9, 0x2DD, 0x2DB, 0x2DA, 0x2DC, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -24,7 +24,8 @@ var files = [
|
||||
'stream.js',
|
||||
'worker.js',
|
||||
'../external/jpgjs/jpg.js',
|
||||
'jpx.js'
|
||||
'jpx.js',
|
||||
'bidi.js'
|
||||
];
|
||||
|
||||
// Load all the files.
|
||||
|
@ -23,6 +23,7 @@
|
||||
<script type="text/javascript" src="/src/worker.js"></script>
|
||||
<script type="text/javascript" src="/external/jpgjs/jpg.js"></script>
|
||||
<script type="text/javascript" src="/src/jpx.js"></script>
|
||||
<script type="text/javascript" src="/src/bidi.js"></script>
|
||||
<script type="text/javascript" src="driver.js"></script>
|
||||
|
||||
<script type="text/javascript">
|
||||
|
@ -22,6 +22,7 @@ load:
|
||||
- ../../src/pattern.js
|
||||
- ../../src/stream.js
|
||||
- ../../src/worker.js
|
||||
- ../../src/bidi.js
|
||||
- ../../external/jpgjs/jpg.js
|
||||
- ../unit/obj_spec.js
|
||||
- ../unit/function_spec.js
|
||||
|
@ -29,6 +29,7 @@
|
||||
<script type="text/javascript" src="../src/worker.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript" src="../external/jpgjs/jpg.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript" src="../src/jpx.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript" src="../src/bidi.js"></script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript">PDFJS.workerSrc = '../src/worker_loader.js';</script> <!-- PDFJSSCRIPT_REMOVE_CORE -->
|
||||
<script type="text/javascript" src="debugger.js"></script>
|
||||
<script type="text/javascript" src="viewer.js"></script>
|
||||
|
@ -1051,12 +1051,13 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) {
|
||||
textLayerDiv.appendChild(textDiv);
|
||||
|
||||
if (textDiv.dataset.textLength > 1) { // avoid div by zero
|
||||
// Adjust div width (via letterSpacing) to match canvas text
|
||||
// Adjust div width to match canvas text
|
||||
// Due to the .offsetWidth calls, this is slow
|
||||
// This needs to come after appending to the DOM
|
||||
textDiv.style.letterSpacing =
|
||||
((textDiv.dataset.canvasWidth - textDiv.offsetWidth) /
|
||||
(textDiv.dataset.textLength - 1)) + 'px';
|
||||
var textScale = textDiv.dataset.canvasWidth / textDiv.offsetWidth;
|
||||
CustomStyle.setProp('transform' , textDiv,
|
||||
'scale(' + textScale + ', 1)');
|
||||
CustomStyle.setProp('transformOrigin' , textDiv, '0% 0%');
|
||||
}
|
||||
} // textLength > 0
|
||||
}
|
||||
@ -1096,7 +1097,8 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) {
|
||||
textDiv.style.fontSize = fontHeight + 'px';
|
||||
textDiv.style.left = text.geom.x + 'px';
|
||||
textDiv.style.top = (text.geom.y - fontHeight) + 'px';
|
||||
textDiv.textContent = text.str;
|
||||
textDiv.textContent = bidi(text, -1);
|
||||
textDiv.dir = text.direction;
|
||||
textDiv.dataset.textLength = text.length;
|
||||
this.textDivs.push(textDiv);
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user