diff --git a/Makefile b/Makefile index 7688025a5..b11b015b6 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,7 @@ PDF_JS_FILES = \ worker.js \ ../external/jpgjs/jpg.js \ jpx.js \ + bidi.js \ $(NULL) # make server diff --git a/src/bidi.js b/src/bidi.js new file mode 100644 index 000000000..a3308fa2c --- /dev/null +++ b/src/bidi.js @@ -0,0 +1,433 @@ +/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ + +'use strict'; + +var bidi = (function bidiClosure() { + // Character types for symbols from 0000 to 00FF. + var baseTypes = [ + 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'S', 'B', 'S', 'WS', + 'B', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', + 'BN', 'BN', 'B', 'B', 'B', 'S', 'WS', 'ON', 'ON', 'ET', 'ET', 'ET', 'ON', + 'ON', 'ON', 'ON', 'ON', 'ON', 'CS', 'ON', 'CS', 'ON', 'EN', 'EN', 'EN', + 'EN', 'EN', 'EN', 'EN', 'EN', 'EN', 'EN', 'ON', 'ON', 'ON', 'ON', 'ON', + 'ON', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', + 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'ON', 'ON', + 'ON', 'ON', 'ON', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', + 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', + 'L', 'ON', 'ON', 'ON', 'ON', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'B', 'BN', + 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', + 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', 'BN', + 'BN', 'CS', 'ON', 'ET', 'ET', 'ET', 'ET', 'ON', 'ON', 'ON', 'ON', 'L', 'ON', + 'ON', 'ON', 'ON', 'ON', 'ET', 'ET', 'EN', 'EN', 'ON', 'L', 'ON', 'ON', 'ON', + 'EN', 'L', 'ON', 'ON', 'ON', 'ON', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L', + 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', + 'L', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', + 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', + 'L', 'L', 'L', 'ON', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L' + ]; + + // Character types for symbols from 0600 to 06FF + var arabicTypes = [ + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'CS', 'AL', 'ON', 'ON', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', + 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AN', 'AN', 'AN', 'AN', 'AN', 'AN', 'AN', 'AN', 'AN', + 'AN', 'ET', 'AN', 'AN', 'AL', 'AL', 'AL', 'NSM', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', + 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'NSM', 'ON', 'NSM', + 'NSM', 'NSM', 'NSM', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', + 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL', 'AL' + ]; + + function isOdd(i) { + return (i & 1) != 0; + } + + function isEven(i) { + return (i & 1) == 0; + } + + function findUnequal(arr, start, value) { + var j; + for (var j = start, jj = arr.length; j < jj; ++j) { + if (arr[j] != value) + return j; + } + return j; + } + + function setValues(arr, start, end, value) { + for (var j = start; j < end; ++j) { + arr[j] = value; + } + } + + function reverseValues(arr, start, end) { + for (var i = start, j = end - 1; i < j; ++i, --j) { + var temp = arr[i]; + arr[i] = arr[j]; + arr[j] = temp; + } + } + + function mirrorGlyphs(c) { + /* + # BidiMirroring-1.txt + 0028; 0029 # LEFT PARENTHESIS + 0029; 0028 # RIGHT PARENTHESIS + 003C; 003E # LESS-THAN SIGN + 003E; 003C # GREATER-THAN SIGN + 005B; 005D # LEFT SQUARE BRACKET + 005D; 005B # RIGHT SQUARE BRACKET + 007B; 007D # LEFT CURLY BRACKET + 007D; 007B # RIGHT CURLY BRACKET + 00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + */ + switch (c) { + case '(': + return ')'; + case ')': + return '('; + case '<': + return '>'; + case '>': + return '<'; + case ']': + return '['; + case '[': + return ']'; + case '}': + return '{'; + case '{': + return '}'; + case '\u00AB': + return '\u00BB'; + case '\u00BB': + return '\u00AB'; + default: + return c; + } + } + + return (function bidi(text, startLevel) { + var str = text.str; + var strLength = str.length; + if (strLength == 0) + return str; + + // get types, fill arrays + + var chars = new Array(strLength); + var types = new Array(strLength); + var oldtypes = new Array(strLength); + var numBidi = 0; + + for (var i = 0; i < strLength; ++i) { + chars[i] = str.charAt(i); + + var charCode = str.charCodeAt(i); + var charType = 'L'; + if (charCode <= 0x00ff) + charType = baseTypes[charCode]; + else if (0x0590 <= charCode && charCode <= 0x05f4) + charType = 'R'; + else if (0x0600 <= charCode && charCode <= 0x06ff) + charType = arabicTypes[charCode & 0xff]; + else if (0x0700 <= charCode && charCode <= 0x08AC) + charType = 'AL'; + + if (charType == 'R' || charType == 'AL' || charType == 'AN') + numBidi++; + + oldtypes[i] = types[i] = charType; + } + + // detect the bidi method + // if there are no rtl characters then no bidi needed + // if less than 30% chars are rtl then string is primarily ltr + // if more than 30% chars are rtl then string is primarily rtl + if (numBidi == 0) { + text.direction = 'ltr'; + return str; + } + + if (startLevel == -1) { + if ((strLength / numBidi) < 0.3) { + text.direction = 'ltr'; + startLevel = 0; + } else { + text.direction = 'rtl'; + startLevel = 1; + } + } + + var levels = new Array(strLength); + + for (var i = 0; i < strLength; ++i) { + levels[i] = startLevel; + } + + var diffChars = new Array(strLength); + var diffLevels = new Array(strLength); + var diffTypes = new Array(strLength); + + /* + X1-X10: skip most of this, since we are NOT doing the embeddings. + */ + + var e = isOdd(startLevel) ? 'R' : 'L'; + var sor = e; + var eor = sor; + + /* + W1. Examine each non-spacing mark (NSM) in the level run, and change the + type of the NSM to the type of the previous character. If the NSM is at the + start of the level run, it will get the type of sor. + */ + + var lastType = sor; + for (var i = 0; i < strLength; ++i) { + if (types[i] == 'NSM') + types[i] = lastType; + else + lastType = types[i]; + } + + /* + W2. Search backwards from each instance of a European number until the + first strong type (R, L, AL, or sor) is found. If an AL is found, change + the type of the European number to Arabic number. + */ + + var lastType = sor; + for (var i = 0; i < strLength; ++i) { + var t = types[i]; + if (t == 'EN') + types[i] = (lastType == 'AL') ? 'AN' : 'EN'; + else if (t == 'R' || t == 'L' || t == 'AL') + lastType = t; + } + + /* + W3. Change all ALs to R. + */ + + for (var i = 0; i < strLength; ++i) { + var t = types[i]; + if (t == 'AL') + types[i] = 'R'; + } + + /* + W4. A single European separator between two European numbers changes to a + European number. A single common separator between two numbers of the same + type changes to that type: + */ + + for (var i = 1; i < strLength - 1; ++i) { + if (types[i] == 'ES' && types[i - 1] == 'EN' && types[i + 1] == 'EN') + types[i] = 'EN'; + if (types[i] == 'CS' && (types[i - 1] == 'EN' || types[i - 1] == 'AN') && + types[i + 1] == types[i - 1]) + types[i] = types[i - 1]; + } + + /* + W5. A sequence of European terminators adjacent to European numbers changes + to all European numbers: + */ + + for (var i = 0; i < strLength; ++i) { + if (types[i] == 'EN') { + // do before + for (var j = i - 1; j >= 0; --j) { + if (types[j] != 'ET') + break; + types[j] = 'EN'; + } + // do after + for (var j = i + 1; j < strLength; --j) { + if (types[j] != 'ET') + break; + types[j] = 'EN'; + } + } + } + + /* + W6. Otherwise, separators and terminators change to Other Neutral: + */ + + for (var i = 0; i < strLength; ++i) { + var t = types[i]; + if (t == 'WS' || t == 'ES' || t == 'ET' || t == 'CS') + types[i] = 'ON'; + } + + /* + W7. Search backwards from each instance of a European number until the + first strong type (R, L, or sor) is found. If an L is found, then change + the type of the European number to L. + */ + + var lastType = sor; + for (var i = 0; i < strLength; ++i) { + var t = types[i]; + if (t == 'EN') + types[i] = (lastType == 'L') ? 'L' : 'EN'; + else if (t == 'R' || t == 'L') + lastType = t; + } + + /* + N1. A sequence of neutrals takes the direction of the surrounding strong + text if the text on both sides has the same direction. European and Arabic + numbers are treated as though they were R. Start-of-level-run (sor) and + end-of-level-run (eor) are used at level run boundaries. + */ + + for (var i = 0; i < strLength; ++i) { + if (types[i] == 'ON') { + var end = findUnequal(types, i + 1, 'ON'); + var before = sor; + if (i > 0) + before = types[i - 1]; + var after = eor; + if (end + 1 < strLength) + after = types[end + 1]; + if (before != 'L') + before = 'R'; + if (after != 'L') + after = 'R'; + if (before == after) + setValues(types, i, end, before); + i = end - 1; // reset to end (-1 so next iteration is ok) + } + } + + /* + N2. Any remaining neutrals take the embedding direction. + */ + + for (var i = 0; i < strLength; ++i) { + if (types[i] == 'ON') + types[i] = e; + } + + /* + I1. For all characters with an even (left-to-right) embedding direction, + those of type R go up one level and those of type AN or EN go up two + levels. + I2. For all characters with an odd (right-to-left) embedding direction, + those of type L, EN or AN go up one level. + */ + + for (var i = 0; i < strLength; ++i) { + var t = types[i]; + if (isEven(levels[i])) { + if (t == 'R') { + levels[i] += 1; + } else if (t == 'AN' || t == 'EN') { + levels[i] += 2; + } + } else { // isOdd, so + if (t == 'L' || t == 'AN' || t == 'EN') { + levels[i] += 1; + } + } + } + + /* + L1. On each line, reset the embedding level of the following characters to + the paragraph embedding level: + + segment separators, + paragraph separators, + any sequence of whitespace characters preceding a segment separator or + paragraph separator, and any sequence of white space characters at the end + of the line. + */ + + // don't bother as text is only single line + + /* + L2. From the highest level found in the text to the lowest odd level on + each line, reverse any contiguous sequence of characters that are at that + level or higher. + */ + + // find highest level & lowest odd level + + var highestLevel = -1; + var lowestOddLevel = 99; + for (var i = 0, ii = levels.length; i < ii; ++i) { + var level = levels[i]; + if (highestLevel < level) + highestLevel = level; + if (lowestOddLevel > level && isOdd(level)) + lowestOddLevel = level; + } + + // now reverse between those limits + + for (var level = highestLevel; level >= lowestOddLevel; --level) { + // find segments to reverse + var start = -1; + for (var i = 0, ii = levels.length; i < ii; ++i) { + if (levels[i] < level) { + if (start >= 0) { + reverseValues(chars, start, i); + start = -1; + } + } else if (start < 0) { + start = i; + } + } + if (start >= 0) { + reverseValues(chars, start, levels.length); + } + } + + /* + L3. Combining marks applied to a right-to-left base character will at this + point precede their base character. If the rendering engine expects them to + follow the base characters in the final display process, then the ordering + of the marks and the base character must be reversed. + */ + + // don't bother for now + + /* + L4. A character that possesses the mirrored property as specified by + Section 4.7, Mirrored, must be depicted by a mirrored glyph if the resolved + directionality of that character is R. + */ + + // don't mirror as characters are already mirrored in the pdf + + // Finally, return string + + var result = ''; + for (var i = 0, ii = chars.length; i < ii; ++i) { + var ch = chars[i]; + if (ch != '<' && ch != '>') + result += ch; + } + return result; + }); +})(); diff --git a/src/canvas.js b/src/canvas.js index 073c3d2d6..54ab5b4d0 100644 --- a/src/canvas.js +++ b/src/canvas.js @@ -778,8 +778,16 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { x += charWidth; - text.str += glyph.unicode === ' ' ? '\u00A0' : glyph.unicode; - text.length++; + var glyphUnicode = glyph.unicode === ' ' ? '\u00A0' : glyph.unicode; + var glyphUnicodeLength = glyphUnicode.length; + //reverse an arabic ligature + if (glyphUnicodeLength > 1 && + isRTLRangeFor(glyphUnicode.charCodeAt(0))) { + for (var ii = glyphUnicodeLength - 1; ii >= 0; ii--) + text.str += glyphUnicode[ii]; + } else + text.str += glyphUnicode; + text.length += glyphUnicodeLength; text.canvasWidth += charWidth; } current.x += x * textHScale2; @@ -845,7 +853,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { text.str += shownText.str; } text.canvasWidth += shownText.canvasWidth; - text.length += e.length; + text.length += shownText.length; } } else { malformed('TJ array element ' + e + ' is not string or num'); diff --git a/src/evaluator.js b/src/evaluator.js index 1597bed11..e34787e41 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -620,8 +620,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } else { // parsing hex UTF-16BE numbers var str = []; - for (var i = 0, ii = token.length; i < ii; i += 4) - str.push(parseInt(token.substr(i, 4), 16)); + for (var k = 0, kk = token.length; k < kk; k += 4) { + var b = parseInt(token.substr(k, 4), 16); + if (b <= 0x10) { + k += 4; + b = (b << 16) | parseInt(token.substr(k, 4), 16); + b -= 0x10000; + str.push(0xD800 | (b >> 10)); + str.push(0xDC00 | (b & 0x3FF)); + break; + } + str.push(b); + } tokens.push(String.fromCharCode.apply(String, str)); token = ''; } diff --git a/src/fonts.js b/src/fonts.js index 4ea61c9a8..542c33f55 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -736,6 +736,16 @@ function getUnicodeRangeFor(value) { return -1; } +function isRTLRangeFor(value) { + var range = UnicodeRanges[13]; + if (value >= range.begin && value < range.end) + return true; + range = UnicodeRanges[11]; + if (value >= range.begin && value < range.end) + return true; + return false; +} + function isSpecialUnicode(unicode) { return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) || (unicode >= kCmapGlyphOffset && @@ -796,6 +806,8 @@ var Font = (function FontClosure() { else this.rebuildToUnicode(properties); + this.toFontChar = this.buildToFontChar(this.toUnicode); + if (!file) { // The file data is not specified. Trying to fix the font name // to be used with the canvas.font. @@ -1817,9 +1829,9 @@ var Font = (function FontClosure() { var unassignedUnicodeItems = []; for (var i = 1; i < numGlyphs; i++) { var cid = gidToCidMap[i] || i; - var unicode = this.toUnicode[cid]; - if (!unicode || isSpecialUnicode(unicode) || - unicode in usedUnicodes) { + var unicode = this.toFontChar[cid]; + if (!unicode || typeof unicode !== 'number' || + isSpecialUnicode(unicode) || unicode in usedUnicodes) { unassignedUnicodeItems.push(i); continue; } @@ -1838,7 +1850,7 @@ var Font = (function FontClosure() { if (unusedUnicode >= kCmapGlyphOffset + kSizeOfGlyphArea) break; var unicode = unusedUnicode++; - this.toUnicode[cid] = unicode; + this.toFontChar[cid] = unicode; usedUnicodes[unicode] = true; glyphs.push({ unicode: unicode, code: cid }); ids.push(i); @@ -1849,9 +1861,9 @@ var Font = (function FontClosure() { var glyphs = cmapTable.glyphs; var ids = cmapTable.ids; var hasShortCmap = !!cmapTable.hasShortCmap; - var toUnicode = this.toUnicode; + var toFontChar = this.toFontChar; - if (toUnicode && toUnicode.length > 0) { + if (toFontChar && toFontChar.length > 0) { // checking if cmap is just identity map var isIdentity = true; for (var i = 0, ii = glyphs.length; i < ii; i++) { @@ -1864,8 +1876,9 @@ var Font = (function FontClosure() { if (isIdentity && !this.isSymbolicFont) { var usedUnicodes = [], unassignedUnicodeItems = []; for (var i = 0, ii = glyphs.length; i < ii; i++) { - var unicode = toUnicode[i + 1]; - if (!unicode || unicode in usedUnicodes) { + var unicode = toFontChar[i + 1]; + if (!unicode || typeof unicode !== 'number' || + unicode in usedUnicodes) { unassignedUnicodeItems.push(i); continue; } @@ -1879,11 +1892,11 @@ var Font = (function FontClosure() { unusedUnicode++; var cid = i + 1; // override only if unicode mapping is not specified - if (!(cid in toUnicode)) - toUnicode[cid] = unusedUnicode; + if (!(cid in toFontChar)) + toFontChar[cid] = unusedUnicode; glyphs[i].unicode = unusedUnicode++; } - this.useToUnicode = true; + this.useToFontChar = true; } } @@ -1914,13 +1927,13 @@ var Font = (function FontClosure() { } // Moving all symbolic font glyphs into 0xF000 - 0xF0FF range. - this.symbolicGlyphsOffset = 0; if (this.isSymbolicFont) { for (var i = 0, ii = glyphs.length; i < ii; i++) { - var code = glyphs[i].unicode; - glyphs[i].unicode = kSymbolicFontGlyphOffset | (code & 0xFF); + var code = glyphs[i].unicode & 0xFF; + var fontCharCode = kSymbolicFontGlyphOffset | code; + glyphs[i].unicode = toFontChar[code] = fontCharCode; } - this.symbolicGlyphsOffset = kSymbolicFontGlyphOffset; + this.useToFontChar = true; } // remove glyph references outside range of avaialable glyphs @@ -2023,12 +2036,12 @@ var Font = (function FontClosure() { properties.baseEncoding = encoding; } if (properties.subtype == 'CIDFontType0C') { - var toUnicode = []; + var toFontChar = []; for (var i = 0; i < charstrings.length; ++i) { var charstring = charstrings[i]; - toUnicode[charstring.code] = charstring.unicode; + toFontChar[charstring.code] = charstring.unicode; } - this.toUnicode = toUnicode; + this.toFontChar = toFontChar; } var fields = { @@ -2123,6 +2136,19 @@ var Font = (function FontClosure() { return stringToArray(otf.file); }, + buildToFontChar: function font_buildToFontChar(toUnicode) { + var result = []; + var unusedUnicode = kCmapGlyphOffset; + for (var i = 0, ii = toUnicode.length; i < ii; i++) { + var unicode = toUnicode[i]; + var fontCharCode = typeof unicode === 'object' ? unusedUnicode++ : + unicode; + if (typeof unicode !== 'undefined') + result[i] = fontCharCode; + } + return result; + }, + rebuildToUnicode: function font_rebuildToUnicode(properties) { var firstChar = properties.firstChar, lastChar = properties.lastChar; var map = []; @@ -2258,7 +2284,7 @@ var Font = (function FontClosure() { }, charToGlyph: function fonts_charToGlyph(charcode) { - var unicode, width, codeIRQueue; + var fontCharCode, width, codeIRQueue; var width = this.widths[charcode]; @@ -2266,38 +2292,39 @@ var Font = (function FontClosure() { case 'CIDFontType0': if (this.noUnicodeAdaptation) { width = this.widths[this.unicodeToCID[charcode] || charcode]; - unicode = mapPrivateUseChars(charcode); + fontCharCode = mapPrivateUseChars(charcode); break; } - unicode = this.toUnicode[charcode] || charcode; + fontCharCode = this.toFontChar[charcode] || charcode; break; case 'CIDFontType2': if (this.noUnicodeAdaptation) { width = this.widths[this.unicodeToCID[charcode] || charcode]; - unicode = mapPrivateUseChars(charcode); + fontCharCode = mapPrivateUseChars(charcode); break; } - unicode = this.toUnicode[charcode] || charcode; + fontCharCode = this.toFontChar[charcode] || charcode; break; case 'Type1': var glyphName = this.differences[charcode] || this.encoding[charcode]; if (!isNum(width)) width = this.widths[glyphName]; if (this.noUnicodeAdaptation) { - unicode = mapPrivateUseChars(GlyphsUnicode[glyphName] || charcode); + fontCharCode = mapPrivateUseChars(GlyphsUnicode[glyphName] || + charcode); break; } - unicode = this.glyphNameMap[glyphName] || + fontCharCode = this.glyphNameMap[glyphName] || GlyphsUnicode[glyphName] || charcode; break; case 'Type3': var glyphName = this.differences[charcode] || this.encoding[charcode]; codeIRQueue = this.charProcIRQueues[glyphName]; - unicode = charcode; + fontCharCode = charcode; break; case 'TrueType': - if (this.useToUnicode) { - unicode = this.toUnicode[charcode] || charcode; + if (this.useToFontChar) { + fontCharCode = this.toFontChar[charcode] || charcode; break; } var glyphName = this.differences[charcode] || this.encoding[charcode]; @@ -2306,17 +2333,17 @@ var Font = (function FontClosure() { if (!isNum(width)) width = this.widths[glyphName]; if (this.noUnicodeAdaptation) { - unicode = GlyphsUnicode[glyphName] || charcode; + fontCharCode = GlyphsUnicode[glyphName] || charcode; break; } if (!this.hasEncoding || this.isSymbolicFont) { - unicode = this.useToUnicode ? this.toUnicode[charcode] : - (this.symbolicGlyphsOffset + charcode); + fontCharCode = this.useToFontChar ? this.toFontChar[charcode] : + charcode; break; } // MacRoman encoding address by re-encoding the cmap table - unicode = glyphName in this.glyphNameMap ? + fontCharCode = glyphName in this.glyphNameMap ? this.glyphNameMap[glyphName] : GlyphsUnicode[glyphName]; break; default: @@ -2332,7 +2359,7 @@ var Font = (function FontClosure() { width = (isNum(width) ? width : this.defaultWidth) * this.widthMultiplier; return { - fontChar: String.fromCharCode(unicode), + fontChar: String.fromCharCode(fontCharCode), unicode: unicodeChars, width: width, codeIRQueue: codeIRQueue diff --git a/src/util.js b/src/util.js index 93bd36b55..b0739ec3a 100644 --- a/src/util.js +++ b/src/util.js @@ -174,6 +174,57 @@ var Util = (function UtilClosure() { return Util; })(); +// optimised CSS custom property getter/setter +var CustomStyle = (function CustomStyleClosure() { + + // As noted on: http://www.zachstronaut.com/posts/2009/02/17/ + // animate-css-transforms-firefox-webkit.html + // in some versions of IE9 it is critical that ms appear in this list + // before Moz + var prefixes = ['ms', 'Moz', 'Webkit', 'O']; + var _cache = { }; + + function CustomStyle() { + } + + CustomStyle.getProp = function get(propName, element) { + // check cache only when no element is given + if (arguments.length == 1 && typeof _cache[propName] == 'string') { + return _cache[propName]; + } + + element = element || document.documentElement; + var style = element.style, prefixed, uPropName; + + // test standard property first + if (typeof style[propName] == 'string') { + return (_cache[propName] = propName); + } + + // capitalize + uPropName = propName.charAt(0).toUpperCase() + propName.slice(1); + + // test vendor specific properties + for (var i = 0, l = prefixes.length; i < l; i++) { + prefixed = prefixes[i] + uPropName; + if (typeof style[prefixed] == 'string') { + return (_cache[propName] = prefixed); + } + } + + //if all fails then set to undefined + return (_cache[propName] = 'undefined'); + } + + CustomStyle.setProp = function set(propName, element, str) { + var prop = this.getProp(propName); + if (prop != 'undefined') + element.style[prop] = str; + } + + return CustomStyle; +})(); + var PDFStringTranslateTable = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2D8, 0x2C7, 0x2C6, 0x2D9, 0x2DD, 0x2DB, 0x2DA, 0x2DC, 0, 0, 0, 0, 0, 0, 0, diff --git a/src/worker_loader.js b/src/worker_loader.js index fc3d7a5f7..69eb1414f 100644 --- a/src/worker_loader.js +++ b/src/worker_loader.js @@ -24,7 +24,8 @@ var files = [ 'stream.js', 'worker.js', '../external/jpgjs/jpg.js', - 'jpx.js' + 'jpx.js', + 'bidi.js' ]; // Load all the files. diff --git a/test/test_slave.html b/test/test_slave.html index a2682a5a6..50bb067e1 100644 --- a/test/test_slave.html +++ b/test/test_slave.html @@ -23,6 +23,7 @@ + + diff --git a/web/viewer.js b/web/viewer.js index 2fb16b8cf..0a1ad7508 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -1051,12 +1051,13 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { textLayerDiv.appendChild(textDiv); if (textDiv.dataset.textLength > 1) { // avoid div by zero - // Adjust div width (via letterSpacing) to match canvas text + // Adjust div width to match canvas text // Due to the .offsetWidth calls, this is slow // This needs to come after appending to the DOM - textDiv.style.letterSpacing = - ((textDiv.dataset.canvasWidth - textDiv.offsetWidth) / - (textDiv.dataset.textLength - 1)) + 'px'; + var textScale = textDiv.dataset.canvasWidth / textDiv.offsetWidth; + CustomStyle.setProp('transform' , textDiv, + 'scale(' + textScale + ', 1)'); + CustomStyle.setProp('transformOrigin' , textDiv, '0% 0%'); } } // textLength > 0 } @@ -1096,7 +1097,8 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) { textDiv.style.fontSize = fontHeight + 'px'; textDiv.style.left = text.geom.x + 'px'; textDiv.style.top = (text.geom.y - fontHeight) + 'px'; - textDiv.textContent = text.str; + textDiv.textContent = bidi(text, -1); + textDiv.dir = text.direction; textDiv.dataset.textLength = text.length; this.textDivs.push(textDiv); };