From 38ffebc9876f257db7e2639238d3d3dbe25bad4b Mon Sep 17 00:00:00 2001 From: vyv03354 Date: Tue, 15 Jan 2013 23:20:58 +0900 Subject: [PATCH] Supports 'H/V', 'EUC-H/V' and '90ms*' CMaps --- src/fonts.js | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index dda0c0df3..928c63b0a 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -401,6 +401,48 @@ var symbolsFonts = { 'Dingbats': true, 'Symbol': true, 'ZapfDingbats': true }; +var CMapConverterList = { + 'H': jis7ToUnicode, + 'V': jis7ToUnicode, + 'EUC-H': eucjpToUnicode, + 'EUC-V': eucjpToUnicode, + '90ms-RKSJ-H': sjisToUnicode, + '90ms-RKSJ-V': sjisToUnicode, + '90msp-RKSJ-H': sjisToUnicode, + '90msp-RKSJ-V': sjisToUnicode +}; + +var decodeBytes; +if (typeof TextDecoder !== 'undefined') { + decodeBytes = function(bytes, encoding) { + return new TextDecoder(encoding).decode(bytes); + }; +} else if (typeof FileReaderSync !== 'undefined') { + decodeBytes = function(bytes, encoding) { + return new FileReaderSync().readAsText(new Blob([bytes]), encoding); + }; +} else { + // Clear the list so that decodeBytes will never be called. + CMapConverterList = {}; +} + +function jis7ToUnicode(str) { + var bytes = stringToBytes(str); + var length = bytes.length; + for (var i = 0; i < length; ++i) { + bytes[i] |= 0x80; + } + return decodeBytes(bytes, 'euc-jp'); +} + +function eucjpToUnicode(str) { + return decodeBytes(stringToBytes(str), 'euc-jp'); +} + +function sjisToUnicode(str) { + return decodeBytes(stringToBytes(str), 'shift_jis'); +} + // Some characters, e.g. copyrightserif, mapped to the private use area and // might not be displayed using standard fonts. Mapping/hacking well-known chars // to the similar equivalents in the normal characters range. @@ -2282,6 +2324,7 @@ var Font = (function FontClosure() { // Trying to fix encoding using glyph CIDSystemInfo. this.loadCidToUnicode(properties); + this.cidEncoding = properties.cidEncoding; if (properties.toUnicode) this.toUnicode = properties.toUnicode; @@ -4128,8 +4171,8 @@ var Font = (function FontClosure() { } var cidEncoding = properties.cidEncoding; - if (cidEncoding && cidEncoding.indexOf('Uni') === 0) { - // input is already Unicode for Uni* CMap encodings. + if (cidEncoding && cidEncoding.indexOf('Identity-') !== 0) { + // input is already Unicode for non-Identity CMap encodings. // However, Unicode-to-CID conversion is needed // regardless of the CMap encoding. So we can't reset // unicodeToCID. @@ -4304,8 +4347,20 @@ var Font = (function FontClosure() { charsCache = this.charsCache = Object.create(null); glyphs = []; + var charsCacheKey = chars; - if (this.wideChars) { + var converter; + var cidEncoding = this.cidEncoding; + if (cidEncoding) { + converter = CMapConverterList[cidEncoding]; + if (converter) { + chars = converter(chars); + } else if (cidEncoding.indexOf('Uni') !== 0 && + cidEncoding.indexOf('Identity-') !== 0) { + warn('Unsupported CMap: ' + cidEncoding); + } + } + if (!converter && this.wideChars) { // composite fonts have multi-byte strings convert the string from // single-byte to multi-byte // XXX assuming CIDFonts are two-byte - later need to extract the @@ -4332,7 +4387,7 @@ var Font = (function FontClosure() { } // Enter the translated string into the cache - return (charsCache[chars] = glyphs); + return (charsCache[charsCacheKey] = glyphs); } };