From 709dc1a0c9f334d5079c03feb69323bec1c21372 Mon Sep 17 00:00:00 2001
From: notmasteryet <async.processingjs@yahoo.com>
Date: Thu, 24 Nov 2011 09:38:09 -0600
Subject: [PATCH 1/5] Initial ToUnicode modifications

---
 src/canvas.js    |  8 +++-----
 src/evaluator.js | 16 +++++++++++++---
 src/fonts.js     | 41 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/src/canvas.js b/src/canvas.js
index 9b3ed0ba9..8d6fb046d 100644
--- a/src/canvas.js
+++ b/src/canvas.js
@@ -474,13 +474,11 @@ var CanvasGraphics = (function canvasGraphics() {
             continue;
           }
 
-          var unicode = glyph.unicode;
-          var char = (unicode >= 0x10000) ?
-            String.fromCharCode(0xD800 | ((unicode - 0x10000) >> 10),
-            0xDC00 | (unicode & 0x3FF)) : String.fromCharCode(unicode);
-
+          var char = glyph.fontChar;
           ctx.fillText(char, width, 0);
           width += glyph.width * fontSize * 0.001 + charSpacing;
+
+          // TODO actual characters can be extracted from the glyph.unicode
         }
         current.x += width;
 
diff --git a/src/evaluator.js b/src/evaluator.js
index a863a531e..03fce2d9a 100644
--- a/src/evaluator.js
+++ b/src/evaluator.js
@@ -512,6 +512,7 @@ var PartialEvaluator = (function partialEvaluator() {
           error('Encoding is not a Name nor a Dict');
         }
       }
+
       properties.differences = differences;
       properties.baseEncoding = baseEncoding;
       properties.hasEncoding = hasEncoding;
@@ -595,9 +596,18 @@ var PartialEvaluator = (function partialEvaluator() {
             }
           } else if (byte == 0x3E) {
             if (token.length) {
-              // parsing hex number
-              tokens.push(parseInt(token, 16));
-              token = '';
+              if (token.length <= 4) {
+                // parsing hex number
+                tokens.push(parseInt(token, 16));
+                token = '';
+              } else {
+                // parsing hex UTF-16BE numbers
+                var str = [];
+                for (var i = 0, ii = token.length; i < ii; i += 4)
+                  str.push(parseInt(token.substr(i, 4), 16));
+                tokens.push(String.fromCharCode.apply(String, str));
+                token = '';
+              }
             }
           } else {
             token += String.fromCharCode(byte);
diff --git a/src/fonts.js b/src/fonts.js
index 116bb4dfc..d028a9786 100644
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -771,7 +771,6 @@ var Font = (function Font() {
     this.widths = properties.widths;
     this.defaultWidth = properties.defaultWidth;
     this.composite = properties.composite;
-    this.toUnicode = properties.toUnicode;
     this.hasEncoding = properties.hasEncoding;
 
     this.fontMatrix = properties.fontMatrix;
@@ -781,6 +780,11 @@ var Font = (function Font() {
     // Trying to fix encoding using glyph CIDSystemInfo.
     this.loadCidToUnicode(properties);
 
+    if (properties.toUnicode)
+      this.toUnicode = properties.toUnicode;
+    else
+      this.rebuildToUnicode(properties);
+
     if (!file) {
       // The file data is not specified. Trying to fix the font name
       // to be used with the canvas.font.
@@ -1898,6 +1902,29 @@ var Font = (function Font() {
       return stringToArray(otf.file);
     },
 
+    rebuildToUnicode: function font_rebuildToUnicode(properties) {
+      var map = [];
+      if (properties.composite) {
+        for (var i = properties.firstChar, ii = properties.lastChar; i <= ii; i++) {
+          // TODO missing map the character according font's CMap
+          var cid = i;
+          map[i] = this.cidToUnicode[cid];
+        }
+      } else {
+        for (var i = properties.firstChar, ii = properties.lastChar; i <= ii; i++) {
+          var glyph = properties.differences[i];
+          if (!glyph)
+            glyph = properties.baseEncoding[i];
+          if (!!glyph && (glyph in GlyphsUnicode))
+            map[i] = GlyphsUnicode[glyph]
+        }
+      }
+      this.toUnicode = map;
+      this.refreshToUnicode = function refreshToUnicode() {
+        this.font_rebuildToUnicode(properties);
+      };
+    },
+
     loadCidToUnicode: function font_loadCidToUnicode(properties) {
       if (properties.cidToGidMap) {
         this.cidToUnicode = properties.cidToGidMap;
@@ -2039,8 +2066,18 @@ var Font = (function Font() {
           warn('Unsupported font type: ' + this.type);
           break;
       }
+
+      var unicodeChars = this.toUnicode ? this.toUnicode[charcode] : charcode;
+      if (typeof unicodeChars === 'number') {
+        unicodeChars = (unicodeChars >= 0x10000) ?
+            String.fromCharCode(0xD800 | ((unicodeChars - 0x10000) >> 10),
+            0xDC00 | (unicodeChars & 0x3FF)) : String.fromCharCode(unicodeChars);
+        // TODO we probably don't need convert high/low surrogate... keeping for now
+      }
+
       return {
-        unicode: unicode,
+        fontChar: String.fromCharCode(unicode),
+        unicode: unicodeChars,
         width: isNum(width) ? width : this.defaultWidth,
         codeIRQueue: codeIRQueue
       };

From 08e3fd88ada78c094dfbe527854264b2c1c4dbb8 Mon Sep 17 00:00:00 2001
From: notmasteryet <async.processingjs@yahoo.com>
Date: Sun, 27 Nov 2011 20:43:23 -0600
Subject: [PATCH 2/5] Removing adaptUnicode; making cmap equal to ToUnicode
 tables

---
 src/evaluator.js |  18 ++-
 src/fonts.js     | 277 +++++++++++++++++++++++++++--------------------
 2 files changed, 172 insertions(+), 123 deletions(-)

diff --git a/src/evaluator.js b/src/evaluator.js
index 03fce2d9a..3e687c72d 100644
--- a/src/evaluator.js
+++ b/src/evaluator.js
@@ -555,9 +555,21 @@ var PartialEvaluator = (function partialEvaluator() {
                   var startRange = tokens[j];
                   var endRange = tokens[j + 1];
                   var code = tokens[j + 2];
-                  while (startRange <= endRange) {
-                    charToUnicode[startRange] = code++;
-                    ++startRange;
+                  if (code == 0xFFFF) {
+                    // CMap is broken, assuming code == startRange
+                    code = startRange;
+                  }
+                  if (isArray(code)) {
+                    var codeindex = 0;
+                    while (startRange <= endRange) {
+                      charToUnicode[startRange] = code[codeindex++];
+                      ++startRange;
+                    }
+                  } else {
+                    while (startRange <= endRange) {
+                      charToUnicode[startRange] = code++;
+                      ++startRange;
+                    }
                   }
                 }
                 break;
diff --git a/src/fonts.js b/src/fonts.js
index d028a9786..fb9bb9f0c 100644
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -719,20 +719,10 @@ function getUnicodeRangeFor(value) {
   return -1;
 }
 
-function adaptUnicode(unicode) {
-  return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ?
-    unicode + kCmapGlyphOffset : unicode;
-}
-
-function isAdaptedUnicode(unicode) {
-  return unicode >= kCmapGlyphOffset &&
-    unicode < kCmapGlyphOffset + kSizeOfGlyphArea;
-}
-
 function isSpecialUnicode(unicode) {
   return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ||
-    unicode >= kCmapGlyphOffset &&
-    unicode < kCmapGlyphOffset + kSizeOfGlyphArea;
+    (unicode >= kCmapGlyphOffset &&
+    unicode < kCmapGlyphOffset + kSizeOfGlyphArea);
 }
 
 /**
@@ -965,15 +955,15 @@ var Font = (function Font() {
     var ranges = [];
     for (var n = 0; n < length; ) {
       var start = codes[n].unicode;
-      var startCode = codes[n].code;
+      var codeIndices = [codes[n].code];
       ++n;
       var end = start;
       while (n < length && end + 1 == codes[n].unicode) {
+        codeIndices.push(codes[n].code);
         ++end;
         ++n;
       }
-      var endCode = codes[n - 1].code;
-      ranges.push([start, end, startCode, endCode]);
+      ranges.push([start, end, codeIndices]);
     }
 
     return ranges;
@@ -1016,17 +1006,16 @@ var Font = (function Font() {
         idDeltas += string16(0);
         idRangeOffsets += string16(offset);
 
-        var startCode = range[2];
-        var endCode = range[3];
-        for (var j = startCode; j <= endCode; ++j)
-          glyphsIds += string16(deltas[j]);
+        var codes = range[2];
+        for (var j = 0, jj = codes.length; j < jj; ++j)
+          glyphsIds += string16(deltas[codes[j]]);
       }
     } else {
       for (var i = 0; i < segCount - 1; i++) {
         var range = ranges[i];
         var start = range[0];
         var end = range[1];
-        var startCode = range[2];
+        var startCode = range[2][0];
 
         startCount += string16(start);
         endCount += string16(end);
@@ -1303,7 +1292,7 @@ var Font = (function Font() {
           properties.baseEncoding = encoding;
       }
 
-      function replaceCMapTable(cmap, font, properties) {
+      function readCMapTable(cmap, font) {
         var start = (font.start ? font.start : 0) + cmap.offset;
         font.pos = start;
 
@@ -1320,7 +1309,7 @@ var Font = (function Font() {
         }
 
         // Check that table are sorted by platformID then encodingID,
-        records.sort(function fontReplaceCMapTableSort(a, b) {
+        records.sort(function fontReadCMapTableSort(a, b) {
           return ((a.platformID << 16) + a.encodingID) -
                  ((b.platformID << 16) + b.encodingID);
         });
@@ -1375,16 +1364,15 @@ var Font = (function Font() {
             for (var j = 0; j < 256; j++) {
               var index = font.getByte();
               if (index) {
-                var unicode = adaptUnicode(j);
-                glyphs.push({ unicode: unicode, code: j });
+                glyphs.push({ unicode: j, code: j });
                 ids.push(index);
               }
             }
-
-            properties.hasShortCmap = true;
-
-            createGlyphNameMap(glyphs, ids, properties);
-            return cmap.data = createCMapTable(glyphs, ids);
+            return {
+              glyphs: glyphs,
+              ids: ids,
+              hasShortCmap: true
+            };
           } else if (format == 4) {
             // re-creating the table in format 4 since the encoding
             // might be changed
@@ -1436,17 +1424,18 @@ var Font = (function Font() {
                 var glyphCode = offsetIndex < 0 ? j :
                   offsets[offsetIndex + j - start];
                 glyphCode = (glyphCode + delta) & 0xFFFF;
-                if (glyphCode == 0 || isAdaptedUnicode(j))
+                if (glyphCode == 0)
                   continue;
 
-                var unicode = adaptUnicode(j);
-                glyphs.push({ unicode: unicode, code: j });
+                glyphs.push({ unicode: j, code: j });
                 ids.push(glyphCode);
               }
             }
 
-            createGlyphNameMap(glyphs, ids, properties);
-            return cmap.data = createCMapTable(glyphs, ids);
+            return {
+              glyphs: glyphs,
+              ids: ids
+            };
           } else if (format == 6) {
             // Format 6 is a 2-bytes dense mapping, which means the font data
             // lives glue together even if they are pretty far in the unicode
@@ -1461,19 +1450,18 @@ var Font = (function Font() {
             for (var j = 0; j < entryCount; j++) {
               var glyphCode = int16(font.getBytes(2));
               var code = firstCode + j;
-              if (isAdaptedUnicode(glyphCode))
-                continue;
 
-              var unicode = adaptUnicode(code);
-              glyphs.push({ unicode: unicode, code: code });
+              glyphs.push({ unicode: code, code: code });
               ids.push(glyphCode);
             }
 
-            createGlyphNameMap(glyphs, ids, properties);
-            return cmap.data = createCMapTable(glyphs, ids);
+            return {
+              glyphs: glyphs,
+              ids: ids
+            };
           }
         }
-        return cmap.data;
+        error('Unsupported cmap table format');
       };
 
       function sanitizeMetrics(font, header, metrics, numGlyphs) {
@@ -1712,17 +1700,60 @@ var Font = (function Font() {
           tables.push(cmap);
         }
 
-        var glyphs = [];
+        var glyphs = [], ids = [];
+        var usedUnicodes = [], unusedUnicode = kCmapGlyphOffset;
+        var cidToGidMap = properties.cidToGidMap;
         for (i = 1; i < numGlyphs; i++) {
-          if (isAdaptedUnicode(i))
-            continue;
-
-          glyphs.push({ unicode: adaptUnicode(i) });
+          var cid = cidToGidMap ? cidToGidMap.indexOf(i) : i;
+          var unicode = this.toUnicode[cid];
+          if (!unicode || isSpecialUnicode(unicode) ||
+              unicode in usedUnicodes) {
+            // overriding the special special symbols mapping
+            while (unusedUnicode in usedUnicodes)
+              unusedUnicode++;
+            this.toUnicode[cid] = unicode = unusedUnicode++;
+            if (unusedUnicode >= kCmapGlyphOffset + kSizeOfGlyphArea) {
+              // overflow of the user defined symblos range
+              // using symbols that a little bit lower than this range
+              unusedUnicode = kCmapGlyphOffset - numGlyphs;
+            }
+          }
+          usedUnicodes[unicode] = true;
+          glyphs.push({ unicode: unicode, code: cid });
+          ids.push(i);
         }
-        cmap.data = createCMapTable(glyphs);
+        cmap.data = createCMapTable(glyphs, ids);
       } else {
-        replaceCMapTable(cmap, font, properties);
+        var cmapTable = readCMapTable(cmap, font);
+        var glyphs = cmapTable.glyphs;
+        var ids = cmapTable.ids;
+        var hasShortCmap = !!cmapTable.hasShortCmap;
+        var toUnicode = this.toUnicode;
+
+        if (hasShortCmap && toUnicode) {
+          // checking if cmap is just identity map
+          var isIdentity = true;
+          for (var i = 0, ii = glyphs.length; i < ii; i++) {
+            if (glyphs[i].unicode != i + 1) {
+              isIdentity = false;
+              break;
+            }
+          }
+          // if it is, replacing with meaningful toUnicode values
+          if (isIdentity) {
+            for (var i = 0, ii = glyphs.length; i < ii; i++) {
+              var unicode = toUnicode[i + 1] || i + 1;
+              glyphs[i].unicode = unicode;
+            }
+            this.useToUnicode = true;
+          }
+        }
+        properties.hasShortCmap = hasShortCmap;
+
+        createGlyphNameMap(glyphs, ids, properties);
         this.glyphNameMap = properties.glyphNameMap;
+
+        cmap.data = createCMapTable(glyphs, ids);
       }
 
       // Rewrite the 'post' table if needed
@@ -1812,6 +1843,14 @@ var Font = (function Font() {
         }
         properties.baseEncoding = encoding;
       }
+      if (properties.subtype == 'CIDFontType0C') {
+        var toUnicode = [];
+        for (var i = 0; i < charstrings.length; ++i) {
+          var charstring = charstrings[i];
+          toUnicode[charstring.code] = charstring.unicode;
+        }
+        this.toUnicode = toUnicode;
+      }
 
       var fields = {
         // PostScript Font Program
@@ -1872,8 +1911,11 @@ var Font = (function Font() {
         // Horizontal metrics
         'hmtx': (function fontFieldsHmtx() {
           var hmtx = '\x00\x00\x00\x00'; // Fake .notdef
-          for (var i = 0, ii = charstrings.length; i < ii; i++)
-            hmtx += string16(charstrings[i].width) + string16(0);
+          for (var i = 0, ii = charstrings.length; i < ii; i++) {
+            var charstring = charstrings[i];
+            var width = 'width' in charstring ? charstring.width : 0;
+            hmtx += string16(width) + string16(0);
+          }
           return stringToArray(hmtx);
         })(),
 
@@ -1903,20 +1945,22 @@ var Font = (function Font() {
     },
 
     rebuildToUnicode: function font_rebuildToUnicode(properties) {
+      var firstChar = properties.firstChar, lastChar = properties.lastChar;
       var map = [];
       if (properties.composite) {
-        for (var i = properties.firstChar, ii = properties.lastChar; i <= ii; i++) {
+        var isIdentityMap = this.cidToUnicode.length == 0;
+        for (var i = firstChar, ii = lastChar; i <= ii; i++) {
           // TODO missing map the character according font's CMap
           var cid = i;
-          map[i] = this.cidToUnicode[cid];
+          map[i] = isIdentityMap ? cid : this.cidToUnicode[cid];
         }
       } else {
-        for (var i = properties.firstChar, ii = properties.lastChar; i <= ii; i++) {
+        for (var i = firstChar, ii = lastChar; i <= ii; i++) {
           var glyph = properties.differences[i];
           if (!glyph)
             glyph = properties.baseEncoding[i];
           if (!!glyph && (glyph in GlyphsUnicode))
-            map[i] = GlyphsUnicode[glyph]
+            map[i] = GlyphsUnicode[glyph];
         }
       }
       this.toUnicode = map;
@@ -1926,16 +1970,12 @@ var Font = (function Font() {
     },
 
     loadCidToUnicode: function font_loadCidToUnicode(properties) {
-      if (properties.cidToGidMap) {
-        this.cidToUnicode = properties.cidToGidMap;
-        return;
-      }
-
       if (!properties.cidSystemInfo)
         return;
 
-      var cidToUnicodeMap = [];
+      var cidToUnicodeMap = [], unicodeToCIDMap = [];
       this.cidToUnicode = cidToUnicodeMap;
+      this.unicodeToCID = unicodeToCIDMap;
 
       var cidSystemInfo = properties.cidSystemInfo;
       var cidToUnicode;
@@ -1947,28 +1987,34 @@ var Font = (function Font() {
       if (!cidToUnicode)
         return; // identity encoding
 
-      var glyph = 1, i, j, k, ii;
+      var cid = 1, i, j, k, ii;
       for (i = 0, ii = cidToUnicode.length; i < ii; ++i) {
         var unicode = cidToUnicode[i];
         if (isArray(unicode)) {
           var length = unicode.length;
-          for (j = 0; j < length; j++)
-            cidToUnicodeMap[unicode[j]] = glyph;
-          glyph++;
+          for (j = 0; j < length; j++) {
+            cidToUnicodeMap[cid] = unicode[j];
+            unicodeToCIDMap[unicode[j]] = cid;
+          }
+          cid++;
         } else if (typeof unicode === 'object') {
           var fillLength = unicode.f;
           if (fillLength) {
             k = unicode.c;
             for (j = 0; j < fillLength; ++j) {
-              cidToUnicodeMap[k] = glyph++;
+              cidToUnicodeMap[cid] = k;
+              unicodeToCIDMap[k] = cid;
+              cid++;
               k++;
             }
           } else
-            glyph += unicode.s;
+            cid += unicode.s;
         } else if (unicode) {
-          cidToUnicodeMap[unicode] = glyph++;
+          cidToUnicodeMap[cid] = unicode;
+          unicodeToCIDMap[unicode] = cid;
+          cid++;
         } else
-          glyph++;
+          cid++;
       }
     },
 
@@ -2008,19 +2054,19 @@ var Font = (function Font() {
       switch (this.type) {
         case 'CIDFontType0':
           if (this.noUnicodeAdaptation) {
-            width = this.widths[this.cidToUnicode[charcode]];
+            width = this.widths[this.unicodeToCID[charcode] || charcode];
             unicode = charcode;
             break;
           }
-          unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode);
+          unicode = this.toUnicode[charcode] || charcode;
           break;
         case 'CIDFontType2':
           if (this.noUnicodeAdaptation) {
-            width = this.widths[this.cidToUnicode[charcode]];
+            width = this.widths[this.unicodeToCID[charcode] || charcode];
             unicode = charcode;
             break;
           }
-          unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode);
+          unicode = this.toUnicode[charcode] || charcode;
           break;
         case 'Type1':
           var glyphName = this.differences[charcode] || this.encoding[charcode];
@@ -2031,7 +2077,7 @@ var Font = (function Font() {
             break;
           }
           unicode = this.glyphNameMap[glyphName] ||
-            adaptUnicode(GlyphsUnicode[glyphName] || charcode);
+            GlyphsUnicode[glyphName] || charcode;
           break;
         case 'Type3':
           var glyphName = this.differences[charcode] || this.encoding[charcode];
@@ -2049,16 +2095,16 @@ var Font = (function Font() {
             break;
           }
           if (!this.hasEncoding) {
-            unicode = adaptUnicode(charcode);
+            unicode = this.useToUnicode ? this.toUnicode[charcode] : charcode;
             break;
           }
-          if (this.hasShortCmap) {
+          if (this.hasShortCmap && false) {
             var j = Encodings.MacRomanEncoding.indexOf(glyphName);
-            unicode = j >= 0 && !isSpecialUnicode(j) ? j :
+            unicode = j >= 0 ? j :
               this.glyphNameMap[glyphName];
           } else {
             unicode = glyphName in GlyphsUnicode ?
-              adaptUnicode(GlyphsUnicode[glyphName]) :
+              GlyphsUnicode[glyphName] :
               this.glyphNameMap[glyphName];
           }
           break;
@@ -2068,12 +2114,8 @@ var Font = (function Font() {
       }
 
       var unicodeChars = this.toUnicode ? this.toUnicode[charcode] : charcode;
-      if (typeof unicodeChars === 'number') {
-        unicodeChars = (unicodeChars >= 0x10000) ?
-            String.fromCharCode(0xD800 | ((unicodeChars - 0x10000) >> 10),
-            0xDC00 | (unicodeChars & 0x3FF)) : String.fromCharCode(unicodeChars);
-        // TODO we probably don't need convert high/low surrogate... keeping for now
-      }
+      if (typeof unicodeChars === 'number')
+        unicodeChars = String.fromCharCode(unicodeChars);
 
       return {
         fontChar: String.fromCharCode(unicode),
@@ -2790,22 +2832,13 @@ CFF.prototype = {
   getOrderedCharStrings: function cff_getOrderedCharStrings(glyphs,
                                                             properties) {
     var charstrings = [];
-    var reverseMapping = {};
-    var encoding = properties.baseEncoding;
     var i, length, glyphName;
-    for (i = 0, length = encoding.length; i < length; ++i) {
-      glyphName = encoding[i];
-      if (!glyphName || isSpecialUnicode(i))
-        continue;
-      reverseMapping[glyphName] = i;
-    }
-    reverseMapping['.notdef'] = 0;
     var unusedUnicode = kCmapGlyphOffset;
     for (i = 0, length = glyphs.length; i < length; i++) {
       var item = glyphs[i];
       var glyphName = item.glyph;
-      var unicode = glyphName in reverseMapping ?
-        reverseMapping[glyphName] : unusedUnicode++;
+      var unicode = glyphName in GlyphsUnicode ?
+        GlyphsUnicode[glyphName] : unusedUnicode++;
       charstrings.push({
         glyph: glyphName,
         unicode: unicode,
@@ -3092,16 +3125,14 @@ var Type2CFF = (function type2CFF() {
       }
 
       var charStrings = this.parseIndex(topDict.CharStrings);
-      var charset = this.parseCharsets(topDict.charset,
-                                       charStrings.length, strings);
-      var encoding = this.parseEncoding(topDict.Encoding, properties,
-                                             strings, charset);
 
       var charset, encoding;
       var isCIDFont = properties.subtype == 'CIDFontType0C';
       if (isCIDFont) {
-        charset = [];
-        charset.length = charStrings.length;
+        charset = ['.notdef'];
+        for (var i = 1, ii = charStrings.length; i < ii; ++i)
+          charset.push('glyph' + i);
+
         encoding = this.parseCidMap(topDict.charset,
                                     charStrings.length);
       } else {
@@ -3170,38 +3201,44 @@ var Type2CFF = (function type2CFF() {
       var charstrings = [];
       var unicodeUsed = [];
       var unassignedUnicodeItems = [];
+      var inverseEncoding = [];
+      for (var charcode in encoding)
+        inverseEncoding[encoding[charcode]] = charcode | 0;
       for (var i = 0, ii = charsets.length; i < ii; i++) {
         var glyph = charsets[i];
-        var encodingFound = false;
-        for (var charcode in encoding) {
-          if (encoding[charcode] == i) {
-            var code = charcode | 0;
-            charstrings.push({
-              unicode: adaptUnicode(code),
-              code: code,
-              gid: i,
-              glyph: glyph
-            });
-            unicodeUsed[code] = true;
-            encodingFound = true;
-            break;
-          }
+        if (glyph == '.notdef') {
+          charstrings.push({
+            unicode: 0,
+            code: 0,
+            gid: i,
+            glyph: glyph
+          });
+          continue;
         }
-        if (!encodingFound) {
+        var code = inverseEncoding[i];
+        if (!code || isSpecialUnicode(code)) {
           unassignedUnicodeItems.push(i);
+          continue;
         }
+        charstrings.push({
+          unicode: code,
+          code: code,
+          gid: i,
+          glyph: glyph
+        });
+        unicodeUsed[code] = true;
       }
 
-      var nextUnusedUnicode = 0x21;
+      var nextUnusedUnicode = kCmapGlyphOffset;
       for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; ++j) {
         var i = unassignedUnicodeItems[j];
         // giving unicode value anyway
-        while (unicodeUsed[nextUnusedUnicode])
+        while (nextUnusedUnicode in unicodeUsed)
           nextUnusedUnicode++;
-        var code = nextUnusedUnicode++;
+        var unicode = nextUnusedUnicode++;
         charstrings.push({
-          unicode: adaptUnicode(code),
-          code: code,
+          unicode: unicode,
+          code: inverseEncoding[i] || 0,
           gid: i,
           glyph: charsets[i]
         });

From e4e864766df1c2252197113378ad0e191665e219 Mon Sep 17 00:00:00 2001
From: notmasteryet <async.processingjs@yahoo.com>
Date: Mon, 28 Nov 2011 19:47:37 -0600
Subject: [PATCH 3/5] Fix CIDFontType2 large cmap tables

---
 src/fonts.js | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/fonts.js b/src/fonts.js
index fb9bb9f0c..734b38c4a 100644
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -1701,27 +1701,37 @@ var Font = (function Font() {
         }
 
         var glyphs = [], ids = [];
-        var usedUnicodes = [], unusedUnicode = kCmapGlyphOffset;
+        var usedUnicodes = [];
         var cidToGidMap = properties.cidToGidMap;
-        for (i = 1; i < numGlyphs; i++) {
+        var unassignedUnicodeItems = [];
+        for (var i = 1; i < numGlyphs; i++) {
           var cid = cidToGidMap ? cidToGidMap.indexOf(i) : i;
           var unicode = this.toUnicode[cid];
           if (!unicode || isSpecialUnicode(unicode) ||
               unicode in usedUnicodes) {
-            // overriding the special special symbols mapping
-            while (unusedUnicode in usedUnicodes)
-              unusedUnicode++;
-            this.toUnicode[cid] = unicode = unusedUnicode++;
-            if (unusedUnicode >= kCmapGlyphOffset + kSizeOfGlyphArea) {
-              // overflow of the user defined symblos range
-              // using symbols that a little bit lower than this range
-              unusedUnicode = kCmapGlyphOffset - numGlyphs;
-            }
+            unassignedUnicodeItems.push(i);
+            continue;
           }
           usedUnicodes[unicode] = true;
           glyphs.push({ unicode: unicode, code: cid });
           ids.push(i);
         }
+        // checking if unassigned symbols will fit the user defined symbols
+        // if those symbols too many, probably they will not be used anyway
+        if (unassignedUnicodeItems.length <= kSizeOfGlyphArea) {
+          var unusedUnicode = kCmapGlyphOffset;
+          for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j++) {
+            var i = unassignedUnicodeItems[j];
+            var cid = cidToGidMap ? cidToGidMap.indexOf(i) : i;
+            while (unusedUnicode in usedUnicodes)
+              unusedUnicode++;
+            var unicode = unusedUnicode++;
+            this.toUnicode[cid] = unicode;
+            usedUnicodes[unicode] = true;
+            glyphs.push({ unicode: unicode, code: cid });
+            ids.push(i);
+          }
+        }
         cmap.data = createCMapTable(glyphs, ids);
       } else {
         var cmapTable = readCMapTable(cmap, font);

From 88310a09c2470589aa4343d9da9f5c43427a060a Mon Sep 17 00:00:00 2001
From: notmasteryet <async.processingjs@yahoo.com>
Date: Mon, 28 Nov 2011 19:49:02 -0600
Subject: [PATCH 4/5] Removing refreshToUnicode

---
 src/fonts.js | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/fonts.js b/src/fonts.js
index 734b38c4a..9ec87227e 100644
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -1974,9 +1974,6 @@ var Font = (function Font() {
         }
       }
       this.toUnicode = map;
-      this.refreshToUnicode = function refreshToUnicode() {
-        this.font_rebuildToUnicode(properties);
-      };
     },
 
     loadCidToUnicode: function font_loadCidToUnicode(properties) {

From 683a8f0de8d9155a34f1ba422cc5d12ee4133b9b Mon Sep 17 00:00:00 2001
From: notmasteryet <async.processingjs@yahoo.com>
Date: Mon, 28 Nov 2011 21:54:07 -0600
Subject: [PATCH 5/5] Remove indexOf, using reverse map instead

---
 src/fonts.js | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/src/fonts.js b/src/fonts.js
index 9ec87227e..9aabb3f57 100644
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -1700,12 +1700,19 @@ var Font = (function Font() {
           tables.push(cmap);
         }
 
+        var cidToGidMap = properties.cidToGidMap || [];
+        var gidToCidMap = [0];
+        for (var j = cidToGidMap.length - 1; j >= 0; j--) {
+          var gid = cidToGidMap[j];
+          if (gid)
+            gidToCidMap[gid] = j;
+        }
+
         var glyphs = [], ids = [];
         var usedUnicodes = [];
-        var cidToGidMap = properties.cidToGidMap;
         var unassignedUnicodeItems = [];
         for (var i = 1; i < numGlyphs; i++) {
-          var cid = cidToGidMap ? cidToGidMap.indexOf(i) : i;
+          var cid = gidToCidMap[i] || i;
           var unicode = this.toUnicode[cid];
           if (!unicode || isSpecialUnicode(unicode) ||
               unicode in usedUnicodes) {
@@ -1716,21 +1723,21 @@ var Font = (function Font() {
           glyphs.push({ unicode: unicode, code: cid });
           ids.push(i);
         }
-        // checking if unassigned symbols will fit the user defined symbols
-        // if those symbols too many, probably they will not be used anyway
-        if (unassignedUnicodeItems.length <= kSizeOfGlyphArea) {
-          var unusedUnicode = kCmapGlyphOffset;
-          for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j++) {
-            var i = unassignedUnicodeItems[j];
-            var cid = cidToGidMap ? cidToGidMap.indexOf(i) : i;
-            while (unusedUnicode in usedUnicodes)
-              unusedUnicode++;
-            var unicode = unusedUnicode++;
-            this.toUnicode[cid] = unicode;
-            usedUnicodes[unicode] = true;
-            glyphs.push({ unicode: unicode, code: cid });
-            ids.push(i);
-          }
+        // trying to fit as many unassigned symbols as we can
+        // in the range allocated for the user defined symbols
+        var unusedUnicode = kCmapGlyphOffset;
+        for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j++) {
+          var i = unassignedUnicodeItems[j];
+          var cid = gidToCidMap[i] || i;
+          while (unusedUnicode in usedUnicodes)
+            unusedUnicode++;
+          if (unusedUnicode >= kCmapGlyphOffset + kSizeOfGlyphArea)
+            break;
+          var unicode = unusedUnicode++;
+          this.toUnicode[cid] = unicode;
+          usedUnicodes[unicode] = true;
+          glyphs.push({ unicode: unicode, code: cid });
+          ids.push(i);
         }
         cmap.data = createCMapTable(glyphs, ids);
       } else {