Add more glue between glyph mapping and code mapping

2011-09-08 03:16:33 +02:00 · 2011-09-08 03:16:33 +02:00 · 567be29720
commit 567be29720
parent 92081af896
2 changed files with 71 additions and 56 deletions
--- a/fonts.js
+++ b/fonts.js
@@ -711,7 +711,7 @@ var Font = (function Font() {

    var encoding = properties.encoding;
    for (var index in encoding) {
-      var code = encoding[index];
+      var code = encoding[index].unicode;
      if (firstCharIndex > code || !firstCharIndex)
        firstCharIndex = code;
      if (lastCharIndex < code)
@@ -970,15 +970,9 @@ var Font = (function Font() {
              if (index) {
                deltas.push(index);

-                var code = encoding[index];
-                for (var glyph in properties.glyphs) {
-                  if (properties.glyphs[glyph] == code) 
-                    break;
-                }
-
                var unicode = j + kCmapGlyphOffset;
-                properties.glyphs[glyph] = encoding[j] = unicode;
-                glyphs.push({ glyph: glyph, unicode: unicode });
+                encoding[j].unicode = unicode;
+                glyphs.push({ unicode: unicode });
              }
            }
            
@@ -1023,8 +1017,10 @@ var Font = (function Font() {
            var start = denseRange[0];
            var end = denseRange[1];
            var index = firstCode;
-            for (var j = start; j <= end; j++)
-              encoding[index++] = glyphs[j - firstCode - 1].unicode;
+            for (var j = start; j <= end; j++) {
+              var code = j - firstCode - 1;
+              encoding[index++] = { unicode: glyphs[code].unicode };
+            }
            return cmap.data = createCMapTable(glyphs);
          }
        }
@@ -1118,23 +1114,6 @@ var Font = (function Font() {
        // U+00AD (soft hyphen) is not drawn.
        // So, offset all the glyphs by 0xFF to avoid these cases and use
        // the encoding to map incoming characters to the new glyph positions
-
-        var glyphs = [];
-        var encoding = properties.encoding;
-
-        for (var i = 1; i < numGlyphs; i++)
-          glyphs.push({ unicode: i + kCmapGlyphOffset });
-
-        if ('undefined' == typeof(encoding[0])) {
-          // the font is directly characters to glyphs with no encoding
-          // so create an identity encoding
-          for (i = 0; i < numGlyphs; i++)
-            encoding[i] = i + kCmapGlyphOffset;
-        } else {
-          for (var code in encoding)
-            encoding[code] += kCmapGlyphOffset;
-        }
-
        if (!cmap) {
          cmap = {
            tag: 'cmap',
@@ -1142,6 +1121,21 @@ var Font = (function Font() {
          };
          tables.push(cmap);
        }
+
+        var encoding = properties.encoding;
+        if (!encoding[0]) {
+          // The font maps character codes directly to glyphs and has no
+          // encoding, so create an identity encoding.
+          for (i = 0; i < numGlyphs; i++)
+            encoding[i] = { unicode: i + kCmapGlyphOffset };
+        } else {
+          for (var code in encoding)
+            encoding[code].unicode += kCmapGlyphOffset;
+        }
+
+        var glyphs = [];
+        for (var i = 1; i < numGlyphs; i++)
+          glyphs.push({ unicode: i + kCmapGlyphOffset });
        cmap.data = createCMapTable(glyphs);
      } else {
        replaceCMapTable(cmap, font, properties);
@@ -1361,14 +1355,14 @@ var Font = (function Font() {
                                       // loop should never end on the last byte
        for (var i = 0; i < length; i++) {
          var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]);
-          var unicode = encoding[charcode];
+          var unicode = encoding[charcode].unicode;
          str += String.fromCharCode(unicode);
        }
      }
      else {
        for (var i = 0; i < chars.length; ++i) {
          var charcode = chars.charCodeAt(i);
-          var unicode = encoding[charcode];
+          var unicode = encoding[charcode].unicode;
          if ('undefined' == typeof(unicode)) {
            warn('Unencoded charcode ' + charcode);
            unicode = charcode;
@@ -1376,7 +1370,7 @@ var Font = (function Font() {

          // Check if the glyph has already been converted
          if (!IsNum(unicode))
-            unicode = encoding[charcode] = this.glyphs[unicode];
+            unicode = encoding[charcode].unicode = this.glyphs[unicode].unicode;

          // Handle surrogate pairs
          if (unicode > 0xFFFF) {
@@ -1830,8 +1824,8 @@ var Type1Parser = function() {
                var glyph = getToken();
              
                if ('undefined' == typeof(properties.differences[index])) {
-                  properties.encoding[index] = glyph;
-                  properties.glyphs[glyph] = GlyphsUnicode[glyph] || index;
+                  var mapping = { unicode: GlyphsUnicode[glyph] || j };
+                  properties.glyphs[glyph] = properties.encoding[index] = mapping;
                }
                getToken(); // read the in 'put'
              }
@@ -2000,14 +1994,14 @@ CFF.prototype = {

    for (var i = 0; i < glyphs.length; i++) {
      var glyph = glyphs[i];
-      var unicode = properties.glyphs[glyph.glyph];
-      if (!unicode) {
+      var mapping = properties.glyphs[glyph.glyph];
+      if (!mapping) {
        if (glyph.glyph != '.notdef')
          missings.push(glyph.glyph);
      } else {
        charstrings.push({
          glyph: glyph.glyph,
-          unicode: unicode,
+          unicode: mapping.unicode,
          charstring: glyph.data,
          width: glyph.width,
          lsb: glyph.lsb
@@ -2340,17 +2334,24 @@ var Type2CFF = (function() {
          }
        }

-        if (code == -1)
-          index = code = properties.glyphs[glyph] || index;
+        if (code == -1) {
+          var mapping = properties.glyphs[glyph] || {};
+          index = code = mapping.unicode || index;
+        }

        var width = widths[code] || defaultWidth;
        if (code <= 0x1f || (code >= 127 && code <= 255))
          code += kCmapGlyphOffset;

-        properties.encoding[index] = code;
+        properties.glyphs[glyph] = properties.encoding[index] = {
+          unicode: code,
+          width: width
+        };
+
        charstrings.push({
          unicode: code,
-          width: width, gid: i
+          width: width,
+          gid: i
        });
        index++;
      }
--- a/pdf.js
+++ b/pdf.js
@@ -4194,13 +4194,19 @@ var PartialEvaluator = (function() {
          var glyphsData = glyphsStream.getBytes(0);

          // Glyph ids are big-endian 2-byte values
-          // Set this to 0 to verify the font has an encoding.
          var encoding = properties.encoding;
-          encoding[0] = 0;
+
+          // Set encoding 0 to later verify the font has an encoding
+          encoding[0] = { unicode: 0 };
          for (var j = 0; j < glyphsData.length; j++) {
            var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
-            if (glyphID != 0)
-              encoding[j >> 1] = glyphID;
+            if (glyphID == 0)
+              continue;
+
+            encoding[j >> 1] = {
+              unicode: glyphID,
+              width: 0
+            };
          }
        } else if (type == 'CIDFontType0') {
          var encoding = xref.fetchIfRef(dict.get('Encoding'));
@@ -4269,7 +4275,10 @@ var PartialEvaluator = (function() {
        var glyph = differences[i] || baseEncoding[i];
        if (glyph) {
          var index = GlyphsUnicode[glyph] || i;
-          glyphs[glyph] = map[i] = index;
+          glyphs[glyph] = map[i] = {
+            unicode: index,
+            width: properties.widths[i - firstChar] || properties.defaultWidth
+          };

          // If there is no file, the character mapping can't be modified
          // but this is unlikely that there is any standard encoding with
@@ -4278,7 +4287,7 @@ var PartialEvaluator = (function() {
            continue;

          if (index <= 0x1f || (index >= 127 && index <= 255))
-            glyphs[glyph] = map[i] += kCmapGlyphOffset;
+            map[i].unicode += kCmapGlyphOffset;
        }
      }

@@ -4316,7 +4325,10 @@ var PartialEvaluator = (function() {
                    var endRange = tokens[j + 1];
                    var code = tokens[j + 2];
                    while (startRange < endRange) {
-                      map[startRange] = code++;
+                      map[startRange] = {
+                        unicode: code++,
+                        width: 0
+                      }
                      ++startRange;
                    }
                  }
@@ -4327,7 +4339,10 @@ var PartialEvaluator = (function() {
                  for (var j = 0; j < tokens.length; j += 2) {
                    var index = tokens[j];
                    var code = tokens[j + 1];
-                    map[index] = code;
+                    map[index] = {
+                      unicode: code,
+                      width: 0
+                    };
                  }
                  break;

@@ -4478,19 +4493,18 @@ var PartialEvaluator = (function() {
        descent: descriptor.get('Descent'),
        xHeight: descriptor.get('XHeight'),
        capHeight: descriptor.get('CapHeight'),
+        defaultWidth: descriptor.get('MissingWidth') || 0,
        flags: descriptor.get('Flags'),
        italicAngle: descriptor.get('ItalicAngle'),
        differences: [],
-        widths: [],
+        widths: (function() {
+          var glyphWidths = {};
+          for (var i = 0; i <= widths.length; i++)
+            glyphWidths[firstChar++] = widths[i];
+          return glyphWidths;
+        })(),
        encoding: {}
      };
-
-      // XXX Encoding and Glyphs should point to the same object so it will
-      // be hard to be out of sync. The object could contains the unicode and
-      // the width of the glyph.
-      for (var i = 0; i <= widths.length; i++)
-        properties.widths[firstChar++] = widths[i];
-
      properties.glyphs = this.extractEncoding(dict, xref, properties);

      return {