Change the way Type 2 CID fonts are encoded. Move the cmap glyphs above the first 255 Unicode values so that canvas fillText does not alter them, and create an encoding that maps incoming characters to the relocated glyphs.

2011-08-19 14:04:34 +01:00 · 2011-08-19 14:04:34 +01:00 · 5a528944f0
commit 5a528944f0
parent 170871bbfa
4 changed files with 616 additions and 39 deletions
--- a/fonts.js
+++ b/fonts.js
@ -1032,26 +1032,27 @@ var Font = (function Font() {
      if (properties.type == 'CIDFontType2') {
        // Type2 composite fonts map characters directly to glyphs so the cmap
        // table must be replaced.
+        // canvas fillText will reencode some characters even if the font has a
+        // glyph at that position - e.g. newline is converted to a space and U+00AD
+        // (soft hyphen) is not drawn.
+        // So, offset all the glyphs by 0xFF to avoid these cases and use
+        // the encoding to map incoming characters to the new glyph positions

        var glyphs = [];
-        var charset = properties.charset;
-        if (!charset.length) {
-          // Type2 composite fonts map characters directly to glyphs so the cmap
-          for (var i = 1; i < numGlyphs; i++) {
-            glyphs.push({
-              unicode: i
-            });
-          }
-        } else {
-          for (var i = 1; i < charset.length; i++) {
-            var index = charset.indexOf(i);
-            if (index == -1)
-              break;
+        var encoding = properties.encoding;

-            glyphs.push({
-              unicode: index
-            });
-          }
+        for (var i = 1; i < numGlyphs; i++) {
+          glyphs.push({ unicode: i + 0xFF });
+        }
+
+        if ('undefined' == typeof(encoding[0])) {
+          // the font maps characters directly to glyphs with no encoding
+          // so create an identity encoding
+          for (i = 0; i < numGlyphs; i++)
+            encoding[i] = i + 0xFF;
+        } else {
+          for (var i in encoding)
+            encoding[i] = encoding[i] + 0xFF;
        }

        if (!cmap) {
@ -1274,31 +1275,26 @@ var Font = (function Font() {
      if (!charsCache)
        charsCache = this.charsCache = Object.create(null);

+      // translate the string using the font's encoding
+      var encoding = this.encoding;
+      if (!encoding)
+        return chars;
+      str = '';
+
      if (this.compositeFont) {
        // composite fonts have multi-byte strings convert the string from
-        // single-byte to multi-byte XXX assuming CIDFonts are two-byte - later
-        // need to extract the correct byte encoding according to the PDF spec
-        str = '';
-        var multiByteStr = '';
-        var length = chars.length;
+        // single-byte to multi-byte
+        // XXX assuming CIDFonts are two-byte - later need to extract the
+        // correct byte encoding according to the PDF spec
+        var length = chars.length - 1; // looping over two bytes at a time so
+                                       // loop should never end on the last byte
        for (var i = 0; i < length; i++) {
-          var byte1 = chars.charCodeAt(i++) & 0xFF;
-          var byte2;
-          if (i == length)
-            byte2 = 0;
-          else
-            byte2 = chars.charCodeAt(i) & 0xFF;
-          multiByteStr += String.fromCharCode((byte1 << 8) | byte2);
+          var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]);
+          var unicode = encoding[charcode];
+          str += String.fromCharCode(unicode);
        }
-        str = multiByteStr;
      }
      else {
-        // translate the string using the font's encoding
-        var encoding = this.encoding;
-        if (!encoding)
-          return chars;
-
-        str = '';
        for (var i = 0; i < chars.length; ++i) {
          var charcode = chars.charCodeAt(i);
          var unicode = encoding[charcode];
--- a/pdf.js
+++ b/pdf.js
@ -4028,14 +4028,15 @@ var PartialEvaluator = (function() {
        if (subType.name == 'CIDFontType2') {
          var cidToGidMap = descendant.get('CIDToGIDMap');
          if (cidToGidMap && IsRef(cidToGidMap)) {
-            // Extract the charset from the CIDToGIDMap
+            // Extract the encoding from the CIDToGIDMap
            var glyphsStream = xref.fetchIfRef(cidToGidMap);
            var glyphsData = glyphsStream.getBytes(0);
-            var i = 0;
            // Glyph ids are big-endian 2-byte values
+            encodingMap[0] = 0; //set this to 0 to verify the font has an encoding
            for (var j = 0; j < glyphsData.length; j++) {
              var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
-              charset.push(glyphID);
+              if (glyphID != 0)
+                encodingMap[j>>1] = glyphID;
            }
          }
        }
--- a/test/pdfs/complex_ttf_font.pdf
+++ b/test/pdfs/complex_ttf_font.pdf
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@ -64,6 +64,11 @@
       "rounds": 1,
       "type": "load"
    },
+    {  "id": "complexttffont-pdf",
+       "file": "pdfs/complex_ttf_font.pdf",
+       "rounds": 1,
+       "type": "load"
+    },
    {  "id": "i9-pdf",
       "file": "pdfs/i9.pdf",
       "link": true,