Second pass CIDFont support - make Arial Unicode work in OpenOffice PDFs

- supports PDF fonts with CIDtoGIDMap and no cmap
2011-07-11 17:41:47 +01:00 · 2011-07-11 17:41:47 +01:00 · 7b8542c6a7
commit 7b8542c6a7
parent ea2d651709
2 changed files with 125 additions and 43 deletions
--- a/fonts.js
+++ b/fonts.js
@ -404,12 +404,21 @@ var Font = (function() {
        data = this.checkAndRepair(name, file, properties);
        break;
      case 'Type0':
        // This is a TrueType font
        this.mimetype = 'font/opentype';
        // Repair the TrueType file if it could be considered damaged from the
        // point of view of the sanitizer
        data = this.checkAndRepair(name, file, properties);
        break;
      default:
        warn('Font ' + properties.type + ' is not supported');
        break;
    }
    this.data = data;
-
+    this.type = properties.type; //use the type to test if the string is single or multi-byte
    this.id = Fonts.registerFont(name, data, properties);
    this.loadedName = 'pdfFont' + this.id;
  };
@ -856,8 +865,26 @@ var Font = (function() {
          data: stringToArray(createOS2Table(properties))
        });
-        // Replace the old CMAP table with a shiny new one
+        if (!cmap) {
-        replaceCMapTable(cmap, font, properties);
+          var glyphs = [];
          var charset = properties.charset;
          for (var i=1; i < charset.length; i++) {
            if (charset.indexOf(i) != -1) {
              glyphs.push({
                unicode: charset.indexOf(i)
              });
            } else {
              break;
            }
          }
          tables.push({
            tag: 'cmap',
            data: createCMapTable(glyphs)
          })
        } else {
          // Replace the old CMAP table with a shiny new one
          replaceCMapTable(cmap, font, properties);          
        }
        // Rewrite the 'post' table if needed
        if (!post) {
@ -1110,44 +1137,63 @@ var Font = (function() {
    charsToUnicode: function fonts_chars2Unicode(chars) {
      var charsCache = this.charsCache;
      var str;
      // if we translated this string before, just grab it from the cache
      if (charsCache) {
-        var str = charsCache[chars];
+        str = charsCache[chars];
        if (str)
          return str;
      }
      // translate the string using the font's encoding
      var encoding = this.encoding;
      if (!encoding)
        return chars;
      // lazily create the translation cache
      if (!charsCache)
        charsCache = this.charsCache = Object.create(null);
-      str = '';
+      if (this.type == "Type0") {
-      for (var i = 0; i < chars.length; ++i) {
+        //string needs to be converted from byte to multi-byte assume for now two-byte
-        var charcode = chars.charCodeAt(i);
+        str = '';
-        var unicode = encoding[charcode];
+        var multiByteStr = "";
-        if ('undefined' == typeof(unicode)) {
+        var length = chars.length;
-          // FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
+        for (var i = 0; i < length; i++) {
-          // at the moment, for unknown reasons.
+          var byte1 = chars.charCodeAt(i++) & 0xFF;
-          warn('Unencoded charcode '+ charcode);
+          var byte2;
-          unicode = charcode;
+          if (i == length)
            byte2 = 0;
          else
            byte2 = chars.charCodeAt(i) & 0xFF;
          multiByteStr += String.fromCharCode((byte1<<8) | byte2);
        }
        str = multiByteStr;
      }
      else {
        // translate the string using the font's encoding
        var encoding = this.encoding;
        if (!encoding)
          return chars;
-        // Check if the glyph has already been converted
+        str = '';
-        if (!IsNum(unicode))
+        for (var i = 0; i < chars.length; ++i) {
-          unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
+          var charcode = chars.charCodeAt(i);
          var unicode = encoding[charcode];
          if ('undefined' == typeof(unicode)) {
            // FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
            // at the moment, for unknown reasons.
            warn('Unencoded charcode '+ charcode);
            unicode = charcode;
          }
-        // Handle surrogate pairs
+          // Check if the glyph has already been converted
-        if (unicode > 0xFFFF) {
+          if (!IsNum(unicode))
-          str += String.fromCharCode(unicode & 0xFFFF);
+            unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
-          unicode >>= 16;
+  
          // Handle surrogate pairs
          if (unicode > 0xFFFF) {
            str += String.fromCharCode(unicode & 0xFFFF);
            unicode >>= 16;
          }
          str += String.fromCharCode(unicode);
        }
        str += String.fromCharCode(unicode);
      }
      // Enter the translated string into the cache
--- a/pdf.js
+++ b/pdf.js
@ -64,6 +64,14 @@ function stringToBytes(str) {
  return bytes;
 }
 // Packs a string of single-byte characters into a string of two-byte
 // character codes: each consecutive pair of bytes (high byte first) becomes
 // one character with code (high << 8) | low.
 //
 // str: the input string; it is handed to stringToBytes(), which is assumed to
 //      return an indexable sequence of byte values (0-255) - TODO confirm
 //      against its definition.
 // Returns the packed string. An odd-length input is padded with a zero low
 // byte for its final character; an empty input yields ''.
 function singleByteToMultiByteString (str) {
  var multiByteStr = "";
  // Convert the parameter itself (the previous code referenced an undefined
  // identifier here).
  var bytes = stringToBytes(str);
  var length = bytes.length;
  for (var j = 0; j < length; j += 2) {
    var high = bytes[j];
    // A trailing unpaired byte has no low byte; treat it as 0.
    var low = (j + 1 < length) ? bytes[j + 1] : 0;
    // Shift by 8, not 16: String.fromCharCode only keeps the low 16 bits, so
    // a 16-bit shift would silently discard the high byte.
    multiByteStr += String.fromCharCode((high << 8) | low);
  }
  return multiByteStr;
 }
 var Stream = (function() {
  function constructor(arrayBuffer, start, length, dict) {
    this.bytes = new Uint8Array(arrayBuffer);
@ -3624,19 +3632,26 @@ var PartialEvaluator = (function() {
    },
    translateFont: function(fontDict, xref, resources) {
-      var fd = fontDict.get('FontDescriptor');
+      var fd;
-      if (!fd)
+      var descendant = [];
      var subType = fontDict.get('Subtype');
      assertWellFormed(IsName(subType), 'invalid font Subtype');
      //If font is a composite get the FontDescriptor from the descendant font
      if (subType.name == "Type0")
      {
        //If font is a composite get the FontDescriptor from the descendant
        var df = fontDict.get("DescendantFonts");
        if (!df)
          return null;
-        var descendant = xref.fetch(df[0]);
+        descendant = xref.fetch(df[0]);
        fd = descendant.get("FontDescriptor");
-        if (!fd)
+      } else {
-          return null;
+        fd = fontDict.get('FontDescriptor');
        fontDict.set("FontDescriptor", fd);
      }
      if (!fd)
        return null;
      var descriptor = xref.fetch(fd);
      var fontName = descriptor.get('FontName');
@ -3650,7 +3665,32 @@ var PartialEvaluator = (function() {
      var encodingMap = {};
      var charset = [];
-      if (fontDict.has('Encoding')) {
+      if (subType.name == 'Type0') {
        //XXX CIDFont support - only identity CID Encoding for now
        var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
        if (IsName(encoding)) {
          //Encoding is a predefined CMap
          if (encoding.name == 'Identity-H') {
            if (descendant.get('Subtype').name == 'CIDFontType2')
            {
              //Extract an encoding from the CIDToGIDMap
              var glyphsStream = xref.fetchIfRef(descendant.get('CIDToGIDMap'));
              var glyphsData = glyphsStream.getBytes(0);
              var i = 0;
              for (var j=0; j<glyphsData.length; j++) {
                var glyphID = (glyphsData[j++]*0x100)+glyphsData[j];
                //encodingMap[glyphID] = i++;
                charset.push(glyphID);
              }
              encoding[0] = 0;
            }
          } else {
            TODO ('Need to support predefined CMaps see PDF 32000-1:2008 9.7.5.2 Predefined CMaps')
          }
        } else {
          TODO ('Need to support encoding streams see PDF 32000-1:2008  9.7.5.3'); 
        }
      } else if (fontDict.has('Encoding')) {
        var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
        if (IsDict(encoding)) {
          // Build a map of between codes and glyphs
@ -3682,7 +3722,6 @@ var PartialEvaluator = (function() {
          }
        } else if (IsName(encoding)) {
          var encoding = Encodings[encoding.name];
          //XXX CIDFont support - get the CID Encoding especially support japan1 and identity
          if (!encoding)
            error('Unknown font encoding');
@ -3767,9 +3806,6 @@ var PartialEvaluator = (function() {
        }
      }
      var subType = fontDict.get('Subtype');
      assertWellFormed(IsName(subType), 'invalid font Subtype');
      var properties = {
        type: subType.name,
        encoding: encodingMap,