From d58dac0fd384a803d41b92928706446bdbaae53c Mon Sep 17 00:00:00 2001
From: notmasteryet <async.processingjs@yahoo.com>
Date: Mon, 22 Aug 2011 22:50:17 -0500
Subject: [PATCH 1/2] Encoding for standard fonts; symbols encoding

---
 fonts.js |   3 --
 pdf.js   | 109 ++++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 79 insertions(+), 33 deletions(-)

diff --git a/fonts.js b/fonts.js
index 3d47ef4a2..9049255e7 100755
--- a/fonts.js
+++ b/fonts.js
@@ -404,9 +404,6 @@ var Font = (function Font() {
                     (fontName.indexOf('Italic') != -1);
       this.loadedName = fontName.split('-')[0];
       this.loading = false;
-      this.charsToUnicode = function(s) {
-        return s;
-      };
       return;
     }
 
diff --git a/pdf.js b/pdf.js
index 469eec578..53d3b9853 100644
--- a/pdf.js
+++ b/pdf.js
@@ -3851,6 +3851,44 @@ var Encodings = {
       'ucircumflex', 'udieresis', 'yacute', 'thorn', 'ydieresis'
     ]);
   },
+  get symbolsEncoding() { // Symbol font glyph names, indexed by char code
+    return shadow(this, 'symbolsEncoding', // presumably cached; confirm
+      [,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, // elided slots = codes with no glyph
+      'space', 'exclam', 'universal', 'numbersign', 'existential', 'percent',
+      'ampersand', 'suchthat', 'parenleft', 'parenright', 'asteriskmath',
+      'plus', 'comma', 'minus', 'period', 'slash', 'zero', 'one', 'two',
+      'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon',
+      'semicolon', 'less', 'equal', 'greater', 'question', 'congruent',
+      'Alpha', 'Beta', 'Chi', 'Delta', 'Epsilon', 'Phi', 'Gamma', 'Eta',
+      'Iota', 'theta1', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Omicron', 'Pi',
+      'Theta', 'Rho', 'Sigma', 'Tau', 'Upsilon', 'sigma1', 'Omega', 'Xi',
+      'Psi', 'Zeta', 'bracketleft', 'therefore', 'bracketright',
+      'perpendicular', 'underscore', 'radicalex', 'alpha', 'beta', 'chi',
+      'delta', 'epsilon', 'phi', 'gamma', 'eta', 'iota', 'phi1', 'kappa',
+      'lambda', 'mu', 'nu', 'omicron', 'pi', 'theta', 'rho', 'sigma', 'tau',
+      'upsilon', 'omega1', 'omega', 'xi', 'psi', 'zeta', 'braceleft', 'bar',
+      'braceright', 'similar',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 'Euro',
+      'Upsilon1', 'minute', 'lessequal', 'fraction', 'infinity', 'florin',
+      'club', 'diamond', 'heart', 'spade', 'arrowboth', 'arrowleft', 'arrowup',
+      'arrowright', 'arrowdown', 'degree', 'plusminus', 'second',
+      'greaterequal', 'multiply', 'proportional', 'partialdiff', 'bullet',
+      'divide', 'notequal', 'equivalence', 'approxequal', 'ellipsis',
+      'arrowvertex', 'arrowhorizex', 'carriagereturn', 'aleph', 'Ifraktur',
+      'Rfraktur', 'weierstrass', 'circlemultiply', 'circleplus', 'emptyset',
+      'intersection', 'union', 'propersuperset', 'reflexsuperset', 'notsubset',
+      'propersubset', 'reflexsubset', 'element', 'notelement', 'angle',
+      'gradient', 'registerserif', 'copyrightserif', 'trademarkserif',
+      'product', 'radical', 'dotmath', 'logicalnot', 'logicaland', 'logicalor',
+      'arrowdblboth', 'arrowdblleft', 'arrowdblup', 'arrowdblright',
+      'arrowdbldown', 'lozenge', 'angleleft', 'registersans', 'copyrightsans',
+      'trademarksans', 'summation', 'parenlefttp', 'parenleftex',
+      'parenleftbt', 'bracketlefttp', 'bracketleftex', 'bracketleftbt',
+      'bracelefttp', 'braceleftmid', 'braceleftbt', 'braceex', ,'angleright',
+      'integral', 'integraltp', 'integralex', 'integralbt', 'parenrighttp',
+      'parenrightex', 'parenrightbt', 'bracketrighttp', 'bracketrightex',
+      'bracketrightbt', 'bracerighttp', 'bracerightmid', 'bracerightbt'
+    ]);
+  },
   get zapfDingbatsEncoding() {
     return shadow(this, 'zapfDingbatsEncoding',
       [,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
@@ -4118,24 +4156,6 @@ var PartialEvaluator = (function() {
         fd = fontDict.get('FontDescriptor');
       }
 
-      if (!fd) {
-        var baseFontName = fontDict.get('BaseFont');
-        if (!IsName(baseFontName))
-          return null;
-        // Using base font name as a font name.
-        return {
-          name: baseFontName.name.replace(/[\+,\-]/g, '_'),
-          fontDict: fontDict,
-          properties: {}
-        };
-      }
-
-      var descriptor = xref.fetch(fd);
-
-      var fontName = xref.fetchIfRef(descriptor.get('FontName'));
-      assertWellFormed(IsName(fontName), 'invalid font name');
-      fontName = fontName.name.replace(/[\+,\-]/g, '_');
-
       var encodingMap = {};
       var charset = [];
       if (compositeFont) {
@@ -4200,14 +4220,6 @@ var PartialEvaluator = (function() {
                                      GlyphsUnicode[data.name];
             }
           }
-
-          // Get the font charset if any
-          var charset = descriptor.get('CharSet');
-          if (charset) {
-            assertWellFormed(IsString(charset), 'invalid charset');
-            charset = charset.split('/');
-            charset.shift();
-          }
         } else if (IsName(encoding)) {
           var encoding = Encodings[encoding.name];
           if (!encoding)
@@ -4217,11 +4229,10 @@ var PartialEvaluator = (function() {
           for (var j = 0; j < encoding.length; j++)
             encodingMap[index++] = GlyphsUnicode[encoding[j]];
 
+          // FirstChar and Widths are required
+          // (except for the 14 standard fonts)
           var firstChar = xref.fetchIfRef(fontDict.get('FirstChar'));
-          var widths = xref.fetchIfRef(fontDict.get('Widths'));
-          assertWellFormed(IsArray(widths) && IsInt(firstChar),
-                           'invalid font Widths or FirstChar');
-
+          var widths = xref.fetchIfRef(fontDict.get('Widths')) || [];
           for (var j = 0; j < widths.length; j++) {
             if (widths[j])
               charset.push(encoding[j + firstChar]);
@@ -4296,6 +4307,36 @@ var PartialEvaluator = (function() {
         }
       }
 
+      if (!fd) {
+        var baseFontName = fontDict.get('BaseFont');
+        if (!IsName(baseFontName))
+          return null;
+        // Using base font name as a font name.
+        baseFontName = baseFontName.name.replace(/[\+,\-]/g, '_');
+        if (baseFontName == 'Symbol') {
+          // special case for symbols
+          var encoding = Encodings.symbolsEncoding;
+          for (var i = 0, n = encoding.length, j; i < n; i++) {
+            if (!(j = encoding[i]))
+              continue;
+            encodingMap[i] = GlyphsUnicode[j] || 0;
+          }
+        }
+        return {
+          name: baseFontName,
+          fontDict: fontDict,
+          properties: {
+            encoding: encodingMap
+          }
+        };
+      }
+
+      var descriptor = xref.fetch(fd);
+
+      var fontName = xref.fetchIfRef(descriptor.get('FontName'));
+      assertWellFormed(IsName(fontName), 'invalid font name');
+      fontName = fontName.name.replace(/[\+,\-]/g, '_');
+
       var fontFile = descriptor.get('FontFile', 'FontFile2', 'FontFile3');
       if (fontFile) {
         fontFile = xref.fetchIfRef(fontFile);
@@ -4307,6 +4348,14 @@ var PartialEvaluator = (function() {
         }
       }
 
+      if (descriptor.has('CharSet')) {
+        // Get the font charset if any (meaningful only in Type 1)
+        charset = descriptor.get('CharSet');
+        assertWellFormed(IsString(charset), 'invalid charset');
+        charset = charset.split('/');
+        charset.shift();
+      }
+
       var widths = fontDict.get('Widths');
       if (widths) {
         var glyphWidths = {};

From b90c6945b81db9de81f0a55f25b0bd75d01f770b Mon Sep 17 00:00:00 2001
From: notmasteryet <async.processingjs@yahoo.com>
Date: Tue, 23 Aug 2011 17:06:45 -0500
Subject: [PATCH 2/2] Sanitizing base font name before comparison with 'Symbol'

---
 pdf.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pdf.js b/pdf.js
index 53d3b9853..47363401b 100644
--- a/pdf.js
+++ b/pdf.js
@@ -4313,7 +4313,7 @@ var PartialEvaluator = (function() {
           return null;
         // Using base font name as a font name.
         baseFontName = baseFontName.name.replace(/[\+,\-]/g, '_');
-        if (baseFontName == 'Symbol') {
+        if (/^Symbol(_?(Bold|Italic))*$/.test(baseFontName)) {
           // special case for symbols
           var encoding = Encodings.symbolsEncoding;
           for (var i = 0, n = encoding.length, j; i < n; i++) {