From e086cf36f4d9d7f021b218de77edefde60ab5214 Mon Sep 17 00:00:00 2001
From: Yury Delendik <ydelendik@mozilla.com>
Date: Thu, 13 Sep 2012 08:09:46 -0700
Subject: [PATCH] Refactor text extraction / font loading logic

---
 src/canvas.js    |  14 ++---
 src/evaluator.js | 131 ++++++++++++++++++++++-------------------------
 src/fonts.js     |  72 +++++++++-----------------
 web/viewer.js    |  25 ++++-----
 4 files changed, 98 insertions(+), 144 deletions(-)

diff --git a/src/canvas.js b/src/canvas.js
index e5a204cb7..cca2c9212 100644
--- a/src/canvas.js
+++ b/src/canvas.js
@@ -782,15 +782,9 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
           x += charWidth;
 
           var glyphUnicode = glyph.unicode === ' ' ? '\u00A0' : glyph.unicode;
-          var glyphUnicodeLength = glyphUnicode.length;
-          //reverse an arabic ligature
-          if (glyphUnicodeLength > 1 &&
-              isRTLRangeFor(glyphUnicode.charCodeAt(0))) {
-            for (var ii = glyphUnicodeLength - 1; ii >= 0; ii--)
-              text.str += glyphUnicode[ii];
-          } else
-            text.str += glyphUnicode;
-          text.length += glyphUnicodeLength;
+          if (glyphUnicode in NormalizedUnicodes)
+            glyphUnicode = NormalizedUnicodes[glyphUnicode];
+          text.str += reverseIfRtl(glyphUnicode);
           text.canvasWidth += charWidth;
         }
         current.x += x * textHScale2;
@@ -842,7 +836,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
               var numFakeSpaces = Math.round(-e / text.geom.spaceWidth);
               if (numFakeSpaces > 0) {
                 text.str += '\u00A0';
-                text.length++;
               }
             }
           }
@@ -856,7 +849,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
               text.str += shownText.str;
             }
             text.canvasWidth += shownText.canvasWidth;
-            text.length += shownText.length;
           }
         } else {
           error('TJ array element ' + e + ' is not string or num');
diff --git a/src/evaluator.js b/src/evaluator.js
index 704091149..59fec61a6 100644
--- a/src/evaluator.js
+++ b/src/evaluator.js
@@ -26,6 +26,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
     this.handler = handler;
     this.uniquePrefix = uniquePrefix;
     this.objIdCounter = 0;
+    this.fontIdCounter = 0;
   }
 
   var OP_MAP = {
@@ -138,6 +139,35 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
   };
 
   PartialEvaluator.prototype = {
+    loadFont: function PartialEvaluator_loadFont(fontName, font, xref,
+                                                 resources, dependency) {
+      var fontRes = resources.get('Font');
+      // Resolves the font dict, names it and translates it only once.
+      assert(fontRes, 'fontRes not available');
+      // If `font` resolves to nothing, look fontName up in fontRes.
+      font = xref.fetchIfRef(font) || fontRes.get(fontName);
+      assertWellFormed(isDict(font));
+
+      ++this.fontIdCounter; // bumped on every call, cached font or not
+      var loadedName = font.loadedName;
+      if (!loadedName) {
+        // keep track of each font we translated so the caller can
+        // load them asynchronously before calling display on a page
+        loadedName = 'font_' + this.uniquePrefix + this.fontIdCounter;
+        font.loadedName = loadedName; // NOTE(review): read by translateFont?
+
+        var translated;
+        try {
+          translated = this.translateFont(font, xref, resources,
+                                          dependency);
+        } catch (e) {
+          translated = { error: e }; // failure is stored, not rethrown
+        }
+        font.translated = translated;
+      }
+      return font; // the font dict, carrying loadedName and translated
+    },
+
     getOperatorList: function PartialEvaluator_getOperatorList(stream,
                                                                resources,
                                                                dependency,
@@ -160,45 +190,35 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       }
 
       function handleSetFont(fontName, font) {
-        var loadedName = null;
+        font = self.loadFont(fontName, font, xref, resources, dependency);
 
-        var fontRes = resources.get('Font');
+        var loadedName = font.loadedName;
+        if (!font.sent) {
+          var data = font.translated;
+          if (data.loadCharProcs) {
+            delete data.loadCharProcs;
 
-        assert(fontRes, 'fontRes not available');
-
-        font = xref.fetchIfRef(font) || fontRes.get(fontName);
-        assertWellFormed(isDict(font));
-
-        ++self.objIdCounter;
-        if (!font.loadedName) {
-          var translated = self.translateFont(font, xref, resources,
-                                              dependency);
-          if (translated) {
-            // keep track of each font we translated so the caller can
-            // load them asynchronously before calling display on a page
-            loadedName = 'font_' + uniquePrefix + self.objIdCounter;
-            translated.properties.loadedName = loadedName;
-            font.loadedName = loadedName;
-            font.translated = translated;
-
-            var data;
-            try {
-              var fontObj = new Font(translated.name,
-                                     translated.file,
-                                     translated.properties);
-              data = fontObj.export();
-            } catch (e) {
-              data = { error: e };
+            var charProcs = font.get('CharProcs').getAll();
+            var fontResources = font.get('Resources') || resources;
+            var charProcOperatorList = {};
+            for (var key in charProcs) {
+              var glyphStream = charProcs[key];
+              charProcOperatorList[key] =
+                self.getOperatorList(glyphStream, fontResources, dependency);
             }
-
-            handler.send('obj', [
-                loadedName,
-                'Font',
-                data
-            ]);
+            data.charProcOperatorList = charProcOperatorList;
           }
+
+          if (data instanceof Font)
+            data = data.export();
+
+          handler.send('obj', [
+              loadedName,
+              'Font',
+              data
+          ]);
+          font.sent = true;
         }
-        loadedName = loadedName || font.loadedName;
 
         // Ensure the font is ready before the font is set
         // and later on used for drawing.
@@ -491,20 +511,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       var xref = this.xref;
 
       function handleSetFont(fontName, fontRef) {
-        var fontRes = resources.get('Font');
-
-        // TODO: TOASK: Is it possible to get here? If so, what does
-        // args[0].name should be like???
-        assert(fontRes, 'fontRes not available');
-
-        fontRes = xref.fetchIfRef(fontRes);
-        fontRef = fontRef || fontRes.get(fontName);
-        var font = xref.fetchIfRef(fontRef), tra;
-        assertWellFormed(isDict(font));
-        if (!font.translated) {
-          font.translated = self.translateFont(font, xref, resources);
-        }
-        return font;
+        return self.loadFont(fontName, fontRef, xref, resources, null);
       }
 
       resources = xref.fetchIfRef(resources) || new Dict();
@@ -546,7 +553,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
               break;
           } // switch
           if (chunk !== '') {
-            text += fontCharsToUnicode(chunk, font.translated.properties);
+            text += fontCharsToUnicode(chunk, font.translated);
             chunk = '';
           }
 
@@ -853,7 +860,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         //  - get the FontDescriptor from the descendant font
         var df = dict.get('DescendantFonts');
         if (!df)
-          return null;
+          error('Descendant fonts are not specified');
 
         dict = isArray(df) ? xref.fetchIfRef(df[0]) : df;
 
@@ -876,7 +883,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
           // This case is here for compatibility.
           var baseFontName = dict.get('BaseFont');
           if (!isName(baseFontName))
-            return null;
+            error('Base font is not specified');
 
           // Using base font name as a font name.
           baseFontName = baseFontName.name.replace(/[,_]/g, '-');
@@ -899,11 +906,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
           };
           this.extractDataStructures(dict, dict, xref, properties);
 
-          return {
-            name: baseFontName,
-            dict: baseDict,
-            properties: properties
-          };
+          return new Font(baseFontName, null, properties);
         }
       }
 
@@ -939,6 +942,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
         file: fontFile,
         length1: length1,
         length2: length2,
+        loadedName: baseDict.loadedName,
         composite: composite,
         wideChars: composite,
         fixedPitch: false,
@@ -959,22 +963,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
 
       if (type.name === 'Type3') {
         properties.coded = true;
-        var charProcs = dict.get('CharProcs').getAll();
-        var fontResources = dict.get('Resources') || resources;
-        properties.charProcOperatorList = {};
-        for (var key in charProcs) {
-          var glyphStream = charProcs[key];
-          properties.charProcOperatorList[key] =
-            this.getOperatorList(glyphStream, fontResources, dependency);
-        }
       }
 
-      return {
-        name: fontName.name,
-        dict: baseDict,
-        file: fontFile,
-        properties: properties
-      };
+      return new Font(fontName.name, fontFile, properties);
     }
   };
 
diff --git a/src/fonts.js b/src/fonts.js
index 820ada8a0..7707bffb6 100644
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -1464,54 +1464,30 @@ var NormalizedUnicodes = {
   '\uFE4F': '\u005F'
 };
 
-function fontCharsToUnicode(charCodes, fontProperties) {
-  var toUnicode = fontProperties.toUnicode;
-  var composite = fontProperties.composite;
-  var encoding, differences, cidToUnicode;
-  var result = '';
-  if (composite) {
-    cidToUnicode = fontProperties.cidToUnicode;
-    for (var i = 0, ii = charCodes.length; i < ii; i += 2) {
-      var charCode = (charCodes.charCodeAt(i) << 8) |
-        charCodes.charCodeAt(i + 1);
-      if (toUnicode && charCode in toUnicode) {
-        var unicode = toUnicode[charCode];
-        result += typeof unicode !== 'number' ? unicode :
-          String.fromCharCode(unicode);
-        continue;
-      }
-      result += String.fromCharCode(!cidToUnicode ? charCode :
-        cidToUnicode[charCode] || charCode);
-    }
-  } else {
-    differences = fontProperties.differences;
-    encoding = fontProperties.baseEncoding;
-    for (var i = 0, ii = charCodes.length; i < ii; i++) {
-      var charCode = charCodes.charCodeAt(i);
-      var unicode;
-      if (toUnicode && charCode in toUnicode) {
-        var unicode = toUnicode[charCode];
-        result += typeof unicode !== 'number' ? unicode :
-          String.fromCharCode(unicode);
-        continue;
-      }
+function reverseIfRtl(chars) {
+  var charsLength = chars.length;
+  // Only a multi-char RTL string (e.g. an arabic ligature) is reversed.
+  if (charsLength <= 1 || !isRTLRangeFor(chars.charCodeAt(0)))
+    return chars;
 
-      var glyphName = charCode in differences ? differences[charCode] :
-        encoding[charCode];
-      if (glyphName in GlyphsUnicode) {
-        result += String.fromCharCode(GlyphsUnicode[glyphName]);
-        continue;
-      }
-      result += String.fromCharCode(charCode);
-    }
-  }
-  // normalizing the unicode characters
-  for (var i = 0, ii = result.length; i < ii; i++) {
-    if (!(result[i] in NormalizedUnicodes))
+  var s = ''; // collects chars from last to first
+  for (var ii = charsLength - 1; ii >= 0; ii--)
+    s += chars[ii]; // indexes UTF-16 code units, not code points
+  return s;
+}
+
+function fontCharsToUnicode(charCodes, font) {
+  var glyphs = font.charsToGlyphs ? font.charsToGlyphs(charCodes) : [];
+  var result = ''; // stays '' for a failed font ({ error: e } stub)
+  for (var i = 0, ii = glyphs.length; i < ii; i++) {
+    var glyph = glyphs[i];
+    if (!glyph) // skip entries for which no glyph was produced
       continue;
-    result = result.substring(0, i) + NormalizedUnicodes[result[i]] +
-      result.substring(i + 1);
-    ii = result.length;
+    // use the NormalizedUnicodes replacement when the table has one
+    var glyphUnicode = glyph.unicode;
+    if (glyphUnicode in NormalizedUnicodes)
+      glyphUnicode = NormalizedUnicodes[glyphUnicode];
+    result += reverseIfRtl(glyphUnicode);
   }
   return result;
 }
@@ -1536,8 +1512,9 @@ var Font = (function FontClosure() {
     }
 
     this.name = name;
+    this.loadedName = properties.loadedName;
     this.coded = properties.coded;
-    this.charProcOperatorList = properties.charProcOperatorList;
+    this.loadCharProcs = properties.coded;
     this.sizes = [];
 
     var names = name.split('+');
@@ -1641,7 +1618,6 @@ var Font = (function FontClosure() {
     this.widthMultiplier = !properties.fontMatrix ? 1.0 :
       1.0 / properties.fontMatrix[0];
     this.encoding = properties.baseEncoding;
-    this.loadedName = properties.loadedName;
     this.loading = true;
   };
 
diff --git a/web/viewer.js b/web/viewer.js
index 2fe30debe..bf2c44012 100644
--- a/web/viewer.js
+++ b/web/viewer.js
@@ -1844,22 +1844,18 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) {
         return;
       }
       var textDiv = textDivs.shift();
-      if (textDiv.dataset.textLength > 0) {
-        textLayerDiv.appendChild(textDiv);
+      textLayerDiv.appendChild(textDiv);
 
-        if (textDiv.dataset.textLength > 1) { // avoid div by zero
-          // Adjust div width to match canvas text
+      ctx.font = textDiv.style.fontSize + ' sans-serif';
+      var width = ctx.measureText(textDiv.textContent).width;
 
-          ctx.font = textDiv.style.fontSize + ' sans-serif';
-          var width = ctx.measureText(textDiv.textContent).width;
+      if (width > 0) {
+        var textScale = textDiv.dataset.canvasWidth / width;
 
-          var textScale = textDiv.dataset.canvasWidth / width;
-
-          CustomStyle.setProp('transform' , textDiv,
-            'scale(' + textScale + ', 1)');
-          CustomStyle.setProp('transformOrigin' , textDiv, '0% 0%');
-        }
-      } // textLength > 0
+        CustomStyle.setProp('transform' , textDiv,
+          'scale(' + textScale + ', 1)');
+        CustomStyle.setProp('transformOrigin' , textDiv, '0% 0%');
+      }
     }
     renderTimer = setInterval(renderTextLayer, renderInterval);
 
@@ -1899,7 +1895,6 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) {
     textDiv.style.top = (text.geom.y - fontHeight) + 'px';
     textDiv.textContent = PDFJS.bidi(text, -1);
     textDiv.dir = text.direction;
-    textDiv.dataset.textLength = text.length;
     this.textDivs.push(textDiv);
   };
 };
@@ -2068,7 +2063,7 @@ document.addEventListener('DOMContentLoaded', function webViewerLoad(evt) {
     });
 
   document.getElementById('searchTermsInput').addEventListener('keydown',
-    function() {
+    function(event) {
       if (event.keyCode == 13) {
         PDFView.search();
       }