Guessing PDF char size based on the size of the numbers in the ToUnicode CMap

2012-04-23 17:44:51 -05:00 · 2012-04-23 17:44:51 -05:00 · 78213e826e
commit 78213e826e
parent 8b7cd47798
2 changed files with 10 additions and 3 deletions
--- a/src/evaluator.js
+++ b/src/evaluator.js
@ -481,7 +481,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
      var toUnicode = dict.get('ToUnicode') ||
        baseDict.get('ToUnicode');
      if (toUnicode)
-        properties.toUnicode = this.readToUnicode(toUnicode, xref);
+        properties.toUnicode = this.readToUnicode(toUnicode, xref, properties);

      if (properties.composite) {
        // CIDSystemInfo helps to match CID to glyphs
@ -537,7 +537,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
      properties.hasEncoding = hasEncoding;
    },

-    readToUnicode: function PartialEvaluator_readToUnicode(toUnicode, xref) {
+    readToUnicode: function PartialEvaluator_readToUnicode(toUnicode, xref,
+                                                           properties) {
      var cmapObj = toUnicode;
      var charToUnicode = [];
      if (isName(cmapObj)) {
@ -626,6 +627,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
            }
          } else if (octet == 0x3E) {
            if (token.length) {
+              // XXX guessing chars size by checking number size in the CMap
+              if (token.length <= 2 && properties.composite)
+                properties.wideChars = false;
+
              if (token.length <= 4) {
                // parsing hex number
                tokens.push(parseInt(token, 16));
@ -843,6 +848,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
        length1: length1,
        length2: length2,
        composite: composite,
+        wideChars: composite,
        fixedPitch: false,
        fontMatrix: dict.get('FontMatrix') || IDENTITY_MATRIX,
        firstChar: firstChar || 0,
--- a/src/fonts.js
+++ b/src/fonts.js
@ -789,6 +789,7 @@ var Font = (function FontClosure() {
    this.widths = properties.widths;
    this.defaultWidth = properties.defaultWidth;
    this.composite = properties.composite;
+    this.wideChars = properties.wideChars;
    this.hasEncoding = properties.hasEncoding;

    this.fontMatrix = properties.fontMatrix;
@ -2520,7 +2521,7 @@ var Font = (function FontClosure() {

      glyphs = [];

-      if (this.composite) {
+      if (this.wideChars) {
        // composite fonts have multi-byte strings convert the string from
        // single-byte to multi-byte
        // XXX assuming CIDFonts are two-byte - later need to extract the