Restructure/rewrite of the Type1 font parser.

2013-03-27 17:15:44 -07:00 · 2013-03-27 17:15:44 -07:00 · 028151d13a
commit 028151d13a
parent 921f3211a4
2 changed files with 325 additions and 245 deletions
--- a/src/fonts.js
+++ b/src/fonts.js
@ -17,7 +17,7 @@
 /* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset,
           ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode,
           info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream,
-           stringToBytes, TextDecoder, TODO, warn */
+           stringToBytes, TextDecoder, TODO, warn, Lexer */
 'use strict';
@ -5057,8 +5057,11 @@ var Type1CharString = (function Type1CharStringClosure() {
 * Type1Parser encapsulates the code needed for parsing a Type1 font
 * program. Some of its logic depends on the Type2 charstring
 * structure.
 * Note: this doesn't really parse the font since that would require evaluation
 * of PostScript, but it is possible in most cases to extract what we need
 * without a full parse.
 */
-var Type1Parser = function type1Parser() {
+var Type1Parser = (function Type1ParserClosure() {
  /*
   * Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence
   * of Plaintext Bytes. The function took a key as a parameter which can be
@ -5081,271 +5084,258 @@ var Type1Parser = function type1Parser() {
    return decryptedString.slice(discardNumber);
  }
-  /*
+  function isSpecial(c) {
-   * Returns an object containing a Subrs array and a CharStrings
+    return c === '/' ||
-   * array extracted from and eexec encrypted block of data
+           c === '[' || c === ']' ||
-   */
+           c === '{' || c === '}' ||
-  function readNumberArray(str, index) {
+           c === '(' || c === ')';
    var start = index;
    while (str[index++] != '[')
      start++;
    start++;
    var count = 0;
    while (str[index++] != ']')
      count++;
    str = str.substr(start, count);
    str = str.trim();
    // Remove adjacent spaces
    str = str.replace(/\s+/g, ' ');
    var array = str.split(' ');
    for (var i = 0, ii = array.length; i < ii; i++)
      array[i] = parseFloat(array[i] || 0);
    return array;
  }
-  function readNumber(str, index) {
+  function Type1Parser(stream, encrypted) {
-    while (str[index] == ' ')
+    if (encrypted) {
-      index++;
+      stream = new Stream(decrypt(stream.getBytes(), EEXEC_ENCRYPT_KEY, 4));
    var start = index;
    var count = 0;
    while (str[index++] != ' ')
      count++;
    return parseFloat(str.substr(start, count) || 0);
  }
  function readBoolean(str, index) {
    while (str[index] == ' ')
      index++;
    var start = index;
    var count = 0;
    var length = str.length;
    while (index < length && str[index++] != ' ') {
      count++;
    }
-
+    this.stream = stream;
    // Use 1 and 0 since that's what type2 charstrings use.
    return str.substr(start, count) === 'true' ? 1 : 0;
  }
-
+  Type1Parser.prototype = {
-  function isSeparator(c) {
+    readNumberArray: function Type1Parser_readNumberArray() {
-    return c == ' ' || c == '\n' || c == '\x0d';
+      this.getToken(); // read '[' or '{' (arrays can start with either)
-  }
+      var array = [];
-
+      while (true) {
-  this.extractFontProgram = function Type1Parser_extractFontProgram(stream) {
+        var token = this.getToken();
-    var eexec = decrypt(stream, EEXEC_ENCRYPT_KEY, 4);
+        if (token === null || token === ']' || token === '}') {
-    var eexecStr = '';
+          break;
    for (var i = 0, ii = eexec.length; i < ii; i++)
      eexecStr += String.fromCharCode(eexec[i]);
    var glyphsSection = false, subrsSection = false;
    var subrs = [], charstrings = [];
    var program = {
      subrs: [],
      charstrings: [],
      properties: {
        'privateData': {
          'lenIV': 4
        }
        array.push(parseFloat(token || 0));
      }
-    };
+      return array;
    },
-    var glyph = '';
+    readNumber: function Type1Parser_readNumber() {
-    var token = '';
+      var token = this.getToken();
-    var length = 0;
+      return parseFloat(token || 0);
    },
-    var c = '';
+    readInt: function Type1Parser_readInt() {
-    var count = eexecStr.length;
+      // Use '| 0' to prevent setting a double into length such as the double
-    for (var i = 0; i < count; i++) {
+      // does not flow into the loop variable.
-      var getToken = function getToken() {
+      var token = this.getToken();
-        while (i < count && isSeparator(eexecStr[i]))
+      return parseInt(token || 0, 10) | 0;
-          ++i;
+    },
-        var token = '';
+    readBoolean: function Type1Parser_readBoolean() {
-        while (i < count && !isSeparator(eexecStr[i]))
+      var token = this.getToken();
          token += eexecStr[i++];
-        return token;
+      // Use 1 and 0 since that's what type2 charstrings use.
-      };
+      return token === 'true' ? 1 : 0;
-      var c = eexecStr[i];
+    },
-      if ((glyphsSection || subrsSection) &&
+    getToken: function Type1Parser_getToken() {
-          (token == 'RD' || token == '-|')) {
+      // Eat whitespace and comments.
-        i++;
+      var comment = false;
-        var data = eexec.slice(i, i + length);
+      var ch;
-        var lenIV = program.properties.privateData['lenIV'];
+      var stream = this.stream;
-        var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV);
+      while (true) {
        if ((ch = stream.lookChar()) === null)
          return null;
-        if (glyphsSection) {
+        if (comment) {
-          charstrings.push({
+          if (ch === '\x0a' || ch === '\x0d') {
-            glyph: glyph,
+            comment = false;
            encoded: encoded
          });
        } else {
          subrs.push(encoded);
        }
        i += length;
        token = '';
      } else if (isSeparator(c)) {
        // Use '| 0' to prevent setting a double into length such as the double
        // does not flow into the loop variable.
        length = parseInt(token, 10) | 0;
        token = '';
      } else {
        token += c;
        if (!glyphsSection) {
          switch (token) {
            case '/CharString':
              glyphsSection = true;
              break;
            case '/Subrs':
              ++i;
              var num = parseInt(getToken(), 10);
              getToken(); // read in 'array'
              for (var j = 0; j < num; ++j) {
                var t = getToken(); // read in 'dup'
                if (t == 'ND' || t == '|-' || t == 'noaccess')
                  break;
                var index = parseInt(getToken(), 10);
                if (index > j)
                  j = index;
                var length = parseInt(getToken(), 10);
                getToken(); // read in 'RD'
                var data = eexec.slice(i + 1, i + 1 + length);
                var lenIV = program.properties.privateData['lenIV'];
                var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV);
                i = i + 1 + length;
                t = getToken(); // read in 'NP'
                if (t == 'noaccess')
                  getToken(); // read in 'put'
                subrs[index] = encoded;
              }
              break;
            case '/BlueValues':
            case '/OtherBlues':
            case '/FamilyBlues':
            case '/FamilyOtherBlues':
              var blueArray = readNumberArray(eexecStr, i + 1);
              // *Blue* values may contain invalid data: disables reading of
              // those values when hinting is disabled.
              if (blueArray.length > 0 && (blueArray.length % 2) === 0 &&
                  HINTING_ENABLED) {
                program.properties.privateData[token.substring(1)] = blueArray;
              }
              break;
            case '/StemSnapH':
            case '/StemSnapV':
              program.properties.privateData[token.substring(1)] =
                readNumberArray(eexecStr, i + 1);
              break;
            case '/StdHW':
            case '/StdVW':
              program.properties.privateData[token.substring(1)] =
                readNumberArray(eexecStr, i + 1)[0];
              break;
            case '/BlueShift':
            case '/lenIV':
            case '/BlueFuzz':
            case '/BlueScale':
            case '/LanguageGroup':
            case '/ExpansionFactor':
              program.properties.privateData[token.substring(1)] =
                readNumber(eexecStr, i + 1);
              break;
            case '/ForceBold':
              program.properties.privateData[token.substring(1)] =
                readBoolean(eexecStr, i + 1);
              break;
          }
-        } else if (c == '/') {
+        } else if (ch === '%') {
-          token = glyph = '';
+          comment = true;
-          while ((c = eexecStr[++i]) != ' ')
+        } else if (!Lexer.isSpace(ch)) {
-            glyph += c;
+          break;
        }
        stream.skip();
      }
-    }
+      if (isSpecial(ch)) {
-
+        stream.skip();
-    for (var i = 0; i < charstrings.length; i++) {
+        return ch;
      var glyph = charstrings[i].glyph;
      var encoded = charstrings[i].encoded;
      var charString = new Type1CharString();
      var error = charString.convert(encoded, subrs);
      var output = charString.output;
      if (error) {
        // It seems when FreeType encounters an error while evaluating a glyph
        // that it completely ignores the glyph so we'll mimic that behaviour
        // here and put an endchar to make the validator happy.
        output = [14];
      }
-      program.charstrings.push({
+      var token = '';
-        glyph: glyph,
+      do {
-        data: output,
+        token += ch;
-        seac: charString.seac,
+        stream.skip();
-        lsb: charString.lsb,
+        ch = stream.lookChar();
-        width: charString.width
+      } while (ch !== null && !Lexer.isSpace(ch) && !isSpecial(ch));
-      });
+      return token;
-    }
+    },
-    return program;
+    /*
-  };
+     * Returns an object containing a Subrs array and a CharStrings
     * array extracted from an eexec encrypted block of data
     */
    extractFontProgram: function Type1Parser_extractFontProgram() {
      var stream = this.stream;
-  this.extractFontHeader = function Type1Parser_extractFontHeader(stream,
+      var subrs = [], charstrings = [];
-                                                                  properties) {
+      var program = {
-    var headerString = '';
+        subrs: [],
-    for (var i = 0, ii = stream.length; i < ii; i++)
+        charstrings: [],
-      headerString += String.fromCharCode(stream[i]);
+        properties: {
-
+          'privateData': {
-    var token = '';
+            'lenIV': 4
-    var count = headerString.length;
+          }
    for (var i = 0; i < count; i++) {
      var getToken = function getToken() {
        var character = headerString[i];
        while (i < count && (isSeparator(character) || character == '/'))
          character = headerString[++i];
        var token = '';
        while (i < count && !(isSeparator(character) || character == '/')) {
          token += character;
          character = headerString[++i];
        }
        return token;
      };
-
+      var token;
-      var c = headerString[i];
+      while ((token = this.getToken()) !== null) {
-      if (isSeparator(c)) {
+        if (token !== '/') {
          continue;
        }
        token = this.getToken();
        switch (token) {
-          case '/FontMatrix':
+          case 'CharStrings':
-            var matrix = readNumberArray(headerString, i + 1);
+            // The number immediately following CharStrings must be greater or
            // equal to the number of CharStrings.
            this.getToken();
            this.getToken(); // read in 'dict'
            this.getToken(); // read in 'dup'
            this.getToken(); // read in 'begin'
            while(true) {
              token = this.getToken();
              if (token === null || token === 'end') {
                break;
              }
              if (token !== '/') {
                continue;
              }
              var glyph = this.getToken();
              var length = this.readInt();
              this.getToken(); // read in 'RD' or '-|'
              var data = stream.makeSubStream(stream.pos + 1, length);
              var lenIV = program.properties.privateData['lenIV'];
              var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY,
                                    lenIV);
              // Skip past the required space and binary data.
              stream.skip(1 + length);
              token = this.getToken(); // read in 'ND' or '|-'
              if (token === 'noaccess') {
                this.getToken(); // read in 'def'
              }
              charstrings.push({
                glyph: glyph,
                encoded: encoded
              });
            }
            break;
          case 'Subrs':
            var num = this.readInt();
            this.getToken(); // read in 'array'
            for (var j = 0; j < num; ++j) {
              token = this.getToken(); // read in 'dup'
              var index = this.readInt();
              if (index > j)
                j = index;
              var length = this.readInt();
              this.getToken(); // read in 'RD' or '-|'
              var data = stream.makeSubStream(stream.pos + 1, length);
              var lenIV = program.properties.privateData['lenIV'];
              var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY,
                                    lenIV);
              // Skip past the required space and binary data.
              stream.skip(1 + length);
              token = this.getToken(); // read in 'NP' or '|'
              if (token === 'noaccess') {
                this.getToken(); // read in 'put'
              }
              subrs[index] = encoded;
            }
            break;
          case 'BlueValues':
          case 'OtherBlues':
          case 'FamilyBlues':
          case 'FamilyOtherBlues':
            var blueArray = this.readNumberArray();
            // *Blue* values may contain invalid data: disables reading of
            // those values when hinting is disabled.
            if (blueArray.length > 0 && (blueArray.length % 2) === 0 &&
                HINTING_ENABLED) {
              program.properties.privateData[token] = blueArray;
            }
            break;
          case 'StemSnapH':
          case 'StemSnapV':
            program.properties.privateData[token] = this.readNumberArray();
            break;
          case 'StdHW':
          case 'StdVW':
            program.properties.privateData[token] =
              this.readNumberArray()[0];
            break;
          case 'BlueShift':
          case 'lenIV':
          case 'BlueFuzz':
          case 'BlueScale':
          case 'LanguageGroup':
          case 'ExpansionFactor':
            program.properties.privateData[token] = this.readNumber();
            break;
          case 'ForceBold':
            program.properties.privateData[token] = this.readBoolean();
            break;
        }
      }
      for (var i = 0; i < charstrings.length; i++) {
        var glyph = charstrings[i].glyph;
        var encoded = charstrings[i].encoded;
        var charString = new Type1CharString();
        var error = charString.convert(encoded, subrs);
        var output = charString.output;
        if (error) {
          // It seems when FreeType encounters an error while evaluating a glyph
          // that it completely ignores the glyph so we'll mimic that behaviour
          // here and put an endchar to make the validator happy.
          output = [14];
        }
        program.charstrings.push({
          glyph: glyph,
          data: output,
          seac: charString.seac,
          lsb: charString.lsb,
          width: charString.width
        });
      }
      return program;
    },
    extractFontHeader: function Type1Parser_extractFontHeader(properties) {
      var token;
      while ((token = this.getToken()) !== null) {
        if (token !== '/') {
          continue;
        }
        token = this.getToken();
        switch (token) {
          case 'FontMatrix':
            var matrix = this.readNumberArray();
            properties.fontMatrix = matrix;
            break;
-          case '/Encoding':
+          case 'Encoding':
-            var encodingArg = getToken();
+            var encodingArg = this.getToken();
            var encoding;
            if (!/^\d+$/.test(encodingArg)) {
              // encoding name is specified
              encoding = Encodings[encodingArg];
            } else {
              encoding = [];
-              var size = parseInt(encodingArg, 10);
+              var size = parseInt(encodingArg, 10) | 0;
-              getToken(); // read in 'array'
+              this.getToken(); // read in 'array'
              for (var j = 0; j < size; j++) {
-                var token = getToken();
+                var token = this.getToken();
-                if (token == 'dup') {
+                if (token === 'dup') {
-                  var index = parseInt(getToken(), 10);
+                  var index = this.readInt();
-                  var glyph = getToken();
+                  this.getToken(); // read in '/'
                  var glyph = this.getToken();
                  encoding[index] = glyph;
-                  getToken(); // read the in 'put'
+                  this.getToken(); // read the in 'put'
                }
              }
            }
@ -5355,13 +5345,12 @@ var Type1Parser = function type1Parser() {
            }
            break;
        }
        token = '';
      } else {
        token += c;
      }
    }
  };
-};
+
  return Type1Parser;
 })();
 /**
 * The CFF class takes a Type1 file and wrap it into a
@ -5435,17 +5424,17 @@ var CFFStandardStrings = [
  'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman', 'Semibold'
 ];
 var type1Parser = new Type1Parser();
 // Type1Font is also a CIDFontType0.
 var Type1Font = function Type1Font(name, file, properties) {
  // Get the data block containing glyphs and subrs informations
-  var headerBlock = file.getBytes(properties.length1);
+  var headerBlock = new Stream(file.getBytes(properties.length1));
-  type1Parser.extractFontHeader(headerBlock, properties);
+  var headerBlockParser = new Type1Parser(headerBlock);
  headerBlockParser.extractFontHeader(properties);
  // Decrypt the data blocks and retrieve it's content
-  var eexecBlock = file.getBytes(properties.length2);
+  var eexecBlock = new Stream(file.getBytes(properties.length2));
-  var data = type1Parser.extractFontProgram(eexecBlock);
+  var eexecBlockParser = new Type1Parser(eexecBlock, true);
  var data = eexecBlockParser.extractFontProgram();
  for (var info in data.properties)
    properties[info] = data.properties[info];
--- a/test/unit/font_spec.js
+++ b/test/unit/font_spec.js
@ -1,7 +1,7 @@
 /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
 /* globals expect, it, describe, CFFCompiler, CFFParser, CFFIndex, CFFStrings,
-           SEAC_ANALYSIS_ENABLED:true */
+           SEAC_ANALYSIS_ENABLED:true, Type1Parser, StringStream */
 'use strict';
@ -297,4 +297,95 @@ describe('font', function() {
    });
    // TODO a lot more compiler tests
  });
  /**
   * Unit tests for Type1Parser: the tokenizer (getToken), the typed readers
   * (readNumber, readBoolean, readNumberArray) and the two extraction entry
   * points (extractFontProgram, extractFontHeader).
   *
   * Every parser below is constructed without the `encrypted` argument, so
   * the input strings are tokenized as-is, without eexec decryption.
   */
  describe('Type1Parser', function() {
    // Delimiters such as '/', '[' and ']' come back as one-character tokens
    // of their own; null signals that the stream is exhausted.
    it('splits tokens', function() {
      var stream = new StringStream('/BlueValues[-17 0]noaccess def');
      var parser = new Type1Parser(stream);
      expect(parser.getToken()).toEqual('/');
      expect(parser.getToken()).toEqual('BlueValues');
      expect(parser.getToken()).toEqual('[');
      expect(parser.getToken()).toEqual('-17');
      expect(parser.getToken()).toEqual('0');
      expect(parser.getToken()).toEqual(']');
      expect(parser.getToken()).toEqual('noaccess');
      expect(parser.getToken()).toEqual('def');
      expect(parser.getToken()).toEqual(null);
    });

    // A '/' terminates the preceding token even without whitespace between.
    it('handles glued tokens', function() {
      var stream = new StringStream('dup/CharStrings');
      var parser = new Type1Parser(stream);
      expect(parser.getToken()).toEqual('dup');
      expect(parser.getToken()).toEqual('/');
      expect(parser.getToken()).toEqual('CharStrings');
      // Nothing is left after the last name.
      expect(parser.getToken()).toEqual(null);
    });

    it('ignores whitespace', function() {
      var stream = new StringStream('\nab   c\t');
      var parser = new Type1Parser(stream);
      expect(parser.getToken()).toEqual('ab');
      expect(parser.getToken()).toEqual('c');
      // Trailing whitespace must not produce an extra token.
      expect(parser.getToken()).toEqual(null);
    });

    it('parses numbers', function() {
      var stream = new StringStream('123');
      var parser = new Type1Parser(stream);
      expect(parser.readNumber()).toEqual(123);
    });

    // readBoolean reports 1/0 rather than true/false.
    it('parses booleans', function() {
      var stream = new StringStream('true false');
      var parser = new Type1Parser(stream);
      expect(parser.readBoolean()).toEqual(1);
      expect(parser.readBoolean()).toEqual(0);
    });

    it('parses number arrays', function() {
      var stream = new StringStream('[1 2]');
      var parser = new Type1Parser(stream);
      expect(parser.readNumberArray()).toEqual([1, 2]);
      // Variation on spacing.
      stream = new StringStream('[ 1 2 ]');
      parser = new Type1Parser(stream);
      expect(parser.readNumberArray()).toEqual([1, 2]);
    });

    // Everything from '%' up to the end of the line is dropped.
    it('skips comments', function() {
      var stream = new StringStream(
        '%!PS-AdobeFont-1.0: CMSY10 003.002\n' +
        '%%Title: CMSY10\n' +
        '%Version: 003.002\n' +
        'FontDirectory');
      var parser = new Type1Parser(stream);
      expect(parser.getToken()).toEqual('FontDirectory');
    });

    // In '<length> RD <data>' the parser skips the single space after 'RD'
    // and then consumes exactly <length> bytes, here the lone 'x'.
    it('parses font program', function() {
      var stream = new StringStream(
        '/ExpansionFactor  99\n' +
        '/Subrs 1 array\n' +
        'dup 0 1 RD x noaccess put\n' +
        '/CharStrings 46 dict dup begin\n' +
        '/.notdef 1 RD x ND' + '\n' +
        'end');
      var parser = new Type1Parser(stream);
      var program = parser.extractFontProgram();
      expect(program.charstrings.length).toEqual(1);
      expect(program.charstrings[0].glyph).toEqual('.notdef');
      expect(program.properties.privateData.ExpansionFactor).toEqual(99);
    });

    // extractFontHeader reports its results by writing onto the object that
    // is passed in.
    it('parses font header font matrix', function() {
      var stream = new StringStream(
        '/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def\n');
      var parser = new Type1Parser(stream);
      var props = {};
      parser.extractFontHeader(props);
      expect(props.fontMatrix).toEqual([0.001, 0, 0, 0.001, 0, 0]);
    });

    // Only 'dup <index> /<glyph> put' entries populate the encoding; the
    // '0 1 255 {...} for' initialisation procedure is skipped over.
    it('parses font header encoding', function() {
      var stream = new StringStream(
        '/Encoding 256 array\n' +
        '0 1 255 {1 index exch /.notdef put} for\n' +
        'dup 33 /arrowright put\n' +
        'readonly def\n');
      var parser = new Type1Parser(stream);
      var props = {};
      parser.extractFontHeader(props);
      expect(props.baseEncoding[33]).toEqual('arrowright');
    });
  });
 });