Restructure/rewrite of the Type1 font parser.

2013-03-27 17:15:44 -07:00 · 2013-03-27 17:15:44 -07:00 · 028151d13a
commit 028151d13a
parent 921f3211a4
2 changed files with 325 additions and 245 deletions
--- a/src/fonts.js
+++ b/src/fonts.js
@ -17,7 +17,7 @@
 /* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset,
           ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode,
           info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream,
-           stringToBytes, TextDecoder, TODO, warn */
+           stringToBytes, TextDecoder, TODO, warn, Lexer */
 'use strict';
@ -5057,8 +5057,11 @@ var Type1CharString = (function Type1CharStringClosure() {
 * Type1Parser encapsulates the code needed for parsing a Type1 font
 * program. Some of its logic depends on the Type2 charstrings
 * structure.
 * Note: this doesn't really parse the font since that would require evaluation
 * of PostScript, but it is possible in most cases to extract what we need
 * without a full parse.
 */
-var Type1Parser = function type1Parser() {
+var Type1Parser = (function Type1ParserClosure() {
  /*
   * Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence
   * of Plaintext Bytes. The function took a key as a parameter which can be
@ -5081,73 +5084,93 @@ var Type1Parser = function type1Parser() {
    return decryptedString.slice(discardNumber);
  }
  /**
   * Tells whether a character is one of the delimiters that the tokenizer
   * emits as a stand-alone, single-character token: '/', '[', ']', '{', '}',
   * '(' or ')'.
   *
   * @param {*} c The value to test; only an exact (strict) match counts.
   * @return {boolean} true for a delimiter, false for anything else.
   */
  function isSpecial(c) {
    switch (c) {
      case '/':
      case '[':
      case ']':
      case '{':
      case '}':
      case '(':
      case ')':
        return true;
      default:
        return false;
    }
  }
  /**
   * Creates a parser that reads tokens from `stream`.
   *
   * @param {Object} stream Source of the font data. When `encrypted` is
   *   truthy it must provide getBytes().
   * @param {boolean} encrypted When truthy, the bytes of `stream` are run
   *   through decrypt() with EEXEC_ENCRYPT_KEY (discarding 4 bytes) and the
   *   parser reads from a new Stream wrapping the result instead.
   */
  function Type1Parser(stream, encrypted) {
    this.stream = encrypted ?
      new Stream(decrypt(stream.getBytes(), EEXEC_ENCRYPT_KEY, 4)) :
      stream;
  }
  Type1Parser.prototype = {
    readNumberArray: function Type1Parser_readNumberArray() {
      this.getToken(); // read '[' or '{' (arrays can start with either)
      var array = [];
      while (true) {
        var token = this.getToken();
        if (token === null || token === ']' || token === '}') {
          break;
        }
        array.push(parseFloat(token || 0));
      }
      return array;
    },
    readNumber: function Type1Parser_readNumber() {
      var token = this.getToken();
      return parseFloat(token || 0);
    },
    readInt: function Type1Parser_readInt() {
      // Use '| 0' to prevent setting a double into length such as the double
      // does not flow into the loop variable.
      var token = this.getToken();
      return parseInt(token || 0, 10) | 0;
    },
    readBoolean: function Type1Parser_readBoolean() {
      var token = this.getToken();
      // Use 1 and 0 since that's what type2 charstrings use.
      return token === 'true' ? 1 : 0;
    },
    getToken: function Type1Parser_getToken() {
      // Eat whitespace and comments.
      var comment = false;
      var ch;
      var stream = this.stream;
      while (true) {
        if ((ch = stream.lookChar()) === null)
          return null;
        if (comment) {
          if (ch === '\x0a' || ch === '\x0d') {
            comment = false;
          }
        } else if (ch === '%') {
          comment = true;
        } else if (!Lexer.isSpace(ch)) {
          break;
        }
        stream.skip();
      }
      if (isSpecial(ch)) {
        stream.skip();
        return ch;
      }
      var token = '';
      do {
        token += ch;
        stream.skip();
        ch = stream.lookChar();
      } while (ch !== null && !Lexer.isSpace(ch) && !isSpecial(ch));
      return token;
    },
    /*
     * Returns an object containing a Subrs array and a CharStrings
     * array extracted from an eexec encrypted block of data
     */
-  function readNumberArray(str, index) {
+    extractFontProgram: function Type1Parser_extractFontProgram() {
-    var start = index;
+      var stream = this.stream;
    while (str[index++] != '[')
      start++;
    start++;
    var count = 0;
    while (str[index++] != ']')
      count++;
    str = str.substr(start, count);
    str = str.trim();
    // Remove adjacent spaces
    str = str.replace(/\s+/g, ' ');
    var array = str.split(' ');
    for (var i = 0, ii = array.length; i < ii; i++)
      array[i] = parseFloat(array[i] || 0);
    return array;
  }
  function readNumber(str, index) {
    while (str[index] == ' ')
      index++;
    var start = index;
    var count = 0;
    while (str[index++] != ' ')
      count++;
    return parseFloat(str.substr(start, count) || 0);
  }
  function readBoolean(str, index) {
    while (str[index] == ' ')
      index++;
    var start = index;
    var count = 0;
    var length = str.length;
    while (index < length && str[index++] != ' ') {
      count++;
    }
    // Use 1 and 0 since that's what type2 charstrings use.
    return str.substr(start, count) === 'true' ? 1 : 0;
  }
  function isSeparator(c) {
    return c == ' ' || c == '\n' || c == '\x0d';
  }
  this.extractFontProgram = function Type1Parser_extractFontProgram(stream) {
    var eexec = decrypt(stream, EEXEC_ENCRYPT_KEY, 4);
    var eexecStr = '';
    for (var i = 0, ii = eexec.length; i < ii; i++)
      eexecStr += String.fromCharCode(eexec[i]);
    var glyphsSection = false, subrsSection = false;
      var subrs = [], charstrings = [];
      var program = {
        subrs: [],
@ -5158,120 +5181,104 @@ var Type1Parser = function type1Parser() {
          }
        }
      };
      var token;
      while ((token = this.getToken()) !== null) {
        if (token !== '/') {
          continue;
        }
        token = this.getToken();
        switch (token) {
          case 'CharStrings':
            // The number immediately following CharStrings must be greater or
            // equal to the number of CharStrings.
            this.getToken();
            this.getToken(); // read in 'dict'
            this.getToken(); // read in 'dup'
            this.getToken(); // read in 'begin'
            while(true) {
              token = this.getToken();
              if (token === null || token === 'end') {
                break;
              }
-    var glyph = '';
+              if (token !== '/') {
-    var token = '';
+                continue;
-    var length = 0;
+              }
-
+              var glyph = this.getToken();
-    var c = '';
+              var length = this.readInt();
-    var count = eexecStr.length;
+              this.getToken(); // read in 'RD' or '-|'
-    for (var i = 0; i < count; i++) {
+              var data = stream.makeSubStream(stream.pos + 1, length);
      var getToken = function getToken() {
        while (i < count && isSeparator(eexecStr[i]))
          ++i;
        var token = '';
        while (i < count && !isSeparator(eexecStr[i]))
          token += eexecStr[i++];
        return token;
      };
      var c = eexecStr[i];
      if ((glyphsSection || subrsSection) &&
          (token == 'RD' || token == '-|')) {
        i++;
        var data = eexec.slice(i, i + length);
              var lenIV = program.properties.privateData['lenIV'];
-        var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV);
+              var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY,
-
+                                    lenIV);
-        if (glyphsSection) {
+              // Skip past the required space and binary data.
              stream.skip(1 + length);
              token = this.getToken(); // read in 'ND' or '|-'
              if (token === 'noaccess') {
                this.getToken(); // read in 'def'
              }
              charstrings.push({
                glyph: glyph,
                encoded: encoded
              });
        } else {
          subrs.push(encoded);
            }
        i += length;
        token = '';
      } else if (isSeparator(c)) {
        // Use '| 0' to prevent setting a double into length such as the double
        // does not flow into the loop variable.
        length = parseInt(token, 10) | 0;
        token = '';
      } else {
        token += c;
        if (!glyphsSection) {
          switch (token) {
            case '/CharString':
              glyphsSection = true;
            break;
-            case '/Subrs':
+          case 'Subrs':
-              ++i;
+            var num = this.readInt();
-              var num = parseInt(getToken(), 10);
+            this.getToken(); // read in 'array'
              getToken(); // read in 'array'
            for (var j = 0; j < num; ++j) {
-                var t = getToken(); // read in 'dup'
+              token = this.getToken(); // read in 'dup'
-                if (t == 'ND' || t == '|-' || t == 'noaccess')
+              var index = this.readInt();
                  break;
                var index = parseInt(getToken(), 10);
              if (index > j)
                j = index;
-                var length = parseInt(getToken(), 10);
+              var length = this.readInt();
-                getToken(); // read in 'RD'
+              this.getToken(); // read in 'RD' or '-|'
-                var data = eexec.slice(i + 1, i + 1 + length);
+              var data = stream.makeSubStream(stream.pos + 1, length);
              var lenIV = program.properties.privateData['lenIV'];
-                var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV);
+              var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY,
-                i = i + 1 + length;
+                                    lenIV);
-                t = getToken(); // read in 'NP'
+              // Skip past the required space and binary data.
-                if (t == 'noaccess')
+              stream.skip(1 + length);
-                  getToken(); // read in 'put'
+              token = this.getToken(); // read in 'NP' or '|'
              if (token === 'noaccess') {
                this.getToken(); // read in 'put'
              }
              subrs[index] = encoded;
            }
            break;
-            case '/BlueValues':
+          case 'BlueValues':
-            case '/OtherBlues':
+          case 'OtherBlues':
-            case '/FamilyBlues':
+          case 'FamilyBlues':
-            case '/FamilyOtherBlues':
+          case 'FamilyOtherBlues':
-              var blueArray = readNumberArray(eexecStr, i + 1);
+            var blueArray = this.readNumberArray();
            // *Blue* values may contain invalid data: disables reading of
            // those values when hinting is disabled.
            if (blueArray.length > 0 && (blueArray.length % 2) === 0 &&
                HINTING_ENABLED) {
-                program.properties.privateData[token.substring(1)] = blueArray;
+              program.properties.privateData[token] = blueArray;
            }
            break;
-            case '/StemSnapH':
+          case 'StemSnapH':
-            case '/StemSnapV':
+          case 'StemSnapV':
-              program.properties.privateData[token.substring(1)] =
+            program.properties.privateData[token] = this.readNumberArray();
                readNumberArray(eexecStr, i + 1);
            break;
-            case '/StdHW':
+          case 'StdHW':
-            case '/StdVW':
+          case 'StdVW':
-              program.properties.privateData[token.substring(1)] =
+            program.properties.privateData[token] =
-                readNumberArray(eexecStr, i + 1)[0];
+              this.readNumberArray()[0];
            break;
-            case '/BlueShift':
+          case 'BlueShift':
-            case '/lenIV':
+          case 'lenIV':
-            case '/BlueFuzz':
+          case 'BlueFuzz':
-            case '/BlueScale':
+          case 'BlueScale':
-            case '/LanguageGroup':
+          case 'LanguageGroup':
-            case '/ExpansionFactor':
+          case 'ExpansionFactor':
-              program.properties.privateData[token.substring(1)] =
+            program.properties.privateData[token] = this.readNumber();
                readNumber(eexecStr, i + 1);
            break;
-            case '/ForceBold':
+          case 'ForceBold':
-              program.properties.privateData[token.substring(1)] =
+            program.properties.privateData[token] = this.readBoolean();
                readBoolean(eexecStr, i + 1);
            break;
        }
        } else if (c == '/') {
          token = glyph = '';
          while ((c = eexecStr[++i]) != ' ')
            glyph += c;
        }
      }
      }
      for (var i = 0; i < charstrings.length; i++) {
@ -5296,56 +5303,39 @@ var Type1Parser = function type1Parser() {
      }
      return program;
-  };
+    },
-  this.extractFontHeader = function Type1Parser_extractFontHeader(stream,
+    extractFontHeader: function Type1Parser_extractFontHeader(properties) {
-                                                                  properties) {
+      var token;
-    var headerString = '';
+      while ((token = this.getToken()) !== null) {
-    for (var i = 0, ii = stream.length; i < ii; i++)
+        if (token !== '/') {
-      headerString += String.fromCharCode(stream[i]);
+          continue;
    var token = '';
    var count = headerString.length;
    for (var i = 0; i < count; i++) {
      var getToken = function getToken() {
        var character = headerString[i];
        while (i < count && (isSeparator(character) || character == '/'))
          character = headerString[++i];
        var token = '';
        while (i < count && !(isSeparator(character) || character == '/')) {
          token += character;
          character = headerString[++i];
        }
-
+        token = this.getToken();
        return token;
      };
      var c = headerString[i];
      if (isSeparator(c)) {
        switch (token) {
-          case '/FontMatrix':
+          case 'FontMatrix':
-            var matrix = readNumberArray(headerString, i + 1);
+            var matrix = this.readNumberArray();
            properties.fontMatrix = matrix;
            break;
-          case '/Encoding':
+          case 'Encoding':
-            var encodingArg = getToken();
+            var encodingArg = this.getToken();
            var encoding;
            if (!/^\d+$/.test(encodingArg)) {
              // encoding name is specified
              encoding = Encodings[encodingArg];
            } else {
              encoding = [];
-              var size = parseInt(encodingArg, 10);
+              var size = parseInt(encodingArg, 10) | 0;
-              getToken(); // read in 'array'
+              this.getToken(); // read in 'array'
              for (var j = 0; j < size; j++) {
-                var token = getToken();
+                var token = this.getToken();
-                if (token == 'dup') {
+                if (token === 'dup') {
-                  var index = parseInt(getToken(), 10);
+                  var index = this.readInt();
-                  var glyph = getToken();
+                  this.getToken(); // read in '/'
                  var glyph = this.getToken();
                  encoding[index] = glyph;
-                  getToken(); // read the in 'put'
+                  this.getToken(); // read the in 'put'
                }
              }
            }
@ -5355,13 +5345,12 @@ var Type1Parser = function type1Parser() {
            }
            break;
        }
        token = '';
      } else {
        token += c;
      }
    }
  };
-};
+
  return Type1Parser;
 })();
 /**
 * The CFF class takes a Type1 file and wrap it into a
@ -5435,17 +5424,17 @@ var CFFStandardStrings = [
  'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman', 'Semibold'
 ];
 var type1Parser = new Type1Parser();
 // Type1Font is also a CIDFontType0.
 var Type1Font = function Type1Font(name, file, properties) {
  // Get the data block containing glyphs and subrs information
-  var headerBlock = file.getBytes(properties.length1);
+  var headerBlock = new Stream(file.getBytes(properties.length1));
-  type1Parser.extractFontHeader(headerBlock, properties);
+  var headerBlockParser = new Type1Parser(headerBlock);
  headerBlockParser.extractFontHeader(properties);
  // Decrypt the data blocks and retrieve its content
-  var eexecBlock = file.getBytes(properties.length2);
+  var eexecBlock = new Stream(file.getBytes(properties.length2));
-  var data = type1Parser.extractFontProgram(eexecBlock);
+  var eexecBlockParser = new Type1Parser(eexecBlock, true);
  var data = eexecBlockParser.extractFontProgram();
  for (var info in data.properties)
    properties[info] = data.properties[info];
--- a/test/unit/font_spec.js
+++ b/test/unit/font_spec.js
@ -1,7 +1,7 @@
 /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
 /* globals expect, it, describe, CFFCompiler, CFFParser, CFFIndex, CFFStrings,
-           SEAC_ANALYSIS_ENABLED:true */
+           SEAC_ANALYSIS_ENABLED:true, Type1Parser, StringStream */
 'use strict';
@ -297,4 +297,95 @@ describe('font', function() {
    });
    // TODO a lot more compiler tests
  });
  // Unit tests for Type1Parser: the tokenizer (getToken), the typed readers
  // (readNumber, readBoolean, readNumberArray) and the two extraction entry
  // points (extractFontProgram, extractFontHeader). Every parser here is
  // created without the `encrypted` flag, so the input is read as plain text.
  describe('Type1Parser', function() {
    it('splits tokens', function() {
      // Delimiters ('/', '[', ']') come back as separate tokens even when no
      // whitespace surrounds them; null marks the end of the input.
      var stream = new StringStream('/BlueValues[-17 0]noaccess def');
      var parser = new Type1Parser(stream);
      expect(parser.getToken()).toEqual('/');
      expect(parser.getToken()).toEqual('BlueValues');
      expect(parser.getToken()).toEqual('[');
      expect(parser.getToken()).toEqual('-17');
      expect(parser.getToken()).toEqual('0');
      expect(parser.getToken()).toEqual(']');
      expect(parser.getToken()).toEqual('noaccess');
      expect(parser.getToken()).toEqual('def');
      expect(parser.getToken()).toEqual(null);
    });

    it('handles glued tokens', function() {
      // A name directly following another token must still be split at '/'.
      var stream = new StringStream('dup/CharStrings');
      var parser = new Type1Parser(stream);
      expect(parser.getToken()).toEqual('dup');
      expect(parser.getToken()).toEqual('/');
      expect(parser.getToken()).toEqual('CharStrings');
    });

    it('ignores whitespace', function() {
      var stream = new StringStream('\nab   c\t');
      var parser = new Type1Parser(stream);
      expect(parser.getToken()).toEqual('ab');
      expect(parser.getToken()).toEqual('c');
    });

    it('parses numbers', function() {
      var stream = new StringStream('123');
      var parser = new Type1Parser(stream);
      expect(parser.readNumber()).toEqual(123);
    });

    it('parses booleans', function() {
      // Booleans are reported as 1/0 rather than true/false.
      var stream = new StringStream('true false');
      var parser = new Type1Parser(stream);
      expect(parser.readBoolean()).toEqual(1);
      expect(parser.readBoolean()).toEqual(0);
    });

    it('parses number arrays', function() {
      var stream = new StringStream('[1 2]');
      var parser = new Type1Parser(stream);
      expect(parser.readNumberArray()).toEqual([1, 2]);
      // Variation on spacing.
      stream = new StringStream('[ 1 2 ]');
      parser = new Type1Parser(stream);
      expect(parser.readNumberArray()).toEqual([1, 2]);
    });

    it('skips comments', function() {
      // Everything from '%' to the end of the line is ignored.
      var stream = new StringStream(
        '%!PS-AdobeFont-1.0: CMSY10 003.002\n' +
        '%%Title: CMSY10\n' +
        '%Version: 003.002\n' +
        'FontDirectory');
      var parser = new Type1Parser(stream);
      expect(parser.getToken()).toEqual('FontDirectory');
    });

    it('parses font program', function() {
      // Each 'RD' is followed by one space and a 1-byte payload ('x'),
      // matching the declared length of 1.
      var stream = new StringStream(
        '/ExpansionFactor  99\n' +
        '/Subrs 1 array\n' +
        'dup 0 1 RD x noaccess put\n' +
        '/CharStrings 46 dict dup begin\n' +
        '/.notdef 1 RD x ND' + '\n' +
        'end');
      var parser = new Type1Parser(stream);
      var program = parser.extractFontProgram();
      expect(program.charstrings.length).toEqual(1);
      expect(program.properties.privateData.ExpansionFactor).toEqual(99);
    });

    it('parses font header font matrix', function() {
      var stream = new StringStream(
        '/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def\n');
      var parser = new Type1Parser(stream);
      var props = {};
      // extractFontHeader reports its results through `props`.
      parser.extractFontHeader(props);
      expect(props.fontMatrix).toEqual([0.001, 0, 0, 0.001, 0, 0]);
    });

    it('parses font header encoding', function() {
      var stream = new StringStream(
        '/Encoding 256 array\n' +
        '0 1 255 {1 index exch /.notdef put} for\n' +
        'dup 33 /arrowright put\n' +
        'readonly def\n');
      var parser = new Type1Parser(stream);
      var props = {};
      // extractFontHeader reports its results through `props`.
      parser.extractFontHeader(props);
      expect(props.baseEncoding[33]).toEqual('arrowright');
    });
  });
 });