Decode charStrings and stop the interpreter on every unknow token
This commit is contained in:
		
							parent
							
								
									3064305d91
								
							
						
					
					
						commit
						e936f305d7
					
				
							
								
								
									
										166
									
								
								PDFFont.js
									
									
									
									
									
								
							
							
						
						
									
										166
									
								
								PDFFont.js
									
									
									
									
									
								
							@ -20,10 +20,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
 | 
			
		||||
 | 
			
		||||
  function decrypt(aStream, aKey, aDiscardNumber) {
 | 
			
		||||
    var r = aKey, c1 = 52845, c2 = 22719;
 | 
			
		||||
 | 
			
		||||
    var decryptedString = [];
 | 
			
		||||
    var value = null;
 | 
			
		||||
 | 
			
		||||
    var value = "";
 | 
			
		||||
    var count = aStream.length;
 | 
			
		||||
    for (var i = 0; i < count; i++) {
 | 
			
		||||
      value = aStream.getByte();
 | 
			
		||||
@ -33,6 +32,108 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
 | 
			
		||||
    return decryptedString.slice(aDiscardNumber);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
   * CharStrings are encoded following the the CharString Encoding sequence
 | 
			
		||||
   * describe in Chapter 6 of the "Adobe Type1 Font Format" specification.
 | 
			
		||||
   * The value in a byte indicates a command, a number, or subsequent bytes
 | 
			
		||||
   * that are to be interpreted in a special way.
 | 
			
		||||
   *
 | 
			
		||||
   * CharString Number Encoding:
 | 
			
		||||
   *  A CharString byte containing the values from 32 through 255 inclusive
 | 
			
		||||
   *  indicate an integer. These values are decoded in four ranges.
 | 
			
		||||
   * 
 | 
			
		||||
   * 1. A CharString byte containing a value, v, between 32 and 246 inclusive,
 | 
			
		||||
   * indicate the integer v - 139. Thus, the integer values from -107 through
 | 
			
		||||
   * 107 inclusive may be encoded in single byte.
 | 
			
		||||
   *
 | 
			
		||||
   * 2. A CharString byte containing a value, v, between 247 and 250 inclusive,
 | 
			
		||||
   * indicates an integer involving the next byte, w, according to the formula:
 | 
			
		||||
   * [(v - 247) x 256] + w + 108
 | 
			
		||||
   *
 | 
			
		||||
   * 3. A CharString byte containing a value, v, between 251 and 254 inclusive,
 | 
			
		||||
   * indicates an integer involving the next byte, w, according to the formula:
 | 
			
		||||
   * -[(v - 251) * 256] - w - 108
 | 
			
		||||
   * 
 | 
			
		||||
   * 4. A CharString containing the value 255 indicates that the next 4 bytes
 | 
			
		||||
   * are a two complement signed integer. The first of these bytes contains the
 | 
			
		||||
   * highest order bits, the second byte contains the next higher order bits
 | 
			
		||||
   * and the fourth byte contain the lowest order bits.
 | 
			
		||||
   *
 | 
			
		||||
   *
 | 
			
		||||
   * CharString Command Encoding:
 | 
			
		||||
   *  CharStrings commands are encoded in 1 or 2 bytes.
 | 
			
		||||
   *
 | 
			
		||||
   *  Single byte commands are encoded in 1 byte that contains a value between
 | 
			
		||||
   *  0 and 31 inclusive.
 | 
			
		||||
   *  If a command byte contains the value 12, then the value in the next byte
 | 
			
		||||
   *  indicates a command. This "escape" mechanism allows many extra commands
 | 
			
		||||
   * to be encoded and this encoding technique helps to minimize the length of
 | 
			
		||||
   * the charStrings.
 | 
			
		||||
   */
 | 
			
		||||
  function decodeCharString(aStream) {
 | 
			
		||||
    var charString = [];
 | 
			
		||||
    var cmd = {
 | 
			
		||||
      "1": "hstem",
 | 
			
		||||
      "3": "vstem",
 | 
			
		||||
      "4": "vmoveto",
 | 
			
		||||
      "5": "rlineto",
 | 
			
		||||
      "6": "hlineto",
 | 
			
		||||
      "7": "vlineto",
 | 
			
		||||
      "8": "rrcurveto",
 | 
			
		||||
      "9": "closepath",
 | 
			
		||||
      "10": "callsubr",
 | 
			
		||||
      "11": "return",
 | 
			
		||||
      "12": {
 | 
			
		||||
        "0": "dotsection",
 | 
			
		||||
        "1": "vstem3",
 | 
			
		||||
        "3": "hstem3",
 | 
			
		||||
        "6": "seac",
 | 
			
		||||
        "7": "sbw",
 | 
			
		||||
        "12": "div",
 | 
			
		||||
        "16": "callothersubr",
 | 
			
		||||
        "17": "pop",
 | 
			
		||||
        "33": "setcurrentpoint"
 | 
			
		||||
      },
 | 
			
		||||
      "13": "hsbw",
 | 
			
		||||
      "14": "endchar",
 | 
			
		||||
      "21": "rmoveto",
 | 
			
		||||
      "22": "hmoveto",
 | 
			
		||||
      "30": "vhcurveto",
 | 
			
		||||
      "31": "hcurveto"
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    var value = "";
 | 
			
		||||
    var count = aStream.length;
 | 
			
		||||
    for (var i = 0; i < count; i++) {
 | 
			
		||||
      value = aStream.getByte();
 | 
			
		||||
      
 | 
			
		||||
      if (value < 0) {
 | 
			
		||||
        continue;
 | 
			
		||||
      } else if (value < 32) {
 | 
			
		||||
        if (value == 12) {
 | 
			
		||||
          value = cmd["12"][aStream.getByte()];
 | 
			
		||||
          count++;
 | 
			
		||||
        } else {
 | 
			
		||||
          value = cmd[value];
 | 
			
		||||
        }
 | 
			
		||||
      } else if (value <= 246) {
 | 
			
		||||
        value = parseInt(value) - 139;
 | 
			
		||||
      } else if (value <= 250) {
 | 
			
		||||
        value = ((value - 247) * 256) + parseInt(aStream.getByte()) + 108;
 | 
			
		||||
        count++;
 | 
			
		||||
      } else if (value <= 254) {
 | 
			
		||||
        value = -((value - 251) * 256) - parseInt(aStream.getByte()) - 108;
 | 
			
		||||
        count++;
 | 
			
		||||
      } else {
 | 
			
		||||
        error("Two complement signed integers are ignored for the moment");
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      charString.push(value);
 | 
			
		||||
    }
 | 
			
		||||
  
 | 
			
		||||
    return charString;    
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
   * The operand stack holds arbitrary PostScript objects that are the operands
 | 
			
		||||
   * and results of PostScript operators being executed. The interpreter pushes
 | 
			
		||||
@ -63,7 +164,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
 | 
			
		||||
   };
 | 
			
		||||
 | 
			
		||||
   // Flag indicating if the topmost operand of the operandStack is an array
 | 
			
		||||
   var operandIsArray = false;
 | 
			
		||||
   var operandIsArray = 0;
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
   * The dictionary stack holds only dictionary objects. The current set of
 | 
			
		||||
@ -113,23 +214,31 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
 | 
			
		||||
   */
 | 
			
		||||
  var executionStack = [];
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
   * Parse a font file from the first segment to the last assuming the eexec
 | 
			
		||||
   * block is binary data.
 | 
			
		||||
   * 
 | 
			
		||||
   * The method thrown an error if it encounters an unknown token.
 | 
			
		||||
   */
 | 
			
		||||
  this.getObj = function() {
 | 
			
		||||
    var obj = lexer.getObj();
 | 
			
		||||
 | 
			
		||||
    if (operandIsArray && !IsCmd(obj, "}") && !IsCmd(obj, "]")) {
 | 
			
		||||
    if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") && 
 | 
			
		||||
                          !IsCmd(obj, "}") && !IsCmd(obj, "]")) {
 | 
			
		||||
      operandStack.peek().push(obj);
 | 
			
		||||
      this.getObj();
 | 
			
		||||
    } else if (IsCmd(obj, "{") || IsCmd(obj, "[")) {
 | 
			
		||||
      dump("Start Array: " + obj);
 | 
			
		||||
      operandStack.push([]);
 | 
			
		||||
      operandIsArray = true;
 | 
			
		||||
      operandIsArray++;
 | 
			
		||||
      this.getObj();
 | 
			
		||||
    } else if (IsCmd(obj, "}") || IsCmd(obj, "]")) {
 | 
			
		||||
      dump("End Array: " + obj);
 | 
			
		||||
      operandIsArray = false;
 | 
			
		||||
      operandIsArray--;
 | 
			
		||||
      this.getObj();
 | 
			
		||||
    } else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) {
 | 
			
		||||
      dump("Value: " + obj);
 | 
			
		||||
      //dump("Value: " + obj);
 | 
			
		||||
      operandStack.push(obj);
 | 
			
		||||
      this.getObj();
 | 
			
		||||
    } else if (IsCmd(obj, "dup")) {
 | 
			
		||||
@ -145,11 +254,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
 | 
			
		||||
      operandStack.push(systemDict);
 | 
			
		||||
      this.getObj();
 | 
			
		||||
    } else if (IsCmd(obj, "readonly") || IsCmd(obj, "executeonly") ||
 | 
			
		||||
               IsCmd(obj, "currentfile")) {
 | 
			
		||||
               IsCmd(obj, "currentfile") || IsCmd(obj, "NP")) {
 | 
			
		||||
      // Do nothing for the moment
 | 
			
		||||
      this.getObj();
 | 
			
		||||
    } else if (IsName(obj)) {
 | 
			
		||||
      dump("Name: " + obj.name);
 | 
			
		||||
      //dump("Name: " + obj.name);
 | 
			
		||||
      operandStack.push(obj.name);
 | 
			
		||||
      this.getObj();
 | 
			
		||||
    } else if (IsCmd(obj, "dict")) {
 | 
			
		||||
@ -191,20 +300,32 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
 | 
			
		||||
      var size = operandStack.pop();
 | 
			
		||||
      var key = operandStack.pop();
 | 
			
		||||
 | 
			
		||||
      var stream = lexer.stream.makeSubStream(lexer.stream.pos, size);
 | 
			
		||||
      // Add '1' because of the space separator, this is dirty
 | 
			
		||||
      var stream = lexer.stream.makeSubStream(lexer.stream.pos + 1, size);
 | 
			
		||||
      lexer.stream.skip(size + 1);
 | 
			
		||||
 | 
			
		||||
      var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join("");
 | 
			
		||||
      var charStream = new StringStream(charString);
 | 
			
		||||
 | 
			
		||||
      // XXX do we want to store that on the top dictionary or somewhere else
 | 
			
		||||
      dictionaryStack.peek().set(key, new StringStream(charString));
 | 
			
		||||
      log (new StringStream(charString));
 | 
			
		||||
      dictionaryStack.peek().set(key, charStream);
 | 
			
		||||
 | 
			
		||||
      var decodedCharString = decodeCharString(charStream);
 | 
			
		||||
      log(decodedCharString);
 | 
			
		||||
 | 
			
		||||
      this.getObj();
 | 
			
		||||
    } else if (IsCmd(obj, "LenIV")) {
 | 
			
		||||
      error("LenIV: argh! we need to modify the length of discard characters for charStrings");
 | 
			
		||||
    } else {
 | 
			
		||||
      dump("Getting an unknow token, adding it to the stack just in case");
 | 
			
		||||
      dump(obj);
 | 
			
		||||
      operandStack.push(obj);
 | 
			
		||||
    } else if (IsCmd(obj, "closefile")) {
 | 
			
		||||
      // End of binary data;
 | 
			
		||||
    } else if (IsCmd(obj, "StandardEncoding")) {
 | 
			
		||||
      // For some reason the value is considered as a command, maybe it is
 | 
			
		||||
      // because of the uppercae 'S'
 | 
			
		||||
      operandStack.push(obj.cmd);
 | 
			
		||||
      this.getObj();
 | 
			
		||||
    } else {
 | 
			
		||||
      dump(obj);
 | 
			
		||||
      error("Unknow token while parsing font");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return operandStack.peek();
 | 
			
		||||
@ -215,22 +336,11 @@ var hack = false;
 | 
			
		||||
 | 
			
		||||
var Type1Font = function(aFontName, aFontFile) {
 | 
			
		||||
  // All Type1 font program should begin with the comment %!
 | 
			
		||||
  var validHeader = aFontFile.getByte() == 0x25 && aFontFile.getByte() == 0x21;
 | 
			
		||||
  if (!validHeader)
 | 
			
		||||
  if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21)
 | 
			
		||||
    error("Invalid file header");
 | 
			
		||||
 | 
			
		||||
  var programType = "PS-AdobeFont";
 | 
			
		||||
  for (var i = 0; i < programType.length; i++)
 | 
			
		||||
    aFontFile.getChar();
 | 
			
		||||
 | 
			
		||||
  // Ignore the '-' separator
 | 
			
		||||
  aFontFile.getChar();
 | 
			
		||||
 | 
			
		||||
  var version = parseFloat(aFontFile.getChar() + aFontFile.getChar() + aFontFile.getChar());
 | 
			
		||||
 | 
			
		||||
  if (!hack) {
 | 
			
		||||
    log(aFontName);
 | 
			
		||||
    log("Version is: " + version);
 | 
			
		||||
 | 
			
		||||
    var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict);
 | 
			
		||||
    var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user