Decode charStrings and stop the interpreter on every unknown token
This commit is contained in:
parent
3064305d91
commit
e936f305d7
166
PDFFont.js
166
PDFFont.js
@ -20,10 +20,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
||||
|
||||
function decrypt(aStream, aKey, aDiscardNumber) {
|
||||
var r = aKey, c1 = 52845, c2 = 22719;
|
||||
|
||||
var decryptedString = [];
|
||||
var value = null;
|
||||
|
||||
var value = "";
|
||||
var count = aStream.length;
|
||||
for (var i = 0; i < count; i++) {
|
||||
value = aStream.getByte();
|
||||
@ -33,6 +32,108 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
||||
return decryptedString.slice(aDiscardNumber);
|
||||
}
|
||||
|
||||
/*
 * CharStrings are encoded following the CharString Encoding sequence
 * described in Chapter 6 of the "Adobe Type1 Font Format" specification.
 * The value in a byte indicates a command, a number, or subsequent bytes
 * that are to be interpreted in a special way.
 *
 * CharString Number Encoding:
 *  A CharString byte containing a value from 32 through 255 inclusive
 *  indicates an integer. These values are decoded in four ranges.
 *
 * 1. A CharString byte containing a value, v, between 32 and 246 inclusive,
 * indicates the integer v - 139. Thus, the integer values from -107 through
 * 107 inclusive may be encoded in a single byte.
 *
 * 2. A CharString byte containing a value, v, between 247 and 250 inclusive,
 * indicates an integer involving the next byte, w, according to the formula:
 * [(v - 247) * 256] + w + 108
 *
 * 3. A CharString byte containing a value, v, between 251 and 254 inclusive,
 * indicates an integer involving the next byte, w, according to the formula:
 * -[(v - 251) * 256] - w - 108
 *
 * 4. A CharString byte containing the value 255 indicates that the next 4
 * bytes are a two's complement signed integer. The first of these bytes
 * contains the highest order bits, the second byte contains the next higher
 * order bits and the fourth byte contains the lowest order bits.
 *
 *
 * CharString Command Encoding:
 *  CharString commands are encoded in 1 or 2 bytes.
 *
 *  Single byte commands are encoded in 1 byte that contains a value between
 *  0 and 31 inclusive.
 *  If a command byte contains the value 12, then the value in the next byte
 *  indicates a command. This "escape" mechanism allows many extra commands
 *  to be encoded and this encoding technique helps to minimize the length of
 *  the charStrings.
 *
 *
 * decodeCharString(aStream) walks an already decrypted charstring and
 * returns an array holding, in order, one entry per token: a number for an
 * operand or the command name (string) for an operator. A command byte with
 * no entry in the table below is stored as undefined.
 *
 * aStream must expose a numeric 'length' (number of bytes to decode) and a
 * getByte() method returning the next byte. A negative byte is treated as
 * the end of the data (assumes getByte() signals exhaustion that way; to be
 * confirmed against the stream implementation).
 */
function decodeCharString(aStream) {
  var charString = [];

  // Command names keyed by byte value. The nested "12" table holds the
  // two-byte (escaped) commands, keyed by the value of the second byte.
  var cmd = {
    "1": "hstem",
    "3": "vstem",
    "4": "vmoveto",
    "5": "rlineto",
    "6": "hlineto",
    "7": "vlineto",
    "8": "rrcurveto",
    "9": "closepath",
    "10": "callsubr",
    "11": "return",
    "12": {
      "0": "dotsection",
      "1": "vstem3",
      "2": "hstem3",
      "6": "seac",
      "7": "sbw",
      "12": "div",
      "16": "callothersubr",
      "17": "pop",
      "33": "setcurrentpoint"
    },
    "13": "hsbw",
    "14": "endchar",
    "21": "rmoveto",
    "22": "hmoveto",
    "30": "vhcurveto",
    "31": "hvcurveto"
  };

  var count = aStream.length;

  // 'consumed' counts every byte taken from the stream, including the extra
  // bytes of multi-byte tokens, so the loop never reads beyond 'count' bytes.
  var consumed = 0;
  while (consumed < count) {
    var value = aStream.getByte();
    consumed++;

    if (value < 0) {
      // Nothing left to read.
      break;
    }

    if (value < 32) {
      if (value == 12) {
        // Escape: the command is selected by the following byte.
        value = cmd["12"][aStream.getByte()];
        consumed++;
      } else {
        value = cmd[value];
      }
    } else if (value <= 246) {
      value = value - 139;
    } else if (value <= 250) {
      value = ((value - 247) * 256) + parseInt(aStream.getByte(), 10) + 108;
      consumed++;
    } else if (value <= 254) {
      value = -((value - 251) * 256) - parseInt(aStream.getByte(), 10) - 108;
      consumed++;
    } else {
      // 255: big-endian 32-bit integer. The bitwise operators work on signed
      // 32-bit values, which yields the two's complement interpretation.
      var b0 = aStream.getByte();
      var b1 = aStream.getByte();
      var b2 = aStream.getByte();
      var b3 = aStream.getByte();
      value = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
      consumed += 4;
    }

    charString.push(value);
  }

  return charString;
}
|
||||
|
||||
/*
|
||||
* The operand stack holds arbitrary PostScript objects that are the operands
|
||||
* and results of PostScript operators being executed. The interpreter pushes
|
||||
@ -63,7 +164,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
||||
};
|
||||
|
||||
// Flag indicating if the topmost operand of the operandStack is an array
|
||||
var operandIsArray = false;
|
||||
var operandIsArray = 0;
|
||||
|
||||
/*
|
||||
* The dictionary stack holds only dictionary objects. The current set of
|
||||
@ -113,23 +214,31 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
||||
*/
|
||||
var executionStack = [];
|
||||
|
||||
|
||||
/*
|
||||
* Parse a font file from the first segment to the last assuming the eexec
|
||||
* block is binary data.
|
||||
*
|
||||
* The method throws an error if it encounters an unknown token.
|
||||
*/
|
||||
this.getObj = function() {
|
||||
var obj = lexer.getObj();
|
||||
|
||||
if (operandIsArray && !IsCmd(obj, "}") && !IsCmd(obj, "]")) {
|
||||
if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") &&
|
||||
!IsCmd(obj, "}") && !IsCmd(obj, "]")) {
|
||||
operandStack.peek().push(obj);
|
||||
this.getObj();
|
||||
} else if (IsCmd(obj, "{") || IsCmd(obj, "[")) {
|
||||
dump("Start Array: " + obj);
|
||||
operandStack.push([]);
|
||||
operandIsArray = true;
|
||||
operandIsArray++;
|
||||
this.getObj();
|
||||
} else if (IsCmd(obj, "}") || IsCmd(obj, "]")) {
|
||||
dump("End Array: " + obj);
|
||||
operandIsArray = false;
|
||||
operandIsArray--;
|
||||
this.getObj();
|
||||
} else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) {
|
||||
dump("Value: " + obj);
|
||||
//dump("Value: " + obj);
|
||||
operandStack.push(obj);
|
||||
this.getObj();
|
||||
} else if (IsCmd(obj, "dup")) {
|
||||
@ -145,11 +254,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
||||
operandStack.push(systemDict);
|
||||
this.getObj();
|
||||
} else if (IsCmd(obj, "readonly") || IsCmd(obj, "executeonly") ||
|
||||
IsCmd(obj, "currentfile")) {
|
||||
IsCmd(obj, "currentfile") || IsCmd(obj, "NP")) {
|
||||
// Do nothing for the moment
|
||||
this.getObj();
|
||||
} else if (IsName(obj)) {
|
||||
dump("Name: " + obj.name);
|
||||
//dump("Name: " + obj.name);
|
||||
operandStack.push(obj.name);
|
||||
this.getObj();
|
||||
} else if (IsCmd(obj, "dict")) {
|
||||
@ -191,20 +300,32 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
||||
var size = operandStack.pop();
|
||||
var key = operandStack.pop();
|
||||
|
||||
var stream = lexer.stream.makeSubStream(lexer.stream.pos, size);
|
||||
// Add '1' because of the space separator, this is dirty
|
||||
var stream = lexer.stream.makeSubStream(lexer.stream.pos + 1, size);
|
||||
lexer.stream.skip(size + 1);
|
||||
|
||||
var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join("");
|
||||
var charStream = new StringStream(charString);
|
||||
|
||||
// XXX do we want to store that on the top dictionary or somewhere else
|
||||
dictionaryStack.peek().set(key, new StringStream(charString));
|
||||
log (new StringStream(charString));
|
||||
dictionaryStack.peek().set(key, charStream);
|
||||
|
||||
var decodedCharString = decodeCharString(charStream);
|
||||
log(decodedCharString);
|
||||
|
||||
this.getObj();
|
||||
} else if (IsCmd(obj, "LenIV")) {
|
||||
error("LenIV: argh! we need to modify the length of discard characters for charStrings");
|
||||
} else {
|
||||
dump("Getting an unknow token, adding it to the stack just in case");
|
||||
dump(obj);
|
||||
operandStack.push(obj);
|
||||
} else if (IsCmd(obj, "closefile")) {
|
||||
// End of binary data;
|
||||
} else if (IsCmd(obj, "StandardEncoding")) {
|
||||
// For some reason the value is considered as a command, maybe it is
|
||||
// because of the uppercase 'S'
|
||||
operandStack.push(obj.cmd);
|
||||
this.getObj();
|
||||
} else {
|
||||
dump(obj);
|
||||
error("Unknow token while parsing font");
|
||||
}
|
||||
|
||||
return operandStack.peek();
|
||||
@ -215,22 +336,11 @@ var hack = false;
|
||||
|
||||
var Type1Font = function(aFontName, aFontFile) {
|
||||
// All Type1 font program should begin with the comment %!
|
||||
var validHeader = aFontFile.getByte() == 0x25 && aFontFile.getByte() == 0x21;
|
||||
if (!validHeader)
|
||||
if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21)
|
||||
error("Invalid file header");
|
||||
|
||||
var programType = "PS-AdobeFont";
|
||||
for (var i = 0; i < programType.length; i++)
|
||||
aFontFile.getChar();
|
||||
|
||||
// Ignore the '-' separator
|
||||
aFontFile.getChar();
|
||||
|
||||
var version = parseFloat(aFontFile.getChar() + aFontFile.getChar() + aFontFile.getChar());
|
||||
|
||||
if (!hack) {
|
||||
log(aFontName);
|
||||
log("Version is: " + version);
|
||||
|
||||
var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict);
|
||||
var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict);
|
||||
|
Loading…
x
Reference in New Issue
Block a user