Decode charStrings and stop the interpreter on every unknown token
This commit is contained in:
parent
3064305d91
commit
e936f305d7
166
PDFFont.js
166
PDFFont.js
@ -20,10 +20,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
|||||||
|
|
||||||
function decrypt(aStream, aKey, aDiscardNumber) {
|
function decrypt(aStream, aKey, aDiscardNumber) {
|
||||||
var r = aKey, c1 = 52845, c2 = 22719;
|
var r = aKey, c1 = 52845, c2 = 22719;
|
||||||
|
|
||||||
var decryptedString = [];
|
var decryptedString = [];
|
||||||
var value = null;
|
|
||||||
|
|
||||||
|
var value = "";
|
||||||
var count = aStream.length;
|
var count = aStream.length;
|
||||||
for (var i = 0; i < count; i++) {
|
for (var i = 0; i < count; i++) {
|
||||||
value = aStream.getByte();
|
value = aStream.getByte();
|
||||||
@ -33,6 +32,108 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
|||||||
return decryptedString.slice(aDiscardNumber);
|
return decryptedString.slice(aDiscardNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * CharStrings are encoded following the CharString Encoding sequence
 * described in Chapter 6 of the "Adobe Type1 Font Format" specification.
 * The value in a byte indicates a command, a number, or subsequent bytes
 * that are to be interpreted in a special way.
 *
 * CharString Number Encoding:
 *  A CharString byte containing a value from 32 through 255 inclusive
 *  indicates an integer. These values are decoded in four ranges.
 *
 * 1. A CharString byte containing a value, v, between 32 and 246 inclusive,
 * indicates the integer v - 139. Thus, the integer values from -107 through
 * 107 inclusive may be encoded in a single byte.
 *
 * 2. A CharString byte containing a value, v, between 247 and 250 inclusive,
 * indicates an integer involving the next byte, w, according to the formula:
 * [(v - 247) * 256] + w + 108
 *
 * 3. A CharString byte containing a value, v, between 251 and 254 inclusive,
 * indicates an integer involving the next byte, w, according to the formula:
 * -[(v - 251) * 256] - w - 108
 *
 * 4. A CharString byte containing the value 255 indicates that the next
 * 4 bytes are a two's complement signed integer. The first of these bytes
 * contains the highest order bits, the second byte contains the next higher
 * order bits and the fourth byte contains the lowest order bits.
 *
 *
 * CharString Command Encoding:
 *  CharString commands are encoded in 1 or 2 bytes.
 *
 *  Single byte commands are encoded in 1 byte that contains a value between
 *  0 and 31 inclusive.
 *  If a command byte contains the value 12, then the value in the next byte
 *  indicates a command. This "escape" mechanism allows many extra commands
 *  to be encoded and this encoding technique helps to minimize the length of
 *  the charStrings.
 *
 *
 * decodeCharString reads at most aStream.length bytes from aStream through
 * aStream.getByte() and returns an array holding, in order, one entry per
 * decoded token: a number for an encoded integer or the command name (a
 * string) for a command. A command byte that has no entry in the table
 * below yields an undefined entry. Decoding stops as soon as getByte()
 * returns a negative value; a token whose operand bytes are missing at the
 * end of the data is dropped.
 */
function decodeCharString(aStream) {
  // Command names indexed by command byte. The entry for 12 maps the second
  // byte of an escaped (two bytes) command to its name.
  var cmd = {
    "1": "hstem",
    "3": "vstem",
    "4": "vmoveto",
    "5": "rlineto",
    "6": "hlineto",
    "7": "vlineto",
    "8": "rrcurveto",
    "9": "closepath",
    "10": "callsubr",
    "11": "return",
    "12": {
      "0": "dotsection",
      "1": "vstem3",
      "3": "hstem3",
      "6": "seac",
      "7": "sbw",
      "12": "div",
      "16": "callothersubr",
      "17": "pop",
      "33": "setcurrentpoint"
    },
    "13": "hsbw",
    "14": "endchar",
    "21": "rmoveto",
    "22": "hmoveto",
    "30": "vhcurveto",
    "31": "hcurveto"
  };

  var charString = [];

  // Number of bytes that can still be read. Every byte, including the
  // operand bytes of multi-byte tokens, is accounted for here so the loop
  // never asks the stream for more than aStream.length bytes.
  var remaining = aStream.length;

  // Return the next byte, or -1 once the announced length is used up.
  function nextByte() {
    if (!(remaining > 0))
      return -1;
    remaining--;
    return aStream.getByte();
  }

  while (remaining > 0) {
    var value = nextByte();
    if (value < 0)
      break;

    if (value < 32) {
      if (value == 12) {
        // Escaped command: the real command is in the following byte.
        var escaped = nextByte();
        if (escaped < 0)
          break;
        value = cmd["12"][escaped];
      } else {
        value = cmd[value];
      }
    } else if (value <= 246) {
      value = value - 139;
    } else if (value <= 254) {
      var w = nextByte();
      if (w < 0)
        break;

      if (value <= 250)
        value = ((value - 247) * 256) + w + 108;
      else
        value = -((value - 251) * 256) - w - 108;
    } else {
      // 255: the next four bytes are a big-endian two's complement integer.
      var b1 = nextByte();
      var b2 = nextByte();
      var b3 = nextByte();
      var b4 = nextByte();
      if (b1 < 0 || b2 < 0 || b3 < 0 || b4 < 0)
        break;

      // Bitwise operators work on signed 32 bits integers, so the result
      // already carries the right sign.
      value = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
    }

    charString.push(value);
  }

  return charString;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The operand stack holds arbitrary PostScript objects that are the operands
|
* The operand stack holds arbitrary PostScript objects that are the operands
|
||||||
* and results of PostScript operators being executed. The interpreter pushes
|
* and results of PostScript operators being executed. The interpreter pushes
|
||||||
@ -63,7 +164,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Flag indicating if the topmost operand of the operandStack is an array
|
// Flag indicating if the topmost operand of the operandStack is an array
|
||||||
var operandIsArray = false;
|
var operandIsArray = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The dictionary stack holds only dictionary objects. The current set of
|
* The dictionary stack holds only dictionary objects. The current set of
|
||||||
@ -113,23 +214,31 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
|||||||
*/
|
*/
|
||||||
var executionStack = [];
|
var executionStack = [];
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Parse a font file from the first segment to the last assuming the eexec
|
||||||
|
* block is binary data.
|
||||||
|
*
|
||||||
|
* The method throws an error if it encounters an unknown token.
|
||||||
|
*/
|
||||||
this.getObj = function() {
|
this.getObj = function() {
|
||||||
var obj = lexer.getObj();
|
var obj = lexer.getObj();
|
||||||
|
|
||||||
if (operandIsArray && !IsCmd(obj, "}") && !IsCmd(obj, "]")) {
|
if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") &&
|
||||||
|
!IsCmd(obj, "}") && !IsCmd(obj, "]")) {
|
||||||
operandStack.peek().push(obj);
|
operandStack.peek().push(obj);
|
||||||
this.getObj();
|
this.getObj();
|
||||||
} else if (IsCmd(obj, "{") || IsCmd(obj, "[")) {
|
} else if (IsCmd(obj, "{") || IsCmd(obj, "[")) {
|
||||||
dump("Start Array: " + obj);
|
dump("Start Array: " + obj);
|
||||||
operandStack.push([]);
|
operandStack.push([]);
|
||||||
operandIsArray = true;
|
operandIsArray++;
|
||||||
this.getObj();
|
this.getObj();
|
||||||
} else if (IsCmd(obj, "}") || IsCmd(obj, "]")) {
|
} else if (IsCmd(obj, "}") || IsCmd(obj, "]")) {
|
||||||
dump("End Array: " + obj);
|
dump("End Array: " + obj);
|
||||||
operandIsArray = false;
|
operandIsArray--;
|
||||||
this.getObj();
|
this.getObj();
|
||||||
} else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) {
|
} else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) {
|
||||||
dump("Value: " + obj);
|
//dump("Value: " + obj);
|
||||||
operandStack.push(obj);
|
operandStack.push(obj);
|
||||||
this.getObj();
|
this.getObj();
|
||||||
} else if (IsCmd(obj, "dup")) {
|
} else if (IsCmd(obj, "dup")) {
|
||||||
@ -145,11 +254,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
|||||||
operandStack.push(systemDict);
|
operandStack.push(systemDict);
|
||||||
this.getObj();
|
this.getObj();
|
||||||
} else if (IsCmd(obj, "readonly") || IsCmd(obj, "executeonly") ||
|
} else if (IsCmd(obj, "readonly") || IsCmd(obj, "executeonly") ||
|
||||||
IsCmd(obj, "currentfile")) {
|
IsCmd(obj, "currentfile") || IsCmd(obj, "NP")) {
|
||||||
// Do nothing for the moment
|
// Do nothing for the moment
|
||||||
this.getObj();
|
this.getObj();
|
||||||
} else if (IsName(obj)) {
|
} else if (IsName(obj)) {
|
||||||
dump("Name: " + obj.name);
|
//dump("Name: " + obj.name);
|
||||||
operandStack.push(obj.name);
|
operandStack.push(obj.name);
|
||||||
this.getObj();
|
this.getObj();
|
||||||
} else if (IsCmd(obj, "dict")) {
|
} else if (IsCmd(obj, "dict")) {
|
||||||
@ -191,20 +300,32 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
|
|||||||
var size = operandStack.pop();
|
var size = operandStack.pop();
|
||||||
var key = operandStack.pop();
|
var key = operandStack.pop();
|
||||||
|
|
||||||
var stream = lexer.stream.makeSubStream(lexer.stream.pos, size);
|
// Add '1' because of the space separator, this is dirty
|
||||||
|
var stream = lexer.stream.makeSubStream(lexer.stream.pos + 1, size);
|
||||||
|
lexer.stream.skip(size + 1);
|
||||||
|
|
||||||
var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join("");
|
var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join("");
|
||||||
|
var charStream = new StringStream(charString);
|
||||||
|
|
||||||
// XXX do we want to store that on the top dictionary or somewhere else
|
// XXX do we want to store that on the top dictionary or somewhere else
|
||||||
dictionaryStack.peek().set(key, new StringStream(charString));
|
dictionaryStack.peek().set(key, charStream);
|
||||||
log (new StringStream(charString));
|
|
||||||
|
var decodedCharString = decodeCharString(charStream);
|
||||||
|
log(decodedCharString);
|
||||||
|
|
||||||
this.getObj();
|
this.getObj();
|
||||||
} else if (IsCmd(obj, "LenIV")) {
|
} else if (IsCmd(obj, "LenIV")) {
|
||||||
error("LenIV: argh! we need to modify the length of discard characters for charStrings");
|
error("LenIV: argh! we need to modify the length of discard characters for charStrings");
|
||||||
} else {
|
} else if (IsCmd(obj, "closefile")) {
|
||||||
dump("Getting an unknow token, adding it to the stack just in case");
|
// End of binary data;
|
||||||
dump(obj);
|
} else if (IsCmd(obj, "StandardEncoding")) {
|
||||||
operandStack.push(obj);
|
// For some reason the value is considered as a command, maybe it is
|
||||||
|
// because of the uppercase 'S'
|
||||||
|
operandStack.push(obj.cmd);
|
||||||
this.getObj();
|
this.getObj();
|
||||||
|
} else {
|
||||||
|
dump(obj);
|
||||||
|
error("Unknow token while parsing font");
|
||||||
}
|
}
|
||||||
|
|
||||||
return operandStack.peek();
|
return operandStack.peek();
|
||||||
@ -215,22 +336,11 @@ var hack = false;
|
|||||||
|
|
||||||
var Type1Font = function(aFontName, aFontFile) {
|
var Type1Font = function(aFontName, aFontFile) {
|
||||||
// All Type1 font program should begin with the comment %!
|
// All Type1 font program should begin with the comment %!
|
||||||
var validHeader = aFontFile.getByte() == 0x25 && aFontFile.getByte() == 0x21;
|
if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21)
|
||||||
if (!validHeader)
|
|
||||||
error("Invalid file header");
|
error("Invalid file header");
|
||||||
|
|
||||||
var programType = "PS-AdobeFont";
|
|
||||||
for (var i = 0; i < programType.length; i++)
|
|
||||||
aFontFile.getChar();
|
|
||||||
|
|
||||||
// Ignore the '-' separator
|
|
||||||
aFontFile.getChar();
|
|
||||||
|
|
||||||
var version = parseFloat(aFontFile.getChar() + aFontFile.getChar() + aFontFile.getChar());
|
|
||||||
|
|
||||||
if (!hack) {
|
if (!hack) {
|
||||||
log(aFontName);
|
log(aFontName);
|
||||||
log("Version is: " + version);
|
|
||||||
|
|
||||||
var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict);
|
var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict);
|
||||||
var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict);
|
var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user