Fix some bugs and add the beginning of a Type2 reader

This commit is contained in:
Vivien Nicolas 2011-06-08 17:26:29 +02:00
parent 4d261759d9
commit c098f0b31f
4 changed files with 828 additions and 9 deletions

View File

@ -17,6 +17,8 @@ var Base64Encoder = {
};
var TrueTypeFont = function(aFontName, aFontFile) {
if (Fonts.get(aFontName))
return;
@ -30,6 +32,7 @@ var TrueTypeFont = function(aFontName, aFontFile) {
document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0);
};
var Type1Parser = function(aAsciiStream, aBinaryStream) {
var lexer = new Lexer(aAsciiStream);
@ -211,6 +214,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
},
pop: function() {
if (!this.length)
throw new Error("stackunderflow");
return this.__innerStack__.pop();
},
@ -220,10 +225,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
return this.__innerStack__[this.__innerStack__.length - 1];
},
toString: function() {
dump: function() {
log("=== Start Dumping operandStack ===");
var str = [];
for (var i = 0; i < this.__innerStack__.length; i++)
for (var i = 0; i < this.length; i++)
log(this.__innerStack__[i]);
log("=== End Dumping operandStack ===");
},
@ -257,7 +262,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
},
pop: function() {
if (this.__innerStack__.length == 2)
if (this.__innerStack__.length == 3)
return null;
return this.__innerStack__.pop();
@ -275,7 +280,15 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
get length() {
return this.__innerStack__.length;
}
},
dump: function() {
log("=== Start Dumping dictionaryStack ===");
var str = [];
for (var i = 0; i < this.length; i++)
log(this.__innerStack__[i]);
log("=== End Dumping dictionaryStack ===");
},
};
/*
@ -433,10 +446,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var data = operandStack.pop();
var indexOrKey = operandStack.pop();
var object = operandStack.pop();
//dump("put " + data + " in " + object + "[" + indexOrKey + "]");
dump("put " + data + " in " + object + "[" + indexOrKey + "]");
object.set ? object.set(indexOrKey, data)
: object[indexOrKey] = data;
break;
case "pop":
@ -454,7 +466,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var indexOrKey = operandStack.pop();
var object = operandStack.pop();
var data = object.get ? object.get(indexOrKey) : object[indexOrKey];
dump("get " + obj + "[" + indexOrKey + "]: " + data);
dump("get " + object + "[" + indexOrKey + "]: " + data);
operandStack.push(data);
break;
@ -501,6 +513,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var value = operandStack.pop();
var key = operandStack.pop();
// XXX we don't want to do that here but for some reasons the names
// are different between what is declared and the FontName directive
if (key == "FontName" && Fonts.get(value)) {
// The font has already be decoded, stop!
return true;
@ -515,6 +529,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var key = operandStack.pop();
dump("definefont " + font + " with key: " + key);
Fonts.set(key, font);
operandStack.push(font);
break;
case "known":
@ -532,7 +547,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
case "eexec":
// All the first segment data has been read, decrypt the second segment
// and start interpreting it in order to decode it
var file = operandStack.pop();
var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join("");
dump(eexecString);
lexer = new Lexer(new StringStream(eexecString));
break;
@ -553,7 +570,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var newOperand = operandStack.peek();
for (var i = 0; i < operands.length; i++)
while (operands.length)
operandStack.push(operands.pop());
operandStack.push(newOperand);
@ -620,11 +637,14 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
};
var type1hack = false;
var Type1Font = function(aFontName, aFontFile) {
// All Type1 font program should begin with the comment %!
if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21)
error("Invalid file header");
if (!type1hack) {
type1hack= true;
var start = Date.now();
var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict);
@ -635,5 +655,248 @@ var Type1Font = function(aFontName, aFontFile) {
var end = Date.now();
//log("Time to parse font is:" + (end - start));
this.convert();
}
};
var hack = false;
Type1Font.prototype = {
convert: function() {
var fontName = "TACTGM+NimbusRomNo9L-Medi";
var fontData = null;
for (var font in Fonts.map) {
if (font == fontName) {
fontData = Fonts.get(font);
break;
}
}
if (!fontData || hack)
return;
hack = true;
var t1Only = [
"callothersubr",
"closepath",
"dotsection",
"hsbw",
"hstem3",
"pop",
"sbw",
"seac",
"setcurrentpoint",
"vstem3"
];
/*
* The sequence and form of a Type 2 charstring program may be
* represented as:
* w? {hs* vs* cm* hm* mt subpath}? {mt subpath}* endchar
*
*/
var t2CharStrings = new Dict();
var t1CharStrings = fontData.get("CharStrings");
for (var key in t1CharStrings.map) {
var font = t1CharStrings.get(key);
var t2font = [];
for (var i = 0; i < font.length; i++) {
var token = font[i];
switch (token) {
case "hsbw":
var width = t2font.pop();
var leftSidebearingPoint = t2font.pop();
font.push(width);
break;
default:
if (t1Only.indexOf(token) != -1) {
log(token + " need convert!\n");
throw new Error("Type1 Only token");
}
t2font.push(token);
break;
}
}
log(key + "::" + t1CharStrings.get(key));
log("type2::" + t2font);
}
}
};
function decodeType2DictData(aString, aDictionary) {
var data = [];
var value = "";
var count = aString.length;
for (var i = 0; i < count; i) {
value = aString[i++];
if (value < 0) {
continue;
} else if (value == 28) {
value = aString[i++] << 8 | aString[i++];
} else if (value == 29) {
value = aString[i++] << 24 |
aString[i++] << 16 |
aString[i++] << 8 |
aString[i++];
} else if (value < 32) {
if (value == 12) {
value = aDictionary["12"][aString[i++]];
} else {
value = aDictionary[value];
}
} else if (value <= 246) {
value = parseInt(value) - 139;
} else if (value <= 250) {
value = ((value - 247) * 256) + parseInt(aString[i++]) + 108;
} else if (value <= 254) {
value = -((value - 251) * 256) - parseInt(aString[i++]) - 108;
} else {
throw new Error("Value should not be 255");
}
data.push(value);
}
return data;
}
var Type2Parser = function(aFilePath) {
var font = new Dict();
// Turn on this flag for additional debugging logs
var debug = true;
function dump(aStr) {
if (debug)
log(aStr);
};
function readIndex(aStream, aIsByte) {
var count = aStream.getByte() + aStream.getByte();
var offsize = aStream.getByte();
var offsets = [];
for (var i = 0; i < count + 1; i++) {
var offset = 0;
for (var j = 0; j < offsize; j++) {
// XXX need to do some better code here
var byte = aStream.getByte();
offset += byte;
}
offsets.push(offset);
}
dump("Found " + count + " objects at offsets :" + offsets + " (offsize: " + offsize + ")");
var dataOffset = aStream.pos;
var objects = [];
for (var i = 0; i < count; i++) {
var offset = offsets[i];
aStream.pos = dataOffset + offset - 1;
var data = [];
var length = offsets[i + 1] - 1;
for (var j = offset - 1; j < length; j++)
data.push(aIsByte ? aStream.getByte() : aStream.getChar());
dump("object at offset " + offset + " is: " + data);
objects.push(data);
}
return objects;
};
function parseAsToken(aArray) {
var objects = [];
var count = aArray.length;
for (var i = 0; i < count; i++) {
var decoded = decodeType2DictData(aArray[i], CFFDictOps);
var stack = [];
var count = decoded.length;
for (var i = 0; i < count; i++) {
var token = decoded[i];
if (IsNum(token)) {
stack.push(token);
} else {
switch (token.operand) {
case "SID":
font.set(token.name, CFFStrings[stack.pop()]);
break;
case "number number":
font.set(token.name, {
size: stack.pop(),
offset: stack.pop()
});
break;
case "boolean":
font.set(token.name, stack.pop());
break;
case "delta":
font.set(token.name, stack.pop());
break;
default:
if (token.operand && token.operand.length) {
var array = [];
for (var j = 0; j < token.operand.length; j++)
array.push(stack.pop());
font.set(token.name, array);
} else {
font.set(token.name, stack.pop());
}
break;
}
}
}
}
return objects;
};
this.parse = function(aStream) {
font.set("major", aStream.getByte());
font.set("minor", aStream.getByte());
font.set("hdrSize", aStream.getByte());
font.set("offsize", aStream.getByte());
// Move the cursor after the header
aStream.skip(font.get("hdrSize") - aStream.pos);
// Read the NAME Index
dump("Reading Index: Names");
font.set("Names", readIndex(aStream));
dump(font.get("Names"));
// Read the Top Dict Index
dump("Reading Index: TopDict");
var topDict = readIndex(aStream, true);
// Read the String Index
dump("Reading Index: Strings");
var strings = readIndex(aStream);
// Fill up the Strings dictionary with the new unique strings
for (var i = 0; i < strings.length; i++)
CFFStrings.push(strings[i].join(""));
// Parse the TopDict operator
parseAsToken(topDict);
for (var p in font.map) {
log(p + "::" + font.get(p));
}
}
};
//
var xhr = new XMLHttpRequest();
xhr.open("GET", "titi.cff", false);
xhr.mozResponseType = xhr.responseType = "arraybuffer";
xhr.expected = (document.URL.indexOf("file:") == 0) ? 0 : 200;
xhr.send(null);
var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse ||
xhr.responseArrayBuffer || xhr.response;
var cff = new Type2Parser("titi.cff");
cff.parse(new Stream(cffData));

552
cffStandardStrings.js Normal file
View File

@ -0,0 +1,552 @@
var CFFStrings = [
".notdef",
"space",
"exclam",
"quotedbl",
"numbersign",
"dollar",
"percent",
"ampersand",
"quoteright",
"parenleft",
"parenright",
"asterisk",
"plus",
"comma",
"hyphen",
"period",
"slash",
"zero",
"one",
"two",
"three",
"four",
"five",
"six",
"seven",
"eight",
"nine",
"colon",
"semicolon",
"less",
"equal",
"greater",
"question",
"at",
"A",
"B",
"C",
"D",
"E",
"F",
"G",
"H",
"I",
"J",
"K",
"L",
"M",
"N",
"O",
"P",
"Q",
"R",
"S",
"T",
"U",
"V",
"W",
"X",
"Y",
"Z",
"bracketleft",
"backslash",
"bracketright",
"asciicircum",
"underscore",
"quoteleft",
"95 asciitilde",
"b",
"c",
"d",
"e",
"f",
"g",
"h",
"i",
"j",
"k",
"l",
"m",
"n",
"o",
"p",
"q",
"r",
"s",
"t",
"u",
"v",
"w",
"x",
"y",
"z",
"braceleft",
"bar",
"braceright",
"asciitilde",
"exclamdown",
"cent",
"sterling",
"fraction",
"yen",
"florin",
"section",
"currency",
"quotesingle",
"quotedblleft",
"guillemotleft",
"guilsinglleft",
"guilsinglright",
"fi",
"fl",
"endash",
"dagger",
"daggerdbl",
"periodcentered",
"paragraph",
"bullet",
"quotesinglbase",
"quotedblbase",
"quotedblright",
"guillemotright",
"ellipsis",
"perthousand",
"questiondown",
"grave",
"acute",
"circumflex",
"tilde",
"macron",
"breve",
"dotaccent",
"dieresis",
"ring",
"cedilla",
"hungarumlaut",
"ogonek",
"caron",
"emdash",
"AE",
"ordfeminine",
"Lslash",
"Oslash",
"OE",
"ordmasculine",
"ae",
"dotlessi",
"lslash",
"oslash",
"oe",
"germandbls",
"onesuperior",
"logicalnot",
"mu",
"trademark",
"Eth",
"onehalf",
"plusminus",
"Thorn",
"onequarter",
"divide",
"brokenbar",
"degree",
"thorn",
"threequarters",
"twosuperior",
"registered",
"minus",
"eth",
"multiply",
"threesuperior",
"copyright",
"Aacute",
"Acircumflex",
"Adieresis",
"Agrave",
"Aring",
"Atilde",
"Ccedilla",
"Eacute",
"Ecircumflex",
"Edieresis",
"Egrave",
"Iacute",
"Icircumflex",
"Idieresis",
"Igrave",
"Ntilde",
"Oacute",
"Ocircumflex",
"Odieresis",
"Ograve",
"Otilde",
"Scaron",
"Uacute",
"Ucircumflex",
"Udieresis",
"Ugrave",
"Yacute",
"Ydieresis",
"Zcaron",
"aacute",
"acircumflex",
"adieresis",
"agrave",
"aring",
"atilde",
"ccedilla",
"eacute",
"ecircumflex",
"edieresis",
"egrave",
"iacute",
"icircumflex",
"idieresis",
"igrave",
"ntilde",
"oacute",
"ocircumflex",
"odieresis",
"ograve",
"otilde",
"scaron",
"uacute",
"ucircumflex",
"udieresis",
"ugrave",
"yacute",
"ydieresis",
"zcaron",
"exclamsmall",
"Hungarumlautsmall",
"dollaroldstyle",
"dollarsuperior",
"ampersandsmall",
"Acutesmall",
"parenleftsuperior",
"parenrightsuperior",
"266 ff",
"onedotenleader",
"zerooldstyle",
"oneoldstyle",
"twooldstyle",
"threeoldstyle",
"fouroldstyle",
"fiveoldstyle",
"sixoldstyle",
"sevenoldstyle",
"eightoldstyle",
"nineoldstyle",
"commasuperior",
"threequartersemdash",
"periodsuperior",
"questionsmall",
"asuperior",
"bsuperior",
"centsuperior",
"dsuperior",
"esuperior",
"isuperior",
"lsuperior",
"msuperior",
"nsuperior",
"osuperior",
"rsuperior",
"ssuperior",
"tsuperior",
"ff",
"ffi",
"ffl",
"parenleftinferior",
"parenrightinferior",
"Circumflexsmall",
"hyphensuperior",
"Gravesmall",
"Asmall",
"Bsmall",
"Csmall",
"Dsmall",
"Esmall",
"Fsmall",
"Gsmall",
"Hsmall",
"Ismall",
"Jsmall",
"Ksmall",
"Lsmall",
"Msmall",
"Nsmall",
"Osmall",
"Psmall",
"Qsmall",
"Rsmall",
"Ssmall",
"Tsmall",
"Usmall",
"Vsmall",
"Wsmall",
"Xsmall",
"Ysmall",
"Zsmall",
"colonmonetary",
"onefitted",
"rupiah",
"Tildesmall",
"exclamdownsmall",
"centoldstyle",
"Lslashsmall",
"Scaronsmall",
"Zcaronsmall",
"Dieresissmall",
"Brevesmall",
"Caronsmall",
"Dotaccentsmall",
"Macronsmall",
"figuredash",
"hypheninferior",
"Ogoneksmall",
"Ringsmall",
"Cedillasmall",
"questiondownsmall",
"oneeighth",
"threeeighths",
"fiveeighths",
"seveneighths",
"onethird",
"twothirds",
"zerosuperior",
"foursuperior",
"fivesuperior",
"sixsuperior",
"sevensuperior",
"eightsuperior",
"ninesuperior",
"zeroinferior",
"oneinferior",
"twoinferior",
"threeinferior",
"fourinferior",
"fiveinferior",
"sixinferior",
"seveninferior",
"eightinferior",
"nineinferior",
"centinferior",
"dollarinferior",
"periodinferior",
"commainferior",
"Agravesmall",
"Aacutesmall",
"Acircumflexsmall",
"Atildesmall",
"Adieresissmall",
"Aringsmall",
"AEsmall",
"Ccedillasmall",
"Egravesmall",
"Eacutesmall",
"Ecircumflexsmall",
"Edieresissmall",
"Igravesmall",
"Iacutesmall",
"Icircumflexsmall",
"Idieresissmall",
"Ethsmall",
"Ntildesmall",
"Ogravesmall",
"Oacutesmall",
"Ocircumflexsmall",
"Otildesmall",
"Odieresissmall",
"OEsmall",
"Oslashsmall",
"Ugravesmall",
"Uacutesmall",
"Ucircumflexsmall",
"Udieresissmall",
"Yacutesmall",
"Thornsmall",
"Ydieresissmall",
"001.000",
"001.001",
"001.002",
"001.003",
"Black",
"Bold",
"Book",
"Light",
"Medium",
"Regular",
"Roman",
"Semibold"
];
var CFFDictOps = {
"0": {
name: "version",
operand: "SID"
},
"1": {
name: "Notice",
operand: "SID"
},
"2": {
name: "FullName",
operand: "SID"
},
"3": {
name: "FamilyName",
operand: "SID"
},
"4": {
name: "Weight",
operand: "SID"
},
"5": {
name: "FontBBox",
operand: [0, 0, 0, 0]
},
"6": {
name: "BlueValues"
},
"7": {
name: "OtherBlues"
},
"8": {
name: "FamilyBlues"
},
"9": {
name: "FamilyOtherBlues"
},
"10": {
name: "StdHW"
},
"11": {
name: "StdVW"
},
"12": {
"0": {
name: "Copyright",
operand: "SID"
},
"1": {
name: "IsFixedPitch",
operand: false
},
"2": {
name: "ItalicAngle",
operand: 0
},
"3": {
name: "UnderlinePosition",
operand: -100
},
"4": {
name: "UnderlineThickness",
operand: 50
},
"5": {
name: "PaintType",
operand: 0
},
"6": {
name: "CharstringType",
operand: 2
},
"7": {
name: "FontMatrix",
operand: [0.001, 0, 0, 0.001, 0 ,0]
},
"8": {
name: "StrokeWidth",
operand: 0
},
"9": {
name: "BlueScale"
},
"10": {
name: "BlueShift"
},
"11": {
name: "BlueFuzz"
},
"12": {
name: "StemSnapH"
},
"13": {
name: "StemSnapV"
},
"14": {
name: "ForceBold"
},
"17": {
name: "LanguageGroup"
},
"18": {
name: "ExpansionFactor"
},
"9": {
name: "initialRandomSeed"
},
"20": {
name: "SyntheticBase",
operand: null
},
"21": {
name: "PostScript",
operand: "SID"
},
"22": {
name: "BaseFontName",
operand: "SID"
},
"23": {
name: "BaseFontBlend",
operand: "delta"
}
},
"13": {
name: "UniqueID",
operand: null
},
"14": {
name: "XUID",
operand: []
},
"15": {
name: "charset",
operand: 0
},
"16": {
name: "Encoding",
operand: 0
},
"17": {
name: "CharStrings",
operand: null
},
"18": {
name: "Private",
operand: "number number"
},
"19": {
name: "Subrs"
},
"20": {
name: "defaultWidthX"
},
"21": {
name: "nominalWidthX"
}
};

5
pdf.js
View File

@ -5,6 +5,7 @@ var ERRORS = 0, WARNINGS = 1, TODOS = 5;
var verbosity = WARNINGS;
function log(msg) {
msg = msg.toString ? msg.toString() : msg;
if (console && console.log)
console.log(msg);
else if (print)
@ -78,7 +79,7 @@ var Stream = (function() {
return ch;
},
skip: function(n) {
if (!n)
if (!n && !IsNum(n))
n = 1;
this.pos += n;
},
@ -2279,6 +2280,7 @@ var CanvasGraphics = (function() {
var subtype = font.get("Subtype").name;
switch (subtype) {
case "Type1":
break;
var fontDescriptor = font.get("FontDescriptor");
if (fontDescriptor.num) {
var fontDescriptor = this.xref.fetchIfRef(fontDescriptor);
@ -2292,6 +2294,7 @@ var CanvasGraphics = (function() {
break;
case "TrueType":
break;
var fontDescriptor = font.get("FontDescriptor");
if (fontDescriptor.num) {
var fontDescriptor = this.xref.fetchIfRef(fontDescriptor);

View File

@ -5,6 +5,7 @@
<script type="text/javascript" src="pdf.js"></script>
<script type="text/javascript" src="test.js"></script>
<script type="text/javascript" src="cffStandardStrings.js"></script>
<script type="text/javascript" src="PDFFont.js"></script>
</head>