From 74abf984d5823f56179ca8f266d3eb320003aeaf Mon Sep 17 00:00:00 2001
From: Vivien Nicolas <21@vingtetun.org>
Date: Fri, 10 Jun 2011 01:20:00 +0200
Subject: [PATCH] Add the beginning of a Type1 to Type2 charstring converter
---
PDFFont.js | 358 ++++++++++++++++++++++++++++++++++++++++++++++++-----
pdf.js | 2 +-
test.html | 1 +
3 files changed, 328 insertions(+), 33 deletions(-)
diff --git a/PDFFont.js b/PDFFont.js
index 084174dad..5685e6874 100644
--- a/PDFFont.js
+++ b/PDFFont.js
@@ -1,9 +1,15 @@
-
-/*
- * This dictionary hold the decoded fonts
+/**
+ * This dictionary holds decoded fonts data.
*/
var Fonts = new Dict();
+/**
+ * This simple object keeps track of the fonts that have already been decoded
+ * by storing a map between the name given by the PDF and the name gathered
+ * from the font (aka the PostScript code of the font itself for Type1 font).
+ */
+var _Fonts = {};
+
var Base64Encoder = {
encode: function(aData) {
@@ -16,12 +22,10 @@ var Base64Encoder = {
}
};
-
-
-
var TrueTypeFont = function(aFontName, aFontFile) {
- if (Fonts.get(aFontName))
+ if (_Fonts[aFontName])
return;
+ _Fonts[aFontName] = true;
//log("Loading a TrueType font: " + aFontName);
var fontData = Base64Encoder.encode(aFontFile);
@@ -36,7 +40,16 @@ var TrueTypeFont = function(aFontName, aFontFile) {
var Type1Parser = function(aAsciiStream, aBinaryStream) {
- var lexer = new Lexer(aAsciiStream);
+ if (IsStream(aAsciiStream)) {
+ var lexer = new Lexer(aAsciiStream);
+ } else {
+ var lexer = {
+ __data__: aAsciiStream.slice(),
+ getObj: function() {
+ return this.__data__.shift();
+ }
+ }
+ }
// Turn on this flag for additional debugging logs
var debug = false;
@@ -46,6 +59,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
log(aData);
};
+  // Holds the fontName as declared inside the /FontName PostScript directive
+  // XXX This is a hack but at the moment I need it to map the name declared
+  // in the PDF and the name in the PS code.
+ var fontName = "";
+
/*
* Parse a whole Type1 font stream (from the first segment to the last)
* assuming the 'eexec' block is binary data and fill up the 'Fonts'
@@ -55,6 +73,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
this.parse = function() {
if (!debug) {
while (!processNextToken()) {};
+ return fontName;
} else {
// debug mode is used to debug postcript processing
setTimeout(function() {
@@ -62,7 +81,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
self.parse();
}, 0);
}
- }
+ };
/*
* Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence
@@ -87,7 +106,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var end = Date.now();
dump("Time to decrypt string of length " + count + " is " + (end - start));
return decryptedString.slice(aDiscardNumber);
- }
+ };
/*
* CharStrings are encoded following the the CharString Encoding sequence
@@ -98,7 +117,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
* CharString Number Encoding:
* A CharString byte containing the values from 32 through 255 inclusive
* indicate an integer. These values are decoded in four ranges.
- *
+ *
* 1. A CharString byte containing a value, v, between 32 and 246 inclusive,
* indicate the integer v - 139. Thus, the integer values from -107 through
* 107 inclusive may be encoded in single byte.
@@ -110,7 +129,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
* 3. A CharString byte containing a value, v, between 251 and 254 inclusive,
* indicates an integer involving the next byte, w, according to the formula:
* -[(v - 251) * 256] - w - 108
- *
+ *
* 4. A CharString containing the value 255 indicates that the next 4 bytes
* are a two complement signed integer. The first of these bytes contains the
* highest order bits, the second byte contains the next higher order bits
@@ -157,7 +176,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
"31": "hcurveto"
};
- // XXX Is count++ the right thing to do? Is it not i++?
function decodeCharString(aStream) {
var start = Date.now();
var charString = [];
@@ -167,12 +185,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
for (var i = 0; i < count; i++) {
value = aStream.getByte();
- if (value < 0) {
- continue;
- } else if (value < 32) {
+ if (value < 32) {
if (value == 12) {
value = charStringDictionary["12"][aStream.getByte()];
- count++;
+ i++;
} else {
value = charStringDictionary[value];
}
@@ -180,16 +196,16 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
value = parseInt(value) - 139;
} else if (value <= 250) {
value = ((value - 247) * 256) + parseInt(aStream.getByte()) + 108;
- count++;
+ i++;
} else if (value <= 254) {
value = -((value - 251) * 256) - parseInt(aStream.getByte()) - 108;
- count++;
+ i++;
} else {
var byte = aStream.getByte();
var high = (byte >> 1);
value = (byte - high) << 24 | aStream.getByte() << 16 |
aStream.getByte() << 8 | aStream.getByte();
- count += 4;
+ i += 4;
}
charString.push(value);
@@ -228,6 +244,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
return this.__innerStack__[this.__innerStack__.length - 1];
},
+ get: function(aIndex) {
+ return this.__innerStack__[aIndex];
+ },
+
dump: function() {
log("=== Start Dumping operandStack ===");
var str = [];
@@ -345,7 +365,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
return lexer.getObj();
};
-
/*
* Get the next token from the executionStack and process it.
* Actually the function does not process the third segment of a Type1 font
@@ -531,7 +550,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var font = operandStack.pop();
var key = operandStack.pop();
dump("definefont " + font + " with key: " + key);
+
+ // The key will be the identifier to recognize this font
+ fontName = key;
Fonts.set(key, font);
+
operandStack.push(font);
break;
@@ -600,6 +623,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var decodedCharString = decodeCharString(charStream);
dump("decodedCharString: " + decodedCharString);
operandStack.push(decodedCharString);
+
// boolean indicating if the operation is a success or not
operandStack.push(true);
break;
@@ -630,36 +654,305 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
}
break;
}
- } else if (obj){
+ } else if (obj) {
dump("unknow: " + obj);
operandStack.push(obj);
+ } else { // The End!
+ operandStack.dump();
+ return true;
}
return false;
}
+
+  /*
+   * Merge aCommand with the matching commands already on top of the
+   * operandStack, then push the operands and the command back onto it.
+   */
+  function aggregateCommand(aCommand) {
+    var command = aCommand;
+    // Stays empty for hstem/vstem so the push-back loop below has an array
+    var stack = [];
+    switch (command) {
+      case "hstem":
+      case "vstem":
+        break;
+
+      case "rrcurveto":
+        stack = [operandStack.pop(), operandStack.pop(),
+                 operandStack.pop(), operandStack.pop(),
+                 operandStack.pop(), operandStack.pop()];
+        var next = true;
+        while (next) {
+          var op = operandStack.peek();
+          if (op == "rrcurveto") {
+            operandStack.pop();
+            for (var i = 0; i < 6; i++)
+              stack.push(operandStack.pop());
+          } else {
+            next = false;
+          }
+        }
+        break;
+
+      case "hlineto":
+      case "vlineto":
+        var last = command;
+        stack = [operandStack.pop()];
+        var next = true;
+        while (next) {
+          var op = operandStack.peek();
+          if ((op == "vlineto" && last == "hlineto") ||
+              (op == "hlineto" && last == "vlineto")) {
+            operandStack.pop();
+            stack.push(operandStack.pop());
+          } else if (op == "rlineto" && command == "hlineto") {
+            operandStack.pop();
+            var x = stack.pop();
+            operandStack.push(0);
+            operandStack.push(x);
+            command = "rlineto";
+          } else if (op == "rlineto" && command == "vlineto") {
+            operandStack.pop();
+            operandStack.push(0);
+            command = "rlineto";
+          } else {
+            next = false;
+          }
+          last = op;
+        }
+        break;
+
+      case "rlineto":
+        stack = [operandStack.pop(), operandStack.pop()];
+        var next = true;
+        while (next) {
+          var op = operandStack.peek();
+          if (op == "rlineto") {
+            operandStack.pop();
+            stack.push(operandStack.pop());
+            stack.push(operandStack.pop());
+          } else if (op == "hlineto") {
+            operandStack.pop();
+            stack.push(0);
+            stack.push(operandStack.pop());
+          } else if (op == "vlineto") {
+            operandStack.pop();
+            stack.push(operandStack.pop());
+            stack.push(0);
+          } else {
+            next = false;
+          }
+        }
+        break;
+    }
+
+    while (stack.length)
+      operandStack.push(stack.pop());
+    operandStack.push(command);
+  };
+
+
+ /*
+   * Flatten the commands by interpreting the PostScript code and replacing
+   * every 'callsubr', 'callothersubr' with the real commands.
+   * At the moment OtherSubrs are not fully supported and only OtherSubrs 0-4
+   * as described in 'Using Subroutines' of 'Adobe Type 1 Font Format',
+   * chapter 8.
+ */
+  this.flattenCharstring = function(aCharString, aDefaultWidth, aNominalWidth, aSubrs) {
+    // aCharString is not read here: tokens are pulled through nextInStack()
+    var leftSidebearing = 0;
+    var lastPoint = 0;
+    while (true) {
+      var obj = nextInStack();
+      if (IsBool(obj) || IsInt(obj) || IsNum(obj)) {
+        dump("Value: " + obj);
+        operandStack.push(obj);
+      } else if (IsString(obj)) {
+        dump("String: " + obj);
+        switch (obj) {
+          case "hsbw":
+            var charWidthVector = operandStack.pop();
+            leftSidebearing = operandStack.pop();
+            if (charWidthVector != aDefaultWidth)
+              operandStack.push(charWidthVector - aNominalWidth);
+            break;
+
+          case "setcurrentpoint":
+          case "dotsection":
+          case "seac":
+          case "sbw":
+            error(obj + " parsing is not implemented (yet)");
+            break;
+
+          case "vstem3":
+            operandStack.push("vstem");
+            break;
+
+          case "vstem":
+            log(obj + " is not converted (yet?)");
+            operandStack.push("vstem");
+            break;
+
+          case "closepath":
+          case "return":
+            break;
+
+          case "hlineto":
+          case "vlineto":
+          case "rlineto":
+          case "rrcurveto":
+            aggregateCommand(obj);
+            break;
+
+          case "rmoveto":
+            var dy = operandStack.pop();
+            var dx = operandStack.pop();
+            // The sidebearing read by hsbw is applied once, to the next move
+            if (leftSidebearing) {
+              dx += leftSidebearing;
+              leftSidebearing = 0;
+            }
+            operandStack.push(dx);
+            operandStack.push(dy);
+            operandStack.push("rmoveto");
+            break;
+
+          case "hstem":
+          case "hstem3":
+            var dy = operandStack.pop();
+            var y = operandStack.pop();
+            if (operandStack.peek() == "hstem" ||
+                operandStack.peek() == "hstem3")
+              operandStack.pop();
+            // y is emitted relative to the end of the previous stem
+            operandStack.push(y - lastPoint);
+            lastPoint = y + dy;
+            operandStack.push(dy);
+            operandStack.push("hstem");
+            break;
+
+          case "callsubr":
+            var index = operandStack.pop();
+            executionStack.push(aSubrs[index].slice());
+            break;
+
+          case "callothersubr":
+            log("callothersubr");
+            // XXX need to be improved
+            var index = operandStack.pop();
+            var count = operandStack.pop();
+            var data = operandStack.pop();
+            operandStack.push(3);
+            operandStack.push("callothersubr");
+            break;
+          case "endchar":
+            operandStack.push("endchar");
+            return operandStack.__innerStack__.slice();
+          case "pop":
+            operandStack.pop();
+            break;
+          default:
+            operandStack.push(obj);
+        }
+      } else if (obj === undefined || obj === null) {
+        // Out of tokens without 'endchar': return instead of spinning forever
+        return operandStack.__innerStack__.slice();
+      }
+    }
+  };
};
var type1hack = false;
var Type1Font = function(aFontName, aFontFile) {
+ if (_Fonts[aFontName])
+ return;
+ _Fonts[aFontName] = true;
+
// All Type1 font program should begin with the comment %!
if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21)
error("Invalid file header");
if (!type1hack) {
- type1hack= true;
- var start = Date.now();
+ type1hack = true;
+ var start = Date.now();
- var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict);
- var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict);
+ var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict);
+ var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict);
- this.parser = new Type1Parser(ASCIIStream, binaryStream);
- this.parser.parse();
+ this.parser = new Type1Parser(ASCIIStream, binaryStream);
+ var fontName = this.parser.parse();
+ this.convertToOTF(fontName);
+ }
+};
- var end = Date.now();
- //log("Time to parse font is:" + (end - start));
+Type1Font.prototype = {
+ convertToOTF: function(aFontName) {
+ var font = Fonts.get(aFontName);
- this.convert();
+ var private = font.get("Private");
+ var subrs = private.get("Subrs");
+ var otherSubrs = private.get("OtherSubrs");
+ var charstrings = font.get("CharStrings")
+
+ // Try to get the most used glyph width
+ var widths = {};
+ for (var glyph in charstrings.map) {
+ var glyphData = charstrings.get(glyph);
+ var glyphWidth = glyphData[1];
+ if (widths[glyphWidth])
+ widths[glyphWidth]++;
+ else
+ widths[glyphWidth] = 1;
+ }
+
+ var defaultWidth = 0;
+ var used = 0;
+ for (var width in widths) {
+ if (widths[width] > used) {
+ defaultWidth = width;
+ used = widths[width];
+ }
+ }
+ log("defaultWidth to used: " + defaultWidth);
+
+ var maxNegDistance = 0;
+ var maxPosDistance = 0;
+ for (var width in widths) {
+ var diff = width - defaultWidth;
+ if (diff < 0 && diff < maxNegDistance) {
+ maxNegDistance = diff;
+ } else if (diff > 0 && diff > maxPosDistance) {
+ maxPosDistance = diff;
+ }
+ }
+
+ var nominalWidth = parseInt(defaultWidth) + (parseInt(maxPosDistance) + parseInt(maxNegDistance)) / 2;
+ log("nominalWidth to used: " + nominalWidth);
+ log("Hack nonimal:" + (nominalWidth = 615));
+
+ for (var glyph in charstrings.map) {
+ if (glyph == ".notdef")
+ continue;
+
+ var glyphData = charstrings.get(glyph);
+ var parser = new Type1Parser(glyphData);
+ log("=================================== " + glyph + " ==============================");
+ log(charstrings.get(glyph));
+ log(parser.flattenCharstring("A", defaultWidth, nominalWidth, subrs));
+ log(validationData[glyph]);
+ }
+
+
+ /*
+ log(charStrings.get("A"));
+ log(newCharStrings.get("A"));
+ log(validationData["A"]);
+ */
+ var end = Date.now();
+ //log("Time to parse font is:" + (end - start));
}
};
@@ -1016,6 +1309,7 @@ var Type2Parser = function(aFilePath) {
// XXX
+/*
var xhr = new XMLHttpRequest();
xhr.open("GET", "titi.cff", false);
xhr.mozResponseType = xhr.responseType = "arraybuffer";
@@ -1025,4 +1319,4 @@ var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse ||
xhr.responseArrayBuffer || xhr.response;
var cff = new Type2Parser("titi.cff");
cff.parse(new Stream(cffData));
-
+*/
diff --git a/pdf.js b/pdf.js
index ea6a62f57..ef8a18861 100644
--- a/pdf.js
+++ b/pdf.js
@@ -2280,9 +2280,9 @@ var CanvasGraphics = (function() {
var subtype = font.get("Subtype").name;
switch (subtype) {
case "Type1":
- break;
var fontDescriptor = font.get("FontDescriptor");
if (fontDescriptor.num) {
+ // XXX fetchIfRef looks expensive
var fontDescriptor = this.xref.fetchIfRef(fontDescriptor);
var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile"));
font = new Type1Font(fontDescriptor.get("FontName").name, fontFile);
diff --git a/test.html b/test.html
index 5bd0ea119..83d48741e 100644
--- a/test.html
+++ b/test.html
@@ -6,6 +6,7 @@
+