From c76e2c511421010a6e6dcc86a278f6afe71c1cfc Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 26 May 2011 16:02:52 +0200 Subject: [PATCH 01/72] Separate js and css from the test.html file --- test.css | 35 ++++++++++++++ test.html | 141 ++++++++---------------------------------------------- test.js | 66 +++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 120 deletions(-) create mode 100644 test.css create mode 100644 test.js diff --git a/test.css b/test.css new file mode 100644 index 000000000..0a3449ae9 --- /dev/null +++ b/test.css @@ -0,0 +1,35 @@ +/* -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- / +/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */ + +body { + margin: 6px; + padding: 0px; + background-color: #c0bdb7; +} + +#controls { + position:fixed; + left: 0px; + top: 0px; + width: 100%; + padding: 7px; + border-bottom: 1px solid black; + background-color: rgb(242, 240, 238); +} + +span#info { + float: right; + font: 14px sans-serif; + margin-right: 10px; +} + +#viewer { + margin: auto; + border: 1px solid black; + width: 8.5in; + height: 11in; +} + +#pageNumber { + text-align: right; +} diff --git a/test.html b/test.html index 10a35e163..f14533add 100644 --- a/test.html +++ b/test.html @@ -1,125 +1,26 @@ - - Simple pdf.js page viewer - - + + Simple pdf.js page viewer + - + + -function open(url) { - document.title = url; - req = new XMLHttpRequest(); - req.open("GET", url); - req.mozResponseType = req.responseType = "arraybuffer"; - req.expected = (document.URL.indexOf("file:") == 0) ? 0 : 200; - req.onreadystatechange = xhrstate; - req.send(null); -} - -function xhrstate() { - if (req.readyState == 4 && req.status == req.expected) { - var data = req.mozResponseArrayBuffer || - req.mozResponse || - req.responseArrayBuffer || - req.response; - pdf = new PDFDoc(new Stream(data)); - numPages = pdf.numPages; - displayPage(1); - } -} - -function displayPage(num) { - pageDisplay.value = num; - - var t0 = Date.now(); - - var page = pdf.getPage(pageNum = num); - - var t1 = Date.now(); - - var ctx = canvas.getContext("2d"); - ctx.save(); - ctx.fillStyle = "rgb(255, 255, 255)"; - ctx.fillRect(0, 0, canvas.width, canvas.height); - ctx.restore(); - - var gfx = new CanvasGraphics(ctx); - page.display(gfx); - - var t2 = Date.now(); - - infoDisplay.innerHTML = "Time to render: "+ (t1 - t0) + "/" + (t2 - t1) + " ms"; -} - -function nextPage() { - if (pageNum < numPages) - ++pageNum; - displayPage(pageNum); -} - -function prevPage() { - if (pageNum > 1) - --pageNum; - displayPage(pageNum); -} -function gotoPage(num) { - if (0 <= num && num <= numPages) - pageNum = num; - displayPage(pageNum); -} - - - - -
- - - - -
-
- - -
- + +
+ + + + +
+
+ + +
+ + diff --git a/test.js b/test.js new file mode 100644 index 000000000..ef586cd34 --- /dev/null +++ b/test.js @@ -0,0 +1,66 @@ +/* -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- / +/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */ + +var pdfDocument, canvas, pageDisplay, pageNum; +function load() { + canvas = document.getElementById("canvas"); + canvas.mozOpaque = true; + open("uncompressed.tracemonkey-pldi-09.pdf"); +} + +function open(url) { + document.title = url; + req = new XMLHttpRequest(); + req.open("GET", url); + req.mozResponseType = req.responseType = "arraybuffer"; + req.expected = (document.URL.indexOf("file:") == 0) ? 0 : 200; + req.onreadystatechange = function() { + if (req.readyState == 4 && req.status == req.expected) { + var data = req.mozResponseArrayBuffer || req.mozResponse || + req.responseArrayBuffer || req.response; + pdfDocument = new PDFDoc(new Stream(data)); + displayPage(1); + } + }; + req.send(null); +} + +function displayPage(num) { + document.getElementById("pageNumber").value = num; + + var t0 = Date.now(); + + var page = pdfDocument.getPage(pageNum = num); + + var t1 = Date.now(); + + var ctx = canvas.getContext("2d"); + ctx.save(); + ctx.fillStyle = "rgb(255, 255, 255)"; + ctx.fillRect(0, 0, canvas.width, canvas.height); + ctx.restore(); + + var gfx = new CanvasGraphics(ctx); + page.display(gfx); + + var t2 = Date.now(); + + var infoDisplay = document.getElementById("info"); + infoDisplay.innerHTML = "Time to render: "+ (t1 - t0) + "/" + (t2 - t1) + " ms"; +} + +function nextPage() { + if (pageNum < pdfDocument.numPages) + displayPage(++pageNum); +} + +function prevPage() { + if (pageNum > 1) + displayPage(--pageNum); +} + +function gotoPage(num) { + if (0 <= num && num <= numPages) + displayPage(pageNum = num); +} + From fe11af4b57cbe77a6b70b09db782d54c0892d93d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 31 May 2011 20:22:05 +0200 Subject: [PATCH 02/72] Add a the beggining of a Type1 font reader --- PDFFont.js | 226 +++++++++++++++++++++++++++++++++++++++++++++++++++++ pdf.js | 36 ++++++++- test.html | 1 + 3 files changed, 260 insertions(+), 3 deletions(-) create mode 100644 PDFFont.js diff --git a/PDFFont.js b/PDFFont.js new file mode 100644 index 000000000..8328c8a59 --- /dev/null +++ b/PDFFont.js @@ -0,0 +1,226 @@ + + +var Type1Parser = function(aLexer) { + var lexer = aLexer; + + + /* + * The operand stack holds arbitrary PostScript objects that are the operands + * and results of PostScript operators being executed. The interpreter pushes + * objects on the operand stack when it encounters them as literal data in a + * program being executed. When an operator requires one or more operands, it + * obtains them by popping them off the top of the operand stack. When an + * operator returns one or more results, it does so by pushing them on the + * operand stack. + */ + var operandStack = []; + + + /* + * The dictionary stack holds only dictionary objects. The current set of + * dictionaries on the dictionary stack defines the environment for all + * implicit name searches, such as those that occur when the interpreter + * encounters an executable name. The role of the dictionary stack is + * introduced in Section 3.3, “Data Types and Objects,” and is further + * explained in Section 3.5, “Execution.” of the PostScript Language + * Reference. + */ + var systemDict = new Dict(), + globalDict = new Dict(), + userDict = new Dict(); + + + var dictionaryStack = { + __innerStack__: [systemDict, globalDict], + + push: function(aDictionary) { + this.__innerStack__.push(aDictionary); + }, + + pop: function() { + if (this.__innerStack__.length == 2) + return null; + + return this.__innerStack__.pop(); + }, + + peek: function() { + return this.__innerStack__[this.__innerStack__.length - 1]; + } + } + var currentDict = dictionaryStack.peek(); + + + /* + * The execution stack holds executable objects (mainly procedures and files) + * that are in intermediate stages of execution. At any point in the + * execution of a PostScript program, this stack represents the program’s + * call stack. Whenever the interpreter suspends execution of an object to + * execute some other object, it pushes the new object on the execution + * stack. When the interpreter finishes executing an object, it pops that + * object off the execution stack and resumes executing the suspended object + * beneath it. + */ + var isExecutionStack = false; + var executionStack = []; + + /* Stub to inhibit the logs */ + + this.getObj = function() { + var obj = lexer.getObj(); + if (isExecutionStack && !IsCmd(obj, "}") && !IsCmd(obj, "]")) { + executionStack.push(obj); + this.getObj(); + } else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) { + log("Value: " + obj); + operandStack.push(obj); + this.getObj(); + } else if (IsCmd(obj, "dup") || IsCmd(obj, "readonly") || + IsCmd(obj, "currentdict") || IsCmd(obj, "currentfile")) { + // Do nothing for the moment + this.getObj(); + } else if (IsName(obj)) { + log("Name: " + obj.name); + operandStack.push(obj.name); + this.getObj(); + } else if (IsCmd(obj, "dict")) { + log("Dict: " + obj); + + // XXX handling of dictionary is wrong here + var size = operandStack.pop(); + var name = operandStack.pop(); + if (!name) { + log ("Creating the global dict"); + currentDict = dictionaryStack.peek(); + } else { + var dict = new Dict(); + log("Assign name: " + name + " for the dictionary"); + currentDict.set(name, dict); + dictionaryStack.push(dict); + } + + this.getObj(); + } else if (IsCmd(obj, "begin")) { + log("begin a dictionary"); + currentDict = dictionaryStack.peek(); + this.getObj(); + } else if (IsCmd(obj, "end")) { + log("Ending a dictionary"); + dictionaryStack.pop(); + currentDict = dictionaryStack.peek(); + this.getObj(); + } else if (IsCmd(obj, "def")) { + if (executionStack.length) { + var value = []; + while (executionStack.length) + value.push(executionStack.shift()); + } else { + var value = operandStack.pop(); + } + + var key = operandStack.pop(); + // XXX this happen because of a bad way to handle dictionary + if (key) { + log("def: " + key + " = " + value); + currentDict.set(key, value); + } + this.getObj(); + } else if (IsCmd(obj, "{")) { + log("Start Proc: " + obj); + executionStack = []; + isExecutionStack = true; + this.getObj(); + } else if (IsCmd(obj, "}")) { + log("End Proc: " + obj); + isExecutionStack = false; + this.getObj(); + } else if (IsCmd(obj, "[")) { + isExecutionStack = true; + executionStack = []; + this.getObj(); + log("Start array: " + obj); + } else if (IsCmd(obj, "]")) { + log("End array: " + obj); + isExecutionStack = false; + this.getObj(); + } else if (IsCmd(obj, "eexec")) { + return; // end of the ASCII header + } else { + log("Getting an unknow token, adding it to the stack just in case"); + log(obj); + operandStack.push(obj); + this.getObj(); + } + return currentDict; + } +}; + +var hack = false; + +var Type1Font = function(aFontName, aFontFile) { + // All type1 font program should begin with the comment %! + var validHeader = aFontFile.getByte() == 0x25 && aFontFile.getByte() == 0x21; + if (!validHeader) + error("Invalid file header"); + + var programType = "PS-AdobeFont"; + for (var i = 0; i< programType.length; i++) + aFontFile.getChar(); + + // Ignore the '-' separator + aFontFile.getChar(); + + var version = parseFloat(aFontFile.getChar() + aFontFile.getChar() + aFontFile.getChar()); + + if (!hack) { + log(aFontName); + log("Version is: " + version); + + var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); + this.parser = new Type1Parser(new Lexer(ASCIIStream)); + + var fontDictionary = this.parser.getObj(); + log(fontDictionary + "\t" + + "fontInfo: " + fontDictionary.get("FontInfo") + "\t" + + "charStrings: " + fontDictionary.get("charStrings")); + + var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); + function decrypt(aBinaryStream, aKey) { + var c1 = 52845, c2 = 22719; + var R = aKey; + + var streamSize = aBinaryStream.length; + var decryptedString = []; + + var value = null; + for (var i = 0; i < streamSize; i++) { + value = aBinaryStream.getByte(); + decryptedString[i] = String.fromCharCode(value ^ (R >> 8)); + R = ((value + R) * c1 + c2) & ((1 << 16) - 1); + } + return decryptedString.slice(4); + } + + var eexecString = decrypt(binaryStream, 55665).join(""); + log(eexecString); + + TODO("decrypt charStrings data with the key 4330"); + hack = true; + } + + + this.info = {}; + this.name = aFontName; + this.encoding = []; + this.paintType = 0; + this.fontType = 0; + this.fontMatrix = []; + this.fontBBox = []; + this.uniqueID = 0; + this.metrics = {}; + this.strokeWidth = 0.0; + this.private = {}; + this.charStrings = {} + this.FID = 0; +}; + diff --git a/pdf.js b/pdf.js index 0628cfe32..8caac0362 100644 --- a/pdf.js +++ b/pdf.js @@ -1032,6 +1032,12 @@ var Dict = (function() { }, set: function(key, value) { this.map[key] = value; + }, + toString: function() { + var keys = []; + for (var key in this.map) + keys.push(key); + return "Dict with " + keys.length + " keys: " + keys; } }; @@ -1360,10 +1366,11 @@ var Lexer = (function() { stream.skip(); return new Cmd(">>"); } + case "{": + case "}": + return new Cmd(ch); // fall through case ')': - case '{': - case '}': error("Illegal character"); return Error; } @@ -2258,11 +2265,34 @@ var CanvasGraphics = (function() { }, setFont: function(fontRef, size) { var font = this.res.get("Font").get(fontRef.name); + font = this.xref.fetchIfRef(font); if (!font) return; + + var fontName = "Nimbus Roman No9 L"; + var subtype = font.get("Subtype").name; + switch (subtype) { + case "Type1": + var fontDescriptor = font.get("FontDescriptor"); + if (fontDescriptor.num) { + var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); + var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile")); + font = new Type1Font(fontDescriptor.get("FontName").name, fontFile); + } + break; + + case "Type3": + TODO("support Type3 font"); + break; + + default: + error("Unsupported font type: " + subtype); + break; + } + this.current.fontSize = size; TODO("using hard-coded font for testing"); - this.ctx.font = this.current.fontSize +'px "Nimbus Roman No9 L"'; + this.ctx.font = this.current.fontSize +'px "' + fontName + '"'; }, moveText: function (x, y) { this.current.x = this.current.lineX += x; diff --git a/test.html b/test.html index f14533add..ac568473a 100644 --- a/test.html +++ b/test.html @@ -5,6 +5,7 @@ + From 3064305d91451d3e04d5784a29ecbbb33bfc25a7 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 1 Jun 2011 11:52:10 +0200 Subject: [PATCH 03/72] Prepare the way to decode Type1 charStrings --- PDFFont.js | 232 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 133 insertions(+), 99 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 8328c8a59..534817c94 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1,10 +1,39 @@ +var Type1Parser = function(aAsciiStream, aBinaryStream) { + var lexer = new Lexer(aAsciiStream); -var Type1Parser = function(aLexer) { - var lexer = aLexer; + // Turn on this flag for additional debugging logs + var debug = false; - - /* + var dump = function(aData) { + if (debug) + log(aData); + }; + + /* + * Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence + * of Plaintext Bytes. The function took a key as a parameter which can be + * for decrypting the eexec block of for decoding charStrings. + */ + var kEexecEncryptionKey = 55665; + var kCharStringsEncryptionKey = 4330; + + function decrypt(aStream, aKey, aDiscardNumber) { + var r = aKey, c1 = 52845, c2 = 22719; + + var decryptedString = []; + var value = null; + + var count = aStream.length; + for (var i = 0; i < count; i++) { + value = aStream.getByte(); + decryptedString[i] = String.fromCharCode(value ^ (r >> 8)); + r = ((value + r) * c1 + c2) & ((1 << 16) - 1); + } + return decryptedString.slice(aDiscardNumber); + } + + /* * The operand stack holds arbitrary PostScript objects that are the operands * and results of PostScript operators being executed. The interpreter pushes * objects on the operand stack when it encounters them as literal data in a @@ -13,8 +42,28 @@ var Type1Parser = function(aLexer) { * operator returns one or more results, it does so by pushing them on the * operand stack. */ - var operandStack = []; + var operandStack = { + __innerStack__: [], + push: function(aOperand) { + this.__innerStack__.push(aOperand); + }, + + pop: function() { + return this.__innerStack__.pop(); + }, + + peek: function() { + return this.__innerStack__[this.__innerStack__.length - 1]; + }, + + get length() { + return this.__innerStack__.length; + } + }; + + // Flag indicating if the topmost operand of the operandStack is an array + var operandIsArray = false; /* * The dictionary stack holds only dictionary objects. The current set of @@ -28,7 +77,6 @@ var Type1Parser = function(aLexer) { var systemDict = new Dict(), globalDict = new Dict(), userDict = new Dict(); - var dictionaryStack = { __innerStack__: [systemDict, globalDict], @@ -46,10 +94,12 @@ var Type1Parser = function(aLexer) { peek: function() { return this.__innerStack__[this.__innerStack__.length - 1]; + }, + + get length() { + return this.__innerStack__.length; } } - var currentDict = dictionaryStack.peek(); - /* * The execution stack holds executable objects (mainly procedures and files) @@ -61,150 +111,134 @@ var Type1Parser = function(aLexer) { * object off the execution stack and resumes executing the suspended object * beneath it. */ - var isExecutionStack = false; var executionStack = []; - /* Stub to inhibit the logs */ - this.getObj = function() { var obj = lexer.getObj(); - if (isExecutionStack && !IsCmd(obj, "}") && !IsCmd(obj, "]")) { - executionStack.push(obj); + + if (operandIsArray && !IsCmd(obj, "}") && !IsCmd(obj, "]")) { + operandStack.peek().push(obj); + this.getObj(); + } else if (IsCmd(obj, "{") || IsCmd(obj, "[")) { + dump("Start Array: " + obj); + operandStack.push([]); + operandIsArray = true; + this.getObj(); + } else if (IsCmd(obj, "}") || IsCmd(obj, "]")) { + dump("End Array: " + obj); + operandIsArray = false; this.getObj(); } else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) { - log("Value: " + obj); + dump("Value: " + obj); operandStack.push(obj); this.getObj(); - } else if (IsCmd(obj, "dup") || IsCmd(obj, "readonly") || - IsCmd(obj, "currentdict") || IsCmd(obj, "currentfile")) { + } else if (IsCmd(obj, "dup")) { + dump("Duplicate"); + operandStack.push(operandStack.peek()); + this.getObj(); + } else if (IsCmd(obj, "currentdict")) { + dump("currentdict"); + operandStack.push(dictionaryStack.peek()); + this.getObj(); + } else if (IsCmd(obj, "systemdict")) { + dump("systemdict"); + operandStack.push(systemDict); + this.getObj(); + } else if (IsCmd(obj, "readonly") || IsCmd(obj, "executeonly") || + IsCmd(obj, "currentfile")) { // Do nothing for the moment this.getObj(); } else if (IsName(obj)) { - log("Name: " + obj.name); + dump("Name: " + obj.name); operandStack.push(obj.name); this.getObj(); } else if (IsCmd(obj, "dict")) { - log("Dict: " + obj); - - // XXX handling of dictionary is wrong here + dump("Dict: " + obj); var size = operandStack.pop(); - var name = operandStack.pop(); - if (!name) { - log ("Creating the global dict"); - currentDict = dictionaryStack.peek(); - } else { - var dict = new Dict(); - log("Assign name: " + name + " for the dictionary"); - currentDict.set(name, dict); - dictionaryStack.push(dict); - } - + var dict = new Dict(size); + operandStack.push(dict); this.getObj(); } else if (IsCmd(obj, "begin")) { - log("begin a dictionary"); - currentDict = dictionaryStack.peek(); + dump("begin a dictionary"); + dictionaryStack.push(operandStack.pop()); this.getObj(); } else if (IsCmd(obj, "end")) { - log("Ending a dictionary"); + dump("Ending a dictionary"); dictionaryStack.pop(); - currentDict = dictionaryStack.peek(); this.getObj(); } else if (IsCmd(obj, "def")) { - if (executionStack.length) { - var value = []; - while (executionStack.length) - value.push(executionStack.shift()); - } else { - var value = operandStack.pop(); - } - + var value = operandStack.pop(); var key = operandStack.pop(); - // XXX this happen because of a bad way to handle dictionary - if (key) { - log("def: " + key + " = " + value); - currentDict.set(key, value); - } - this.getObj(); - } else if (IsCmd(obj, "{")) { - log("Start Proc: " + obj); - executionStack = []; - isExecutionStack = true; - this.getObj(); - } else if (IsCmd(obj, "}")) { - log("End Proc: " + obj); - isExecutionStack = false; - this.getObj(); - } else if (IsCmd(obj, "[")) { - isExecutionStack = true; - executionStack = []; - this.getObj(); - log("Start array: " + obj); - } else if (IsCmd(obj, "]")) { - log("End array: " + obj); - isExecutionStack = false; + dump("def: " + key + " = " + value); + dictionaryStack.peek().set(key, value); this.getObj(); } else if (IsCmd(obj, "eexec")) { - return; // end of the ASCII header + // All the first segment data has been read, decrypt the second segment + // and start interpreting it in order to decode it + var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join(""); + lexer = new Lexer(new StringStream(eexecString)); + + this.getObj(); + } else if (IsCmd(obj, "known")) { + dump("known"); + var name = operandStack.pop(); + var dict = operandStack.pop(); + // returns dict.hasKey(name); + + this.getObj(); + } else if (IsCmd(obj, "RD")) { + dump("RD"); + var size = operandStack.pop(); + var key = operandStack.pop(); + + var stream = lexer.stream.makeSubStream(lexer.stream.pos, size); + var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); + + // XXX do we want to store that on the top dictionary or somewhere else + dictionaryStack.peek().set(key, new StringStream(charString)); + log (new StringStream(charString)); + this.getObj(); + } else if (IsCmd(obj, "LenIV")) { + error("LenIV: argh! we need to modify the length of discard characters for charStrings"); } else { - log("Getting an unknow token, adding it to the stack just in case"); - log(obj); + dump("Getting an unknow token, adding it to the stack just in case"); + dump(obj); operandStack.push(obj); this.getObj(); } - return currentDict; + + return operandStack.peek(); } }; var hack = false; var Type1Font = function(aFontName, aFontFile) { - // All type1 font program should begin with the comment %! + // All Type1 font program should begin with the comment %! var validHeader = aFontFile.getByte() == 0x25 && aFontFile.getByte() == 0x21; if (!validHeader) error("Invalid file header"); var programType = "PS-AdobeFont"; - for (var i = 0; i< programType.length; i++) + for (var i = 0; i < programType.length; i++) aFontFile.getChar(); // Ignore the '-' separator aFontFile.getChar(); var version = parseFloat(aFontFile.getChar() + aFontFile.getChar() + aFontFile.getChar()); - + if (!hack) { log(aFontName); log("Version is: " + version); var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); - this.parser = new Type1Parser(new Lexer(ASCIIStream)); + var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); + + this.parser = new Type1Parser(ASCIIStream, binaryStream); var fontDictionary = this.parser.getObj(); - log(fontDictionary + "\t" + - "fontInfo: " + fontDictionary.get("FontInfo") + "\t" + - "charStrings: " + fontDictionary.get("charStrings")); - - var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); - function decrypt(aBinaryStream, aKey) { - var c1 = 52845, c2 = 22719; - var R = aKey; - - var streamSize = aBinaryStream.length; - var decryptedString = []; - - var value = null; - for (var i = 0; i < streamSize; i++) { - value = aBinaryStream.getByte(); - decryptedString[i] = String.fromCharCode(value ^ (R >> 8)); - R = ((value + R) * c1 + c2) & ((1 << 16) - 1); - } - return decryptedString.slice(4); - } - - var eexecString = decrypt(binaryStream, 55665).join(""); - log(eexecString); - - TODO("decrypt charStrings data with the key 4330"); + log(fontDictionary + "\t" + "fontInfo: " + fontDictionary.get("FontInfo")); hack = true; } From e936f305d73cdb19c960a15c2a7a6cb05314b1ec Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 1 Jun 2011 16:50:32 +0200 Subject: [PATCH 04/72] Decode charStrings and stop the interpreter on every unknow token --- PDFFont.js | 166 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 138 insertions(+), 28 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 534817c94..e8987dc36 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -20,10 +20,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { function decrypt(aStream, aKey, aDiscardNumber) { var r = aKey, c1 = 52845, c2 = 22719; - var decryptedString = []; - var value = null; + var value = ""; var count = aStream.length; for (var i = 0; i < count; i++) { value = aStream.getByte(); @@ -33,6 +32,108 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { return decryptedString.slice(aDiscardNumber); } + /* + * CharStrings are encoded following the the CharString Encoding sequence + * describe in Chapter 6 of the "Adobe Type1 Font Format" specification. + * The value in a byte indicates a command, a number, or subsequent bytes + * that are to be interpreted in a special way. + * + * CharString Number Encoding: + * A CharString byte containing the values from 32 through 255 inclusive + * indicate an integer. These values are decoded in four ranges. + * + * 1. A CharString byte containing a value, v, between 32 and 246 inclusive, + * indicate the integer v - 139. Thus, the integer values from -107 through + * 107 inclusive may be encoded in single byte. + * + * 2. A CharString byte containing a value, v, between 247 and 250 inclusive, + * indicates an integer involving the next byte, w, according to the formula: + * [(v - 247) x 256] + w + 108 + * + * 3. A CharString byte containing a value, v, between 251 and 254 inclusive, + * indicates an integer involving the next byte, w, according to the formula: + * -[(v - 251) * 256] - w - 108 + * + * 4. A CharString containing the value 255 indicates that the next 4 bytes + * are a two complement signed integer. The first of these bytes contains the + * highest order bits, the second byte contains the next higher order bits + * and the fourth byte contain the lowest order bits. + * + * + * CharString Command Encoding: + * CharStrings commands are encoded in 1 or 2 bytes. + * + * Single byte commands are encoded in 1 byte that contains a value between + * 0 and 31 inclusive. + * If a command byte contains the value 12, then the value in the next byte + * indicates a command. This "escape" mechanism allows many extra commands + * to be encoded and this encoding technique helps to minimize the length of + * the charStrings. + */ + function decodeCharString(aStream) { + var charString = []; + var cmd = { + "1": "hstem", + "3": "vstem", + "4": "vmoveto", + "5": "rlineto", + "6": "hlineto", + "7": "vlineto", + "8": "rrcurveto", + "9": "closepath", + "10": "callsubr", + "11": "return", + "12": { + "0": "dotsection", + "1": "vstem3", + "3": "hstem3", + "6": "seac", + "7": "sbw", + "12": "div", + "16": "callothersubr", + "17": "pop", + "33": "setcurrentpoint" + }, + "13": "hsbw", + "14": "endchar", + "21": "rmoveto", + "22": "hmoveto", + "30": "vhcurveto", + "31": "hcurveto" + } + + var value = ""; + var count = aStream.length; + for (var i = 0; i < count; i++) { + value = aStream.getByte(); + + if (value < 0) { + continue; + } else if (value < 32) { + if (value == 12) { + value = cmd["12"][aStream.getByte()]; + count++; + } else { + value = cmd[value]; + } + } else if (value <= 246) { + value = parseInt(value) - 139; + } else if (value <= 250) { + value = ((value - 247) * 256) + parseInt(aStream.getByte()) + 108; + count++; + } else if (value <= 254) { + value = -((value - 251) * 256) - parseInt(aStream.getByte()) - 108; + count++; + } else { + error("Two complement signed integers are ignored for the moment"); + } + + charString.push(value); + } + + return charString; + } + /* * The operand stack holds arbitrary PostScript objects that are the operands * and results of PostScript operators being executed. The interpreter pushes @@ -63,7 +164,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { }; // Flag indicating if the topmost operand of the operandStack is an array - var operandIsArray = false; + var operandIsArray = 0; /* * The dictionary stack holds only dictionary objects. The current set of @@ -113,23 +214,31 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { */ var executionStack = []; + + /* + * Parse a font file from the first segment to the last assuming the eexec + * block is binary data. + * + * The method thrown an error if it encounters an unknown token. + */ this.getObj = function() { var obj = lexer.getObj(); - if (operandIsArray && !IsCmd(obj, "}") && !IsCmd(obj, "]")) { + if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") && + !IsCmd(obj, "}") && !IsCmd(obj, "]")) { operandStack.peek().push(obj); this.getObj(); } else if (IsCmd(obj, "{") || IsCmd(obj, "[")) { dump("Start Array: " + obj); operandStack.push([]); - operandIsArray = true; + operandIsArray++; this.getObj(); } else if (IsCmd(obj, "}") || IsCmd(obj, "]")) { dump("End Array: " + obj); - operandIsArray = false; + operandIsArray--; this.getObj(); } else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) { - dump("Value: " + obj); + //dump("Value: " + obj); operandStack.push(obj); this.getObj(); } else if (IsCmd(obj, "dup")) { @@ -145,11 +254,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { operandStack.push(systemDict); this.getObj(); } else if (IsCmd(obj, "readonly") || IsCmd(obj, "executeonly") || - IsCmd(obj, "currentfile")) { + IsCmd(obj, "currentfile") || IsCmd(obj, "NP")) { // Do nothing for the moment this.getObj(); } else if (IsName(obj)) { - dump("Name: " + obj.name); + //dump("Name: " + obj.name); operandStack.push(obj.name); this.getObj(); } else if (IsCmd(obj, "dict")) { @@ -191,20 +300,32 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var size = operandStack.pop(); var key = operandStack.pop(); - var stream = lexer.stream.makeSubStream(lexer.stream.pos, size); + // Add '1' because of the space separator, this is dirty + var stream = lexer.stream.makeSubStream(lexer.stream.pos + 1, size); + lexer.stream.skip(size + 1); + var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); + var charStream = new StringStream(charString); // XXX do we want to store that on the top dictionary or somewhere else - dictionaryStack.peek().set(key, new StringStream(charString)); - log (new StringStream(charString)); + dictionaryStack.peek().set(key, charStream); + + var decodedCharString = decodeCharString(charStream); + log(decodedCharString); + this.getObj(); } else if (IsCmd(obj, "LenIV")) { error("LenIV: argh! we need to modify the length of discard characters for charStrings"); - } else { - dump("Getting an unknow token, adding it to the stack just in case"); - dump(obj); - operandStack.push(obj); + } else if (IsCmd(obj, "closefile")) { + // End of binary data; + } else if (IsCmd(obj, "StandardEncoding")) { + // For some reason the value is considered as a command, maybe it is + // because of the uppercae 'S' + operandStack.push(obj.cmd); this.getObj(); + } else { + dump(obj); + error("Unknow token while parsing font"); } return operandStack.peek(); @@ -215,22 +336,11 @@ var hack = false; var Type1Font = function(aFontName, aFontFile) { // All Type1 font program should begin with the comment %! - var validHeader = aFontFile.getByte() == 0x25 && aFontFile.getByte() == 0x21; - if (!validHeader) + if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) error("Invalid file header"); - var programType = "PS-AdobeFont"; - for (var i = 0; i < programType.length; i++) - aFontFile.getChar(); - - // Ignore the '-' separator - aFontFile.getChar(); - - var version = parseFloat(aFontFile.getChar() + aFontFile.getChar() + aFontFile.getChar()); - if (!hack) { log(aFontName); - log("Version is: " + version); var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); From e739bcc5154891cec31340121d1cdd4c12ac03a5 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 1 Jun 2011 20:40:25 +0200 Subject: [PATCH 05/72] Starts playing with the execution stack --- PDFFont.js | 148 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 134 insertions(+), 14 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index e8987dc36..923a857fb 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -155,9 +155,19 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { }, peek: function() { + if (!this.length) + return null; return this.__innerStack__[this.__innerStack__.length - 1]; }, + toString: function() { + log("=== Start Dumping operandStack ==="); + var str = []; + for (var i = 0; i < this.__innerStack__.length; i++) + log(this.__innerStack__[i]); + log("=== End Dumping operandStack ==="); + }, + get length() { return this.__innerStack__.length; } @@ -194,9 +204,15 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { }, peek: function() { + if (!this.length) + return null; return this.__innerStack__[this.__innerStack__.length - 1]; }, + get: function(aIndex) { + return this.__innerStack__[aIndex]; + }, + get length() { return this.__innerStack__.length; } @@ -212,7 +228,42 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * object off the execution stack and resumes executing the suspended object * beneath it. */ - var executionStack = []; + var executionStack = { + __innerStack__: [], + + push: function(aProcedure) { + this.__innerStack__.push(aProcedure); + }, + + pop: function() { + return this.__innerStack__.pop(); + }, + + peek: function() { + if (!this.length) + return null; + return this.__innerStack__[this.__innerStack__.length - 1]; + }, + + get: function(aIndex) { + return this.__innerStack__[aIndex]; + }, + + get length() { + return this.__innerStack__.length; + } + } + + function nextInStack() { + var currentProcedure = executionStack.peek(); + if (currentProcedure) { + var command = currentProcedure.shift(); + if (!currentProcedure.length) + executionStack.pop(); + } + + return lexer.getObj(); + } /* @@ -222,29 +273,65 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * The method thrown an error if it encounters an unknown token. */ this.getObj = function() { - var obj = lexer.getObj(); + var obj = nextInStack(); if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") && !IsCmd(obj, "}") && !IsCmd(obj, "]")) { + dump("Adding: " + obj); operandStack.peek().push(obj); this.getObj(); } else if (IsCmd(obj, "{") || IsCmd(obj, "[")) { - dump("Start Array: " + obj); - operandStack.push([]); + dump("Start" + (obj.cmd == "{" ? " Executable " : " ") + "Array"); + operandIsArray ? operandStack.peek().push([]) : operandStack.push([]); operandIsArray++; this.getObj(); } else if (IsCmd(obj, "}") || IsCmd(obj, "]")) { - dump("End Array: " + obj); + dump("End" + (obj.cmd == "}" ? " Executable " : " ") + "Array"); operandIsArray--; this.getObj(); + } else if (IsCmd(obj, "if")) { + log("if"); + var procedure = operandStack.pop(); + var bool = operandStack.pop(); + if (!IsBool(bool)) { + executionStack.push(bool); + log("....."); + this.getObj(); + } + log(bool); + if (bool) + executionStack.push(procedure); + + this.getObj(); + } else if (IsCmd(obj, "ifelse")) { + log("ifelse"); + var procedure1 = operandStack.pop(); + var procedure2 = operandStack.pop(); + var bool = !!operandStack.pop(); + operandStack.push(bool ? procedure1 : procedure2); + this.getObj(); } else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) { - //dump("Value: " + obj); + dump("Value: " + obj); operandStack.push(obj); this.getObj(); } else if (IsCmd(obj, "dup")) { dump("Duplicate"); operandStack.push(operandStack.peek()); this.getObj(); + } else if (IsCmd(obj, "put") || IsCmd(obj, "NP")) { + operandStack.toString(); + + var data = operandStack.pop(); + var indexOrKey = operandStack.pop(); + var object = operandStack.pop(); + log(object); + log("put " + data + " in " + obj + "[" + indexOrKey + "]"); + + if (object.set) + object.set(indexOrKey, data); + else + object[indexOrKey] = data; + this.getObj(); } else if (IsCmd(obj, "currentdict")) { dump("currentdict"); operandStack.push(dictionaryStack.peek()); @@ -254,13 +341,18 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { operandStack.push(systemDict); this.getObj(); } else if (IsCmd(obj, "readonly") || IsCmd(obj, "executeonly") || - IsCmd(obj, "currentfile") || IsCmd(obj, "NP")) { + IsCmd(obj, "noaccess") || IsCmd(obj, "currentfile")) { // Do nothing for the moment this.getObj(); } else if (IsName(obj)) { //dump("Name: " + obj.name); operandStack.push(obj.name); this.getObj(); + } else if (IsCmd(obj, "array")) { + var size = operandStack.pop(); + var array = new Array(size); + operandStack.push(array); + this.getObj(); } else if (IsCmd(obj, "dict")) { dump("Dict: " + obj); var size = operandStack.pop(); @@ -275,10 +367,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { dump("Ending a dictionary"); dictionaryStack.pop(); this.getObj(); - } else if (IsCmd(obj, "def")) { + } else if (IsCmd(obj, "def") || IsCmd(obj, "ND")) { var value = operandStack.pop(); var key = operandStack.pop(); - dump("def: " + key + " = " + value); + log("def: " + key + " = " + value); dictionaryStack.peek().set(key, value); this.getObj(); } else if (IsCmd(obj, "eexec")) { @@ -292,8 +384,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { dump("known"); var name = operandStack.pop(); var dict = operandStack.pop(); - // returns dict.hasKey(name); - + operandStack.push(!!dict.get(name)); this.getObj(); } else if (IsCmd(obj, "RD")) { dump("RD"); @@ -311,21 +402,50 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { dictionaryStack.peek().set(key, charStream); var decodedCharString = decodeCharString(charStream); - log(decodedCharString); + dump(decodedCharString); this.getObj(); } else if (IsCmd(obj, "LenIV")) { error("LenIV: argh! we need to modify the length of discard characters for charStrings"); } else if (IsCmd(obj, "closefile")) { // End of binary data; + } else if (IsCmd(obj, "index")) { + var operands = []; + var size = operandStack.pop(); + for (var i = 0; i < size; i++) + operands.push(operandStack.pop()); + + var newOperand = operandStack.peek(); + + for (var i = 0; i < operands.length; i++) + operandStack.push(operands.pop()); + + operandStack.push(newOperand); + this.getObj(); } else if (IsCmd(obj, "StandardEncoding")) { // For some reason the value is considered as a command, maybe it is // because of the uppercae 'S' operandStack.push(obj.cmd); this.getObj(); } else { - dump(obj); - error("Unknow token while parsing font"); + var command = null; + if (IsCmd(obj)) { + for (var i = 0; i < dictionaryStack.length; i++) { + command = dictionaryStack.get(i).get(obj.cmd); + if (command) + break; + } + } + + if (command) { + // XXX add the command to the execution stack + this.getObj(); + } else { + log("operandStack: " + operandStack); + log("dictionaryStack: " + dictionaryStack); + dump(obj); + error("Unknow token while parsing font"); + } } return operandStack.peek(); From e302bd93eeaafc693c7e4ac9aa994cf7276953e0 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 3 Jun 2011 17:48:32 +0200 Subject: [PATCH 06/72] Add the necessary bits to handle Arrays, Procedure and a bigger set of instructions for Type1 --- PDFFont.js | 437 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 269 insertions(+), 168 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 923a857fb..00d9bd517 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1,9 +1,11 @@ +var Font = new Dict(); + var Type1Parser = function(aAsciiStream, aBinaryStream) { var lexer = new Lexer(aAsciiStream); // Turn on this flag for additional debugging logs - var debug = false; + var debug = true; var dump = function(aData) { if (debug) @@ -106,7 +108,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var count = aStream.length; for (var i = 0; i < count; i++) { value = aStream.getByte(); - + if (value < 0) { continue; } else if (value < 32) { @@ -130,8 +132,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { charString.push(value); } - - return charString; + + return charString; } /* @@ -190,7 +192,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { userDict = new Dict(); var dictionaryStack = { - __innerStack__: [systemDict, globalDict], + __innerStack__: [systemDict, globalDict, userDict], push: function(aDictionary) { this.__innerStack__.push(aDictionary); @@ -216,7 +218,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { get length() { return this.__innerStack__.length; } - } + }; /* * The execution stack holds executable objects (mainly procedures and files) @@ -252,7 +254,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { get length() { return this.__innerStack__.length; } - } + }; function nextInStack() { var currentProcedure = executionStack.peek(); @@ -260,195 +262,295 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var command = currentProcedure.shift(); if (!currentProcedure.length) executionStack.pop(); + return command; } return lexer.getObj(); - } + }; + var self = this; + function parseNext() { + setTimeout(function() { + self.getObj(); + }, 0); + }; /* * Parse a font file from the first segment to the last assuming the eexec * block is binary data. - * + * * The method thrown an error if it encounters an unknown token. */ this.getObj = function() { var obj = nextInStack(); + if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") && + !IsCmd(obj, "]") && !IsCmd(obj, "}")) { + dump("Adding an object: " + obj +" to array " + operandIsArray); + var currentArray = operandStack.peek(); + for (var i = 1; i < operandIsArray; i++) + currentArray = currentArray[currentArray.length - 1]; - if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") && - !IsCmd(obj, "}") && !IsCmd(obj, "]")) { - dump("Adding: " + obj); - operandStack.peek().push(obj); - this.getObj(); - } else if (IsCmd(obj, "{") || IsCmd(obj, "[")) { - dump("Start" + (obj.cmd == "{" ? " Executable " : " ") + "Array"); - operandIsArray ? operandStack.peek().push([]) : operandStack.push([]); - operandIsArray++; - this.getObj(); - } else if (IsCmd(obj, "}") || IsCmd(obj, "]")) { - dump("End" + (obj.cmd == "}" ? " Executable " : " ") + "Array"); - operandIsArray--; - this.getObj(); - } else if (IsCmd(obj, "if")) { - log("if"); - var procedure = operandStack.pop(); - var bool = operandStack.pop(); - if (!IsBool(bool)) { - executionStack.push(bool); - log("....."); - this.getObj(); - } - log(bool); - if (bool) - executionStack.push(procedure); - - this.getObj(); - } else if (IsCmd(obj, "ifelse")) { - log("ifelse"); - var procedure1 = operandStack.pop(); - var procedure2 = operandStack.pop(); - var bool = !!operandStack.pop(); - operandStack.push(bool ? procedure1 : procedure2); - this.getObj(); + currentArray.push(obj); + return parseNext(); } else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) { dump("Value: " + obj); operandStack.push(obj); - this.getObj(); - } else if (IsCmd(obj, "dup")) { - dump("Duplicate"); - operandStack.push(operandStack.peek()); - this.getObj(); - } else if (IsCmd(obj, "put") || IsCmd(obj, "NP")) { - operandStack.toString(); - - var data = operandStack.pop(); - var indexOrKey = operandStack.pop(); - var object = operandStack.pop(); - log(object); - log("put " + data + " in " + obj + "[" + indexOrKey + "]"); - - if (object.set) - object.set(indexOrKey, data); - else - object[indexOrKey] = data; - this.getObj(); - } else if (IsCmd(obj, "currentdict")) { - dump("currentdict"); - operandStack.push(dictionaryStack.peek()); - this.getObj(); - } else if (IsCmd(obj, "systemdict")) { - dump("systemdict"); - operandStack.push(systemDict); - this.getObj(); - } else if (IsCmd(obj, "readonly") || IsCmd(obj, "executeonly") || - IsCmd(obj, "noaccess") || IsCmd(obj, "currentfile")) { - // Do nothing for the moment - this.getObj(); + return parseNext(); } else if (IsName(obj)) { - //dump("Name: " + obj.name); + dump("Name: " + obj.name); operandStack.push(obj.name); - this.getObj(); - } else if (IsCmd(obj, "array")) { - var size = operandStack.pop(); - var array = new Array(size); - operandStack.push(array); - this.getObj(); - } else if (IsCmd(obj, "dict")) { - dump("Dict: " + obj); - var size = operandStack.pop(); - var dict = new Dict(size); - operandStack.push(dict); - this.getObj(); - } else if (IsCmd(obj, "begin")) { - dump("begin a dictionary"); - dictionaryStack.push(operandStack.pop()); - this.getObj(); - } else if (IsCmd(obj, "end")) { - dump("Ending a dictionary"); - dictionaryStack.pop(); - this.getObj(); - } else if (IsCmd(obj, "def") || IsCmd(obj, "ND")) { - var value = operandStack.pop(); - var key = operandStack.pop(); - log("def: " + key + " = " + value); - dictionaryStack.peek().set(key, value); - this.getObj(); - } else if (IsCmd(obj, "eexec")) { - // All the first segment data has been read, decrypt the second segment - // and start interpreting it in order to decode it - var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join(""); - lexer = new Lexer(new StringStream(eexecString)); + return parseNext(); + } else if (IsCmd(obj)) { + var command = obj.cmd; + dump(command); - this.getObj(); - } else if (IsCmd(obj, "known")) { - dump("known"); - var name = operandStack.pop(); - var dict = operandStack.pop(); - operandStack.push(!!dict.get(name)); - this.getObj(); - } else if (IsCmd(obj, "RD")) { - dump("RD"); - var size = operandStack.pop(); - var key = operandStack.pop(); + switch (command) { + case "[": + case "{": + dump("Start" + (command == "{" ? " Executable " : " ") + "Array"); + operandIsArray++; + var currentArray = operandStack; + for (var i = 1; i < operandIsArray; i++) + if (currentArray.peek) + currentArray = currentArray.peek(); + else + currentArray = currentArray[currentArray.length - 1]; + currentArray.push([]); + break; - // Add '1' because of the space separator, this is dirty - var stream = lexer.stream.makeSubStream(lexer.stream.pos + 1, size); - lexer.stream.skip(size + 1); + case "]": + case "}": + var currentArray = operandStack.peek(); + for (var i = 1; i < operandIsArray; i++) + currentArray = currentArray[currentArray.length - 1]; + dump("End" + (command == "}" ? " Executable " : " ") + "Array: " + currentArray.join(" ")); + operandIsArray--; + break; - var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); - var charStream = new StringStream(charString); + case "if": + var procedure = operandStack.pop(); + var bool = operandStack.pop(); + if (!IsBool(bool)) { + dump("if: " + bool); + // we need to execute things, let be dirty + executionStack.push(bool); + } else { + dump("if ( " + bool + " ) { " + procedure + " }"); + if (bool) + executionStack.push(procedure); + } + break; - // XXX do we want to store that on the top dictionary or somewhere else - dictionaryStack.peek().set(key, charStream); + case "ifelse": + var procedure1 = operandStack.pop(); + var procedure2 = operandStack.pop(); + var bool = !!operandStack.pop(); + dump("if ( " + bool + " ) { " + procedure2 + " } else { " + procedure1 + " }"); + executionStack.push(bool ? procedure2 : procedure1); + break; - var decodedCharString = decodeCharString(charStream); - dump(decodedCharString); + case "dup": + //log("duplicate: " + operandStack.peek()); + operandStack.push(operandStack.peek()); + break; - this.getObj(); - } else if (IsCmd(obj, "LenIV")) { - error("LenIV: argh! we need to modify the length of discard characters for charStrings"); - } else if (IsCmd(obj, "closefile")) { - // End of binary data; - } else if (IsCmd(obj, "index")) { - var operands = []; - var size = operandStack.pop(); - for (var i = 0; i < size; i++) - operands.push(operandStack.pop()); + case "mark": + operandStack.push("mark"); + break; - var newOperand = operandStack.peek(); + case "cleartomark": + var command = ""; + do { + command = operandStack.pop(); + } while (command != "mark"); + break; - for (var i = 0; i < operands.length; i++) - operandStack.push(operands.pop()); + case "put": + var data = operandStack.pop(); + var indexOrKey = operandStack.pop(); + var object = operandStack.pop(); + //dump("put " + data + " in " + object + "[" + indexOrKey + "]"); + object.set ? object.set(indexOrKey, data) + : object[indexOrKey] = data; - operandStack.push(newOperand); - this.getObj(); - } else if (IsCmd(obj, "StandardEncoding")) { - // For some reason the value is considered as a command, maybe it is - // because of the uppercae 'S' - operandStack.push(obj.cmd); - this.getObj(); - } else { - var command = null; - if (IsCmd(obj)) { - for (var i = 0; i < dictionaryStack.length; i++) { - command = dictionaryStack.get(i).get(obj.cmd); - if (command) - break; - } + break; + + case "pop": + operandStack.pop(); + break; + + case "exch": + var operand1 = operandStack.pop(); + var operand2 = operandStack.pop(); + operandStack.push(operand1); + operandStack.push(operand2); + break; + + case "get": + var indexOrKey = operandStack.pop(); + var object = operandStack.pop(); + log("=============="); + operandStack.toString(); + log(dictionaryStack.__innerStack__); + log(object + "::" + indexOrKey); + var data = object.get ? object.get(indexOrKey) : object[indexOrKey]; + dump("get " + obj + "[" + indexOrKey + "]: " + data); + operandStack.push(data); + break; + + case "currentdict": + var dict = dictionaryStack.peek(); + operandStack.push(dict); + break; + + case "systemdict": + operandStack.push(systemDict); + break; + + case "readonly": + case "executeonly": + case "noaccess": + // Do nothing for the moment + break; + + case "currentfile": + operandStack.push("currentfile"); + break; + + case "array": + var size = operandStack.pop(); + var array = new Array(size); + operandStack.push(array); + break; + + case "dict": + var size = operandStack.pop(); + var dict = new Dict(size); + operandStack.push(dict); + break; + + case "begin": + dictionaryStack.push(operandStack.pop()); + break; + + case "end": + dictionaryStack.pop(); + break; + + case "def": + var value = operandStack.pop(); + var key = operandStack.pop(); + dump("def: " + key + " = " + value); + dictionaryStack.peek().set(key, value); + break; + + case "definefont": + var font = operandStack.pop(); + var key = operandStack.pop(); + dump("definefont " + font + " with key: " + key); + Font.set(key, font); + break; + + case "known": + var name = operandStack.pop(); + var dict = operandStack.pop(); + var data = !!dict.get(name); + dump("known: " + data + " :: " + name + " in dict: " + dict); + operandStack.push(data); + break; + + case "exec": + executionStack.push(operandStack.pop()); + break; + + case "eexec": + // All the first segment data has been read, decrypt the second segment + // and start interpreting it in order to decode it + var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join(""); + lexer = new Lexer(new StringStream(eexecString)); + break; + + case "LenIV": + error("LenIV: argh! we need to modify the length of discard characters for charStrings"); + break; + + case "closefile": + var file = operandStack.pop(); + return; + break; + + case "index": + var operands = []; + var size = operandStack.pop(); + for (var i = 0; i < size; i++) + operands.push(operandStack.pop()); + + var newOperand = operandStack.peek(); + + for (var i = 0; i < operands.length; i++) + operandStack.push(operands.pop()); + + operandStack.push(newOperand); + break; + + case "string": + var size = operandStack.pop(); + var str = (new Array(size + 1)).join(" "); + operandStack.push(str); + break; + + case "readstring": + var str = operandStack.pop(); + var size = str.length; + + var file = operandStack.pop(); + + // Add '1' because of the space separator, this is dirty + var stream = lexer.stream.makeSubStream(lexer.stream.pos + 1, size); + lexer.stream.skip(size + 1); + + var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); + var charStream = new StringStream(charString); + var decodedCharString = decodeCharString(charStream); + dump("decodedCharString: " + decodedCharString); + operandStack.push(decodedCharString); + // boolean indicating if the operation is a success or not + operandStack.push(true); + break; + + case "StandardEncoding": + // For some reason the value is considered as a command, maybe it is + // because of the uppercase 'S' + operandStack.push(obj.cmd); + break; + + default: + var command = null; + if (IsCmd(obj)) { + for (var i = 0; i < dictionaryStack.length; i++) { + if (command = dictionaryStack.get(i).get(obj.cmd)) { + dump("found in dictionnary for " + obj.cmd + " command: " + command); + executionStack.push(command.slice()); + break; + } + } + } + + if (!command) { + log("operandStack: " + operandStack); + log("dictionaryStack: " + dictionaryStack); + log(obj); + error("Unknow command while parsing font"); + } + break; } - if (command) { - // XXX add the command to the execution stack - this.getObj(); - } else { - log("operandStack: " + operandStack); - log("dictionaryStack: " + dictionaryStack); - dump(obj); - error("Unknow token while parsing font"); - } + return parseNext(); + } else if (obj){ + log (obj); + operandStack.push(obj); + return parseNext(); } - - return operandStack.peek(); } }; @@ -467,8 +569,7 @@ var Type1Font = function(aFontName, aFontFile) { this.parser = new Type1Parser(ASCIIStream, binaryStream); - var fontDictionary = this.parser.getObj(); - log(fontDictionary + "\t" + "fontInfo: " + fontDictionary.get("FontInfo")); + this.parser.getObj(); hack = true; } From c663d181f907ba20304bf59857bd5af032386926 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 3 Jun 2011 17:54:40 +0200 Subject: [PATCH 07/72] Remove some debug leftover --- PDFFont.js | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 00d9bd517..22fe69858 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1,11 +1,14 @@ -var Font = new Dict(); +/* + * This dictionary hold the decoded fonts + */ +var Fonts = new Dict(); var Type1Parser = function(aAsciiStream, aBinaryStream) { var lexer = new Lexer(aAsciiStream); // Turn on this flag for additional debugging logs - var debug = true; + var debug = false; var dump = function(aData) { if (debug) @@ -350,7 +353,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { break; case "dup": - //log("duplicate: " + operandStack.peek()); + dump("duplicate: " + operandStack.peek()); operandStack.push(operandStack.peek()); break; @@ -389,10 +392,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { case "get": var indexOrKey = operandStack.pop(); var object = operandStack.pop(); - log("=============="); - operandStack.toString(); - log(dictionaryStack.__innerStack__); - log(object + "::" + indexOrKey); var data = object.get ? object.get(indexOrKey) : object[indexOrKey]; dump("get " + obj + "[" + indexOrKey + "]: " + data); operandStack.push(data); @@ -448,7 +447,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var font = operandStack.pop(); var key = operandStack.pop(); dump("definefont " + font + " with key: " + key); - Font.set(key, font); + Fonts.set(key, font); break; case "known": @@ -547,7 +546,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { return parseNext(); } else if (obj){ - log (obj); + dump("unknow: " + obj); operandStack.push(obj); return parseNext(); } From a08691313f3cfd8e524484a432ab6ef773b39541 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 3 Jun 2011 19:47:07 +0200 Subject: [PATCH 08/72] Clean up some code and add some changes to pdf.js --- PDFFont.js | 142 ++++++++++++++++++++++++++++------------------------- pdf.js | 6 +++ 2 files changed, 81 insertions(+), 67 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 22fe69858..6a7c49b7b 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -15,6 +15,24 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { log(aData); }; + /* + * Parse a whole Type1 font stream (from the first segment to the last) + * assuming the 'eexec' block is binary data and fill up the 'Fonts' + * dictionary with the font informations. + */ + var self = this; + this.parse = function() { + if (!debug) { + while (!processNextToken()) {}; + } else { + // debug mode is used to debug postcript processing + setTimeout(function() { + if (!processNextToken()) + self.parse(); + }, 0); + } + } + /* * Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence * of Plaintext Bytes. The function took a key as a parameter which can be @@ -24,6 +42,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var kCharStringsEncryptionKey = 4330; function decrypt(aStream, aKey, aDiscardNumber) { + var start = Date.now(); var r = aKey, c1 = 52845, c2 = 22719; var decryptedString = []; @@ -34,6 +53,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { decryptedString[i] = String.fromCharCode(value ^ (r >> 8)); r = ((value + r) * c1 + c2) & ((1 << 16) - 1); } + var end = Date.now(); + dump("Time to decrypt string of length " + count + " is " + (end - start)); return decryptedString.slice(aDiscardNumber); } @@ -75,37 +96,39 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * to be encoded and this encoding technique helps to minimize the length of * the charStrings. */ + var charStringDictionary = { + "1": "hstem", + "3": "vstem", + "4": "vmoveto", + "5": "rlineto", + "6": "hlineto", + "7": "vlineto", + "8": "rrcurveto", + "9": "closepath", + "10": "callsubr", + "11": "return", + "12": { + "0": "dotsection", + "1": "vstem3", + "3": "hstem3", + "6": "seac", + "7": "sbw", + "12": "div", + "16": "callothersubr", + "17": "pop", + "33": "setcurrentpoint" + }, + "13": "hsbw", + "14": "endchar", + "21": "rmoveto", + "22": "hmoveto", + "30": "vhcurveto", + "31": "hcurveto" + }; + function decodeCharString(aStream) { + var start = Date.now(); var charString = []; - var cmd = { - "1": "hstem", - "3": "vstem", - "4": "vmoveto", - "5": "rlineto", - "6": "hlineto", - "7": "vlineto", - "8": "rrcurveto", - "9": "closepath", - "10": "callsubr", - "11": "return", - "12": { - "0": "dotsection", - "1": "vstem3", - "3": "hstem3", - "6": "seac", - "7": "sbw", - "12": "div", - "16": "callothersubr", - "17": "pop", - "33": "setcurrentpoint" - }, - "13": "hsbw", - "14": "endchar", - "21": "rmoveto", - "22": "hmoveto", - "30": "vhcurveto", - "31": "hcurveto" - } var value = ""; var count = aStream.length; @@ -116,10 +139,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { continue; } else if (value < 32) { if (value == 12) { - value = cmd["12"][aStream.getByte()]; + value = charStringDictionary["12"][aStream.getByte()]; count++; } else { - value = cmd[value]; + value = charStringDictionary[value]; } } else if (value <= 246) { value = parseInt(value) - 139; @@ -136,6 +159,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { charString.push(value); } + var end = Date.now(); + dump("Time to decode charString of length " + count + " is " + (end - start)); return charString; } @@ -259,6 +284,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { } }; + /* + * Return the next token in the execution stack + */ function nextInStack() { var currentProcedure = executionStack.peek(); if (currentProcedure) { @@ -271,20 +299,15 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { return lexer.getObj(); }; - var self = this; - function parseNext() { - setTimeout(function() { - self.getObj(); - }, 0); - }; /* - * Parse a font file from the first segment to the last assuming the eexec - * block is binary data. + * Get the next token from the executionStack and process it. + * Actually the function does not process the third segment of a Type1 font + * and end on 'closefile'. * * The method thrown an error if it encounters an unknown token. */ - this.getObj = function() { + function processNextToken() { var obj = nextInStack(); if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") && !IsCmd(obj, "]") && !IsCmd(obj, "}")) { @@ -294,15 +317,12 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { currentArray = currentArray[currentArray.length - 1]; currentArray.push(obj); - return parseNext(); } else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) { dump("Value: " + obj); operandStack.push(obj); - return parseNext(); } else if (IsName(obj)) { dump("Name: " + obj.name); operandStack.push(obj.name); - return parseNext(); } else if (IsCmd(obj)) { var command = obj.cmd; dump(command); @@ -475,7 +495,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { case "closefile": var file = operandStack.pop(); - return; + return true; break; case "index": @@ -543,48 +563,36 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { } break; } - - return parseNext(); } else if (obj){ dump("unknow: " + obj); operandStack.push(obj); - return parseNext(); } + + return false; } }; -var hack = false; + +var hack = true; var Type1Font = function(aFontName, aFontFile) { // All Type1 font program should begin with the comment %! if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) error("Invalid file header"); - if (!hack) { - log(aFontName); + if (hack) { + var start = Date.now(); var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); this.parser = new Type1Parser(ASCIIStream, binaryStream); + this.parser.parse(); - this.parser.getObj(); - hack = true; + var end = Date.now(); + dump("Time to parse font is:" + (end - start)); + + hack = false; } - - - this.info = {}; - this.name = aFontName; - this.encoding = []; - this.paintType = 0; - this.fontType = 0; - this.fontMatrix = []; - this.fontBBox = []; - this.uniqueID = 0; - this.metrics = {}; - this.strokeWidth = 0.0; - this.private = {}; - this.charStrings = {} - this.FID = 0; }; diff --git a/pdf.js b/pdf.js index 8caac0362..001882ad3 100644 --- a/pdf.js +++ b/pdf.js @@ -1002,6 +1002,9 @@ var Name = (function() { } constructor.prototype = { + toString: function() { + return this.name; + } }; return constructor; @@ -1013,6 +1016,9 @@ var Cmd = (function() { } constructor.prototype = { + toString: function() { + return this.cmd; + } }; return constructor; From c166db13fd1061e710ca5eb366a151345769510d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sat, 4 Jun 2011 00:43:50 +0200 Subject: [PATCH 09/72] Add the 'for' keyword and support 2's complement signed integer --- PDFFont.js | 43 +++++++++++++++++++++++++++++-------------- pdf.js | 4 ++++ 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 6a7c49b7b..51e8282d1 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -153,7 +153,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { value = -((value - 251) * 256) - parseInt(aStream.getByte()) - 108; count++; } else { - error("Two complement signed integers are ignored for the moment"); + var byte = aStream.getByte(); + var high = (byte >> 1); + value = (byte - high) * 16777216 + aStream.getByte() * 65536 + + aStream.getByte() * 256 * + aStream.getByte(); + count += 4; } charString.push(value); @@ -372,6 +376,17 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { executionStack.push(bool ? procedure2 : procedure1); break; + case "for": + var procedure = operandStack.pop(); + var limit = operandStack.pop(); + var increment = operandStack.pop(); + var initial = operandStack.pop(); + for (var i = 0; i < limit; i += increment) { + operandStack.push(i); + executionStack.push(procedure.slice()); + } + break; + case "dup": dump("duplicate: " + operandStack.peek()); operandStack.push(operandStack.peek()); @@ -459,6 +474,12 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { case "def": var value = operandStack.pop(); var key = operandStack.pop(); + + if (key == "FontName" && Fonts.get(value)) { + // The font has already be decoded, stop! + return true; + } + dump("def: " + key + " = " + value); dictionaryStack.peek().set(key, value); break; @@ -573,26 +594,20 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { }; -var hack = true; - var Type1Font = function(aFontName, aFontFile) { // All Type1 font program should begin with the comment %! if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) error("Invalid file header"); - if (hack) { - var start = Date.now(); + var start = Date.now(); - var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); - var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); + var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); + var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); - this.parser = new Type1Parser(ASCIIStream, binaryStream); - this.parser.parse(); + this.parser = new Type1Parser(ASCIIStream, binaryStream); + this.parser.parse(); - var end = Date.now(); - dump("Time to parse font is:" + (end - start)); - - hack = false; - } + var end = Date.now(); + dump("Time to parse font is:" + (end - start)); }; diff --git a/pdf.js b/pdf.js index 001882ad3..cca5ae730 100644 --- a/pdf.js +++ b/pdf.js @@ -2291,6 +2291,10 @@ var CanvasGraphics = (function() { TODO("support Type3 font"); break; + case "TrueType": + TODO("implement TrueType support"); + break; + default: error("Unsupported font type: " + subtype); break; From 4d261759d924550bd16b134d54245227b354f37d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 7 Jun 2011 19:17:46 +0200 Subject: [PATCH 10/72] Support TrueType Font --- PDFFont.js | 28 +++++++++++++++++++++++++++- pdf.js | 9 ++++++++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 51e8282d1..3f494070f 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -4,6 +4,32 @@ */ var Fonts = new Dict(); + +var Base64Encoder = { + encode: function(aData) { + var str = []; + var count = aData.length; + for (var i = 0; i < count; i++) + str.push(aData.getChar()); + + return window.btoa(str.join("")); + } +}; + + +var TrueTypeFont = function(aFontName, aFontFile) { + if (Fonts.get(aFontName)) + return; + + //log("Loading a TrueType font: " + aFontName); + var fontData = Base64Encoder.encode(aFontFile); + Fonts.set(aFontName, fontData); + + // Add the css rule + var url = "url(data:font/ttf;base64," + fontData + ");"; + document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0); +}; + var Type1Parser = function(aAsciiStream, aBinaryStream) { var lexer = new Lexer(aAsciiStream); @@ -608,6 +634,6 @@ var Type1Font = function(aFontName, aFontFile) { this.parser.parse(); var end = Date.now(); - dump("Time to parse font is:" + (end - start)); + //log("Time to parse font is:" + (end - start)); }; diff --git a/pdf.js b/pdf.js index cca5ae730..15198c553 100644 --- a/pdf.js +++ b/pdf.js @@ -2292,7 +2292,14 @@ var CanvasGraphics = (function() { break; case "TrueType": - TODO("implement TrueType support"); + var fontDescriptor = font.get("FontDescriptor"); + if (fontDescriptor.num) { + var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); + var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile2")); + fontName = fontDescriptor.get("FontName").name; + fontName = fontName.replace("+", ""); // no + are allowed in the font name + font = new TrueTypeFont(fontName, fontFile); + } break; default: From c098f0b31fa85cb80636b36afa8bb8d482af1f6c Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 8 Jun 2011 17:26:29 +0200 Subject: [PATCH 11/72] Fix some bugs and add the beginning of a Type2 reader --- PDFFont.js | 279 ++++++++++++++++++++- cffStandardStrings.js | 552 ++++++++++++++++++++++++++++++++++++++++++ pdf.js | 5 +- test.html | 1 + 4 files changed, 828 insertions(+), 9 deletions(-) create mode 100644 cffStandardStrings.js diff --git a/PDFFont.js b/PDFFont.js index 3f494070f..b8ce52e07 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -17,6 +17,8 @@ var Base64Encoder = { }; + + var TrueTypeFont = function(aFontName, aFontFile) { if (Fonts.get(aFontName)) return; @@ -30,6 +32,7 @@ var TrueTypeFont = function(aFontName, aFontFile) { document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0); }; + var Type1Parser = function(aAsciiStream, aBinaryStream) { var lexer = new Lexer(aAsciiStream); @@ -211,6 +214,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { }, pop: function() { + if (!this.length) + throw new Error("stackunderflow"); return this.__innerStack__.pop(); }, @@ -220,10 +225,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { return this.__innerStack__[this.__innerStack__.length - 1]; }, - toString: function() { + dump: function() { log("=== Start Dumping operandStack ==="); var str = []; - for (var i = 0; i < this.__innerStack__.length; i++) + for (var i = 0; i < this.length; i++) log(this.__innerStack__[i]); log("=== End Dumping operandStack ==="); }, @@ -257,7 +262,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { }, pop: function() { - if (this.__innerStack__.length == 2) + if (this.__innerStack__.length == 3) return null; return this.__innerStack__.pop(); @@ -275,7 +280,15 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { get length() { return this.__innerStack__.length; - } + }, + + dump: function() { + log("=== Start Dumping dictionaryStack ==="); + var str = []; + for (var i = 0; i < this.length; i++) + log(this.__innerStack__[i]); + log("=== End Dumping dictionaryStack ==="); + }, }; /* @@ -433,10 +446,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var data = operandStack.pop(); var indexOrKey = operandStack.pop(); var object = operandStack.pop(); - //dump("put " + data + " in " + object + "[" + indexOrKey + "]"); + dump("put " + data + " in " + object + "[" + indexOrKey + "]"); object.set ? object.set(indexOrKey, data) : object[indexOrKey] = data; - break; case "pop": @@ -454,7 +466,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var indexOrKey = operandStack.pop(); var object = operandStack.pop(); var data = object.get ? object.get(indexOrKey) : object[indexOrKey]; - dump("get " + obj + "[" + indexOrKey + "]: " + data); + dump("get " + object + "[" + indexOrKey + "]: " + data); operandStack.push(data); break; @@ -501,6 +513,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var value = operandStack.pop(); var key = operandStack.pop(); + // XXX we don't want to do that here but for some reasons the names + // are different between what is declared and the FontName directive if (key == "FontName" && Fonts.get(value)) { // The font has already be decoded, stop! return true; @@ -515,6 +529,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var key = operandStack.pop(); dump("definefont " + font + " with key: " + key); Fonts.set(key, font); + operandStack.push(font); break; case "known": @@ -532,7 +547,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { case "eexec": // All the first segment data has been read, decrypt the second segment // and start interpreting it in order to decode it + var file = operandStack.pop(); var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join(""); + dump(eexecString); lexer = new Lexer(new StringStream(eexecString)); break; @@ -553,7 +570,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var newOperand = operandStack.peek(); - for (var i = 0; i < operands.length; i++) + while (operands.length) operandStack.push(operands.pop()); operandStack.push(newOperand); @@ -620,11 +637,14 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { }; +var type1hack = false; var Type1Font = function(aFontName, aFontFile) { // All Type1 font program should begin with the comment %! if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) error("Invalid file header"); + if (!type1hack) { + type1hack= true; var start = Date.now(); var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); @@ -635,5 +655,248 @@ var Type1Font = function(aFontName, aFontFile) { var end = Date.now(); //log("Time to parse font is:" + (end - start)); + + this.convert(); + } }; +var hack = false; +Type1Font.prototype = { + convert: function() { + var fontName = "TACTGM+NimbusRomNo9L-Medi"; + var fontData = null; + for (var font in Fonts.map) { + if (font == fontName) { + fontData = Fonts.get(font); + break; + } + } + + if (!fontData || hack) + return; + hack = true; + + var t1Only = [ + "callothersubr", + "closepath", + "dotsection", + "hsbw", + "hstem3", + "pop", + "sbw", + "seac", + "setcurrentpoint", + "vstem3" + ]; + + /* + * The sequence and form of a Type 2 charstring program may be + * represented as: + * w? {hs* vs* cm* hm* mt subpath}? {mt subpath}* endchar + * + */ + var t2CharStrings = new Dict(); + + var t1CharStrings = fontData.get("CharStrings"); + for (var key in t1CharStrings.map) { + var font = t1CharStrings.get(key); + var t2font = []; + + for (var i = 0; i < font.length; i++) { + var token = font[i]; + switch (token) { + case "hsbw": + var width = t2font.pop(); + var leftSidebearingPoint = t2font.pop(); + font.push(width); + break; + default: + if (t1Only.indexOf(token) != -1) { + log(token + " need convert!\n"); + throw new Error("Type1 Only token"); + } + t2font.push(token); + break; + } + } + log(key + "::" + t1CharStrings.get(key)); + log("type2::" + t2font); + } + } +}; + +function decodeType2DictData(aString, aDictionary) { + var data = []; + + var value = ""; + var count = aString.length; + for (var i = 0; i < count; i) { + value = aString[i++]; + + if (value < 0) { + continue; + } else if (value == 28) { + value = aString[i++] << 8 | aString[i++]; + } else if (value == 29) { + value = aString[i++] << 24 | + aString[i++] << 16 | + aString[i++] << 8 | + aString[i++]; + } else if (value < 32) { + if (value == 12) { + value = aDictionary["12"][aString[i++]]; + } else { + value = aDictionary[value]; + } + } else if (value <= 246) { + value = parseInt(value) - 139; + } else if (value <= 250) { + value = ((value - 247) * 256) + parseInt(aString[i++]) + 108; + } else if (value <= 254) { + value = -((value - 251) * 256) - parseInt(aString[i++]) - 108; + } else { + throw new Error("Value should not be 255"); + } + + data.push(value); + } + + return data; +} + +var Type2Parser = function(aFilePath) { + var font = new Dict(); + + // Turn on this flag for additional debugging logs + var debug = true; + + function dump(aStr) { + if (debug) + log(aStr); + }; + + function readIndex(aStream, aIsByte) { + var count = aStream.getByte() + aStream.getByte(); + var offsize = aStream.getByte(); + var offsets = []; + for (var i = 0; i < count + 1; i++) { + var offset = 0; + for (var j = 0; j < offsize; j++) { + // XXX need to do some better code here + var byte = aStream.getByte(); + offset += byte; + } + offsets.push(offset); + } + + dump("Found " + count + " objects at offsets :" + offsets + " (offsize: " + offsize + ")"); + var dataOffset = aStream.pos; + var objects = []; + for (var i = 0; i < count; i++) { + var offset = offsets[i]; + aStream.pos = dataOffset + offset - 1; + + var data = []; + var length = offsets[i + 1] - 1; + for (var j = offset - 1; j < length; j++) + data.push(aIsByte ? aStream.getByte() : aStream.getChar()); + dump("object at offset " + offset + " is: " + data); + objects.push(data); + } + return objects; + }; + + function parseAsToken(aArray) { + var objects = []; + + var count = aArray.length; + for (var i = 0; i < count; i++) { + var decoded = decodeType2DictData(aArray[i], CFFDictOps); + + var stack = []; + var count = decoded.length; + for (var i = 0; i < count; i++) { + var token = decoded[i]; + if (IsNum(token)) { + stack.push(token); + } else { + switch (token.operand) { + case "SID": + font.set(token.name, CFFStrings[stack.pop()]); + break; + case "number number": + font.set(token.name, { + size: stack.pop(), + offset: stack.pop() + }); + break; + case "boolean": + font.set(token.name, stack.pop()); + break; + case "delta": + font.set(token.name, stack.pop()); + break; + default: + if (token.operand && token.operand.length) { + var array = []; + for (var j = 0; j < token.operand.length; j++) + array.push(stack.pop()); + font.set(token.name, array); + } else { + font.set(token.name, stack.pop()); + } + break; + } + } + } + } + + return objects; + }; + + this.parse = function(aStream) { + font.set("major", aStream.getByte()); + font.set("minor", aStream.getByte()); + font.set("hdrSize", aStream.getByte()); + font.set("offsize", aStream.getByte()); + + // Move the cursor after the header + aStream.skip(font.get("hdrSize") - aStream.pos); + + // Read the NAME Index + dump("Reading Index: Names"); + font.set("Names", readIndex(aStream)); + dump(font.get("Names")); + + // Read the Top Dict Index + dump("Reading Index: TopDict"); + var topDict = readIndex(aStream, true); + + // Read the String Index + dump("Reading Index: Strings"); + var strings = readIndex(aStream); + + // Fill up the Strings dictionary with the new unique strings + for (var i = 0; i < strings.length; i++) + CFFStrings.push(strings[i].join("")); + + // Parse the TopDict operator + parseAsToken(topDict); + + for (var p in font.map) { + log(p + "::" + font.get(p)); + } + } +}; + +// +var xhr = new XMLHttpRequest(); +xhr.open("GET", "titi.cff", false); +xhr.mozResponseType = xhr.responseType = "arraybuffer"; +xhr.expected = (document.URL.indexOf("file:") == 0) ? 0 : 200; +xhr.send(null); +var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse || + xhr.responseArrayBuffer || xhr.response; +var cff = new Type2Parser("titi.cff"); +cff.parse(new Stream(cffData)); + diff --git a/cffStandardStrings.js b/cffStandardStrings.js new file mode 100644 index 000000000..1604b5fdd --- /dev/null +++ b/cffStandardStrings.js @@ -0,0 +1,552 @@ +var CFFStrings = [ + ".notdef", + "space", + "exclam", + "quotedbl", + "numbersign", + "dollar", + "percent", + "ampersand", + "quoteright", + "parenleft", + "parenright", + "asterisk", + "plus", + "comma", + "hyphen", + "period", + "slash", + "zero", + "one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + "colon", + "semicolon", + "less", + "equal", + "greater", + "question", + "at", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "bracketleft", + "backslash", + "bracketright", + "asciicircum", + "underscore", + "quoteleft", + "95 asciitilde", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "braceleft", + "bar", + "braceright", + "asciitilde", + "exclamdown", + "cent", + "sterling", + "fraction", + "yen", + "florin", + "section", + "currency", + "quotesingle", + "quotedblleft", + "guillemotleft", + "guilsinglleft", + "guilsinglright", + "fi", + "fl", + "endash", + "dagger", + "daggerdbl", + "periodcentered", + "paragraph", + "bullet", + "quotesinglbase", + "quotedblbase", + "quotedblright", + "guillemotright", + "ellipsis", + "perthousand", + "questiondown", + "grave", + "acute", + "circumflex", + "tilde", + "macron", + "breve", + "dotaccent", + "dieresis", + "ring", + "cedilla", + "hungarumlaut", + "ogonek", + "caron", + "emdash", + "AE", + "ordfeminine", + "Lslash", + "Oslash", + "OE", + "ordmasculine", + "ae", + "dotlessi", + "lslash", + "oslash", + "oe", + "germandbls", + "onesuperior", + "logicalnot", + "mu", + "trademark", + "Eth", + "onehalf", + "plusminus", + "Thorn", + "onequarter", + "divide", + "brokenbar", + "degree", + "thorn", + "threequarters", + "twosuperior", + "registered", + "minus", + "eth", + "multiply", + "threesuperior", + "copyright", + "Aacute", + "Acircumflex", + "Adieresis", + "Agrave", + "Aring", + "Atilde", + "Ccedilla", + "Eacute", + "Ecircumflex", + "Edieresis", + "Egrave", + "Iacute", + "Icircumflex", + "Idieresis", + "Igrave", + "Ntilde", + "Oacute", + "Ocircumflex", + "Odieresis", + "Ograve", + "Otilde", + "Scaron", + "Uacute", + "Ucircumflex", + "Udieresis", + "Ugrave", + "Yacute", + "Ydieresis", + "Zcaron", + "aacute", + "acircumflex", + "adieresis", + "agrave", + "aring", + "atilde", + "ccedilla", + "eacute", + "ecircumflex", + "edieresis", + "egrave", + "iacute", + "icircumflex", + "idieresis", + "igrave", + "ntilde", + "oacute", + "ocircumflex", + "odieresis", + "ograve", + "otilde", + "scaron", + "uacute", + "ucircumflex", + "udieresis", + "ugrave", + "yacute", + "ydieresis", + "zcaron", + "exclamsmall", + "Hungarumlautsmall", + "dollaroldstyle", + "dollarsuperior", + "ampersandsmall", + "Acutesmall", + "parenleftsuperior", + "parenrightsuperior", + "266 ff", + "onedotenleader", + "zerooldstyle", + "oneoldstyle", + "twooldstyle", + "threeoldstyle", + "fouroldstyle", + "fiveoldstyle", + "sixoldstyle", + "sevenoldstyle", + "eightoldstyle", + "nineoldstyle", + "commasuperior", + "threequartersemdash", + "periodsuperior", + "questionsmall", + "asuperior", + "bsuperior", + "centsuperior", + "dsuperior", + "esuperior", + "isuperior", + "lsuperior", + "msuperior", + "nsuperior", + "osuperior", + "rsuperior", + "ssuperior", + "tsuperior", + "ff", + "ffi", + "ffl", + "parenleftinferior", + "parenrightinferior", + "Circumflexsmall", + "hyphensuperior", + "Gravesmall", + "Asmall", + "Bsmall", + "Csmall", + "Dsmall", + "Esmall", + "Fsmall", + "Gsmall", + "Hsmall", + "Ismall", + "Jsmall", + "Ksmall", + "Lsmall", + "Msmall", + "Nsmall", + "Osmall", + "Psmall", + "Qsmall", + "Rsmall", + "Ssmall", + "Tsmall", + "Usmall", + "Vsmall", + "Wsmall", + "Xsmall", + "Ysmall", + "Zsmall", + "colonmonetary", + "onefitted", + "rupiah", + "Tildesmall", + "exclamdownsmall", + "centoldstyle", + "Lslashsmall", + "Scaronsmall", + "Zcaronsmall", + "Dieresissmall", + "Brevesmall", + "Caronsmall", + "Dotaccentsmall", + "Macronsmall", + "figuredash", + "hypheninferior", + "Ogoneksmall", + "Ringsmall", + "Cedillasmall", + "questiondownsmall", + "oneeighth", + "threeeighths", + "fiveeighths", + "seveneighths", + "onethird", + "twothirds", + "zerosuperior", + "foursuperior", + "fivesuperior", + "sixsuperior", + "sevensuperior", + "eightsuperior", + "ninesuperior", + "zeroinferior", + "oneinferior", + "twoinferior", + "threeinferior", + "fourinferior", + "fiveinferior", + "sixinferior", + "seveninferior", + "eightinferior", + "nineinferior", + "centinferior", + "dollarinferior", + "periodinferior", + "commainferior", + "Agravesmall", + "Aacutesmall", + "Acircumflexsmall", + "Atildesmall", + "Adieresissmall", + "Aringsmall", + "AEsmall", + "Ccedillasmall", + "Egravesmall", + "Eacutesmall", + "Ecircumflexsmall", + "Edieresissmall", + "Igravesmall", + "Iacutesmall", + "Icircumflexsmall", + "Idieresissmall", + "Ethsmall", + "Ntildesmall", + "Ogravesmall", + "Oacutesmall", + "Ocircumflexsmall", + "Otildesmall", + "Odieresissmall", + "OEsmall", + "Oslashsmall", + "Ugravesmall", + "Uacutesmall", + "Ucircumflexsmall", + "Udieresissmall", + "Yacutesmall", + "Thornsmall", + "Ydieresissmall", + "001.000", + "001.001", + "001.002", + "001.003", + "Black", + "Bold", + "Book", + "Light", + "Medium", + "Regular", + "Roman", + "Semibold" +]; + +var CFFDictOps = { + "0": { + name: "version", + operand: "SID" + }, + "1": { + name: "Notice", + operand: "SID" + }, + "2": { + name: "FullName", + operand: "SID" + }, + "3": { + name: "FamilyName", + operand: "SID" + }, + "4": { + name: "Weight", + operand: "SID" + }, + "5": { + name: "FontBBox", + operand: [0, 0, 0, 0] + }, + "6": { + name: "BlueValues" + }, + "7": { + name: "OtherBlues" + }, + "8": { + name: "FamilyBlues" + }, + "9": { + name: "FamilyOtherBlues" + }, + "10": { + name: "StdHW" + }, + "11": { + name: "StdVW" + }, + "12": { + "0": { + name: "Copyright", + operand: "SID" + }, + "1": { + name: "IsFixedPitch", + operand: false + }, + "2": { + name: "ItalicAngle", + operand: 0 + }, + "3": { + name: "UnderlinePosition", + operand: -100 + }, + "4": { + name: "UnderlineThickness", + operand: 50 + }, + "5": { + name: "PaintType", + operand: 0 + }, + "6": { + name: "CharstringType", + operand: 2 + }, + "7": { + name: "FontMatrix", + operand: [0.001, 0, 0, 0.001, 0 ,0] + }, + "8": { + name: "StrokeWidth", + operand: 0 + }, + "9": { + name: "BlueScale" + }, + "10": { + name: "BlueShift" + }, + "11": { + name: "BlueFuzz" + }, + "12": { + name: "StemSnapH" + }, + "13": { + name: "StemSnapV" + }, + "14": { + name: "ForceBold" + }, + "17": { + name: "LanguageGroup" + }, + "18": { + name: "ExpansionFactor" + }, + "9": { + name: "initialRandomSeed" + }, + "20": { + name: "SyntheticBase", + operand: null + }, + "21": { + name: "PostScript", + operand: "SID" + }, + "22": { + name: "BaseFontName", + operand: "SID" + }, + "23": { + name: "BaseFontBlend", + operand: "delta" + } + }, + "13": { + name: "UniqueID", + operand: null + }, + "14": { + name: "XUID", + operand: [] + }, + "15": { + name: "charset", + operand: 0 + }, + "16": { + name: "Encoding", + operand: 0 + }, + "17": { + name: "CharStrings", + operand: null + }, + "18": { + name: "Private", + operand: "number number" + }, + "19": { + name: "Subrs" + }, + "20": { + name: "defaultWidthX" + }, + "21": { + name: "nominalWidthX" + } +}; diff --git a/pdf.js b/pdf.js index 15198c553..ea6a62f57 100644 --- a/pdf.js +++ b/pdf.js @@ -5,6 +5,7 @@ var ERRORS = 0, WARNINGS = 1, TODOS = 5; var verbosity = WARNINGS; function log(msg) { + msg = msg.toString ? msg.toString() : msg; if (console && console.log) console.log(msg); else if (print) @@ -78,7 +79,7 @@ var Stream = (function() { return ch; }, skip: function(n) { - if (!n) + if (!n && !IsNum(n)) n = 1; this.pos += n; }, @@ -2279,6 +2280,7 @@ var CanvasGraphics = (function() { var subtype = font.get("Subtype").name; switch (subtype) { case "Type1": + break; var fontDescriptor = font.get("FontDescriptor"); if (fontDescriptor.num) { var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); @@ -2292,6 +2294,7 @@ var CanvasGraphics = (function() { break; case "TrueType": + break; var fontDescriptor = font.get("FontDescriptor"); if (fontDescriptor.num) { var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); diff --git a/test.html b/test.html index ac568473a..5bd0ea119 100644 --- a/test.html +++ b/test.html @@ -5,6 +5,7 @@ + From 87b4cb85beddc2329148067bd633218993ded138 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 8 Jun 2011 20:37:25 +0200 Subject: [PATCH 12/72] Starts to decode type2 charStrings --- PDFFont.js | 232 +++++++++++++++++++++--------------------- cffStandardStrings.js | 149 ++++++++++++++++++++++++++- 2 files changed, 265 insertions(+), 116 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index b8ce52e07..755bea9b5 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -660,80 +660,19 @@ var Type1Font = function(aFontName, aFontFile) { } }; -var hack = false; -Type1Font.prototype = { - convert: function() { - var fontName = "TACTGM+NimbusRomNo9L-Medi"; - var fontData = null; - for (var font in Fonts.map) { - if (font == fontName) { - fontData = Fonts.get(font); - break; - } - } - if (!fontData || hack) - return; - hack = true; - var t1Only = [ - "callothersubr", - "closepath", - "dotsection", - "hsbw", - "hstem3", - "pop", - "sbw", - "seac", - "setcurrentpoint", - "vstem3" - ]; +/**************************************************************************/ - /* - * The sequence and form of a Type 2 charstring program may be - * represented as: - * w? {hs* vs* cm* hm* mt subpath}? {mt subpath}* endchar - * - */ - var t2CharStrings = new Dict(); - - var t1CharStrings = fontData.get("CharStrings"); - for (var key in t1CharStrings.map) { - var font = t1CharStrings.get(key); - var t2font = []; - - for (var i = 0; i < font.length; i++) { - var token = font[i]; - switch (token) { - case "hsbw": - var width = t2font.pop(); - var leftSidebearingPoint = t2font.pop(); - font.push(width); - break; - default: - if (t1Only.indexOf(token) != -1) { - log(token + " need convert!\n"); - throw new Error("Type1 Only token"); - } - t2font.push(token); - break; - } - } - log(key + "::" + t1CharStrings.get(key)); - log("type2::" + t2font); - } - } -}; - -function decodeType2DictData(aString, aDictionary) { +function decodeType2DictData(aString, aDictionary, aHack) { var data = []; var value = ""; var count = aString.length; for (var i = 0; i < count; i) { value = aString[i++]; - - if (value < 0) { + if (value <= 0) { + data.push(value); continue; } else if (value == 28) { value = aString[i++] << 8 | aString[i++]; @@ -743,11 +682,15 @@ function decodeType2DictData(aString, aDictionary) { aString[i++] << 8 | aString[i++]; } else if (value < 32) { + var oldValue = value; if (value == 12) { value = aDictionary["12"][aString[i++]]; } else { value = aDictionary[value]; } + if (!value) + throw new Error("This command number does not match anything : " + oldValue); + value = aHack ? value.name : value; } else if (value <= 246) { value = parseInt(value) - 139; } else if (value <= 250) { @@ -776,15 +719,30 @@ var Type2Parser = function(aFilePath) { }; function readIndex(aStream, aIsByte) { - var count = aStream.getByte() + aStream.getByte(); + var count = aStream.getByte() << 8 | aStream.getByte(); var offsize = aStream.getByte(); var offsets = []; for (var i = 0; i < count + 1; i++) { - var offset = 0; - for (var j = 0; j < offsize; j++) { - // XXX need to do some better code here - var byte = aStream.getByte(); - offset += byte; + switch (offsize) { + case 0: + offset = 0; + break; + case 1: + offset = aStream.getByte(); + break; + case 2: + offset = aStream.getByte() << 8 | aStream.getByte(); + break; + case 3: + offset = aStream.getByte() << 16 | aStream.getByte() << 8 | + aStream.getByte(); + break; + case 4: + offset = aStream.getByte() << 24 | aStream.getByte() << 16 | + aStream.getByte() << 8 | aStream.getByte(); + break; + default: + throw new Error("Unsupported offsize: " + offsize); } offsets.push(offset); } @@ -800,58 +758,73 @@ var Type2Parser = function(aFilePath) { var length = offsets[i + 1] - 1; for (var j = offset - 1; j < length; j++) data.push(aIsByte ? aStream.getByte() : aStream.getChar()); - dump("object at offset " + offset + " is: " + data); + //dump("object at offset " + offset + " is: " + data); objects.push(data); } return objects; }; - function parseAsToken(aArray) { - var objects = []; + function parseAsToken(aString, aDict) { + var decoded = decodeType2DictData(aString, aDict); - var count = aArray.length; + var stack = []; + var count = decoded.length; for (var i = 0; i < count; i++) { - var decoded = decodeType2DictData(aArray[i], CFFDictOps); - - var stack = []; - var count = decoded.length; - for (var i = 0; i < count; i++) { - var token = decoded[i]; - if (IsNum(token)) { - stack.push(token); - } else { - switch (token.operand) { - case "SID": - font.set(token.name, CFFStrings[stack.pop()]); - break; - case "number number": - font.set(token.name, { - size: stack.pop(), - offset: stack.pop() - }); - break; - case "boolean": + var token = decoded[i]; + if (IsNum(token)) { + stack.push(token); + } else { + switch (token.operand) { + case "SID": + font.set(token.name, CFFStrings[stack.pop()]); + break; + case "number number": + font.set(token.name, { + size: stack.pop(), + offset: stack.pop() + }); + break; + case "boolean": + font.set(token.name, stack.pop()); + break; + case "delta": + font.set(token.name, stack.pop()); + break; + default: + if (token.operand && token.operand.length) { + var array = []; + for (var j = 0; j < token.operand.length; j++) + array.push(stack.pop()); + font.set(token.name, array); + } else { font.set(token.name, stack.pop()); - break; - case "delta": - font.set(token.name, stack.pop()); - break; - default: - if (token.operand && token.operand.length) { - var array = []; - for (var j = 0; j < token.operand.length; j++) - array.push(stack.pop()); - font.set(token.name, array); - } else { - font.set(token.name, stack.pop()); - } - break; - } + } + break; } } } + }; - return objects; + + function readCharset(aStream, aCharStrings) { + var charset = {}; + + var format = aStream.getByte(); + if (format == 0) { + var count = aCharStrings.length - 1; + charset[".notdef"] = decodeType2DictData(aCharStrings[0], CFFDictCommands, true); + for (var i = 1; i < count + 1; i++) { + var sid = aStream.getByte() << 8 | aStream.getByte(); + var charString = decodeType2DictData(aCharStrings[i], CFFDictCommands, true); + charset[CFFStrings[sid]] = charString; + log(CFFStrings[sid] + "::" + charString); + } + } else if (format == 1) { + throw new Error("Format 1 charset are not supported"); + } else { + throw new Error("Invalid charset format"); + } + return charset; }; this.parse = function(aStream) { @@ -881,15 +854,44 @@ var Type2Parser = function(aFilePath) { CFFStrings.push(strings[i].join("")); // Parse the TopDict operator - parseAsToken(topDict); + var objects = []; + var count = topDict.length; + for (var i = 0; i < count; i++) + parseAsToken(topDict[i], CFFDictOps); - for (var p in font.map) { - log(p + "::" + font.get(p)); + for (var p in font.map) + dump(p + "::" + font.get(p)); + + // Read the Subr Index + dump("Reading Subr Index"); + var subrs = readIndex(aStream); + + // Read CharStrings Index + dump("Read CharStrings Index"); + var charStringsOffset = font.get("CharStrings"); + aStream.pos = charStringsOffset; + var charStrings = readIndex(aStream, true); + + + var charsetEntry = font.get("charset"); + if (charsetEntry == 0) { + throw new Error("Need to support CFFISOAdobeCharset"); + } else if (charsetEntry == 1) { + throw new Error("Need to support CFFExpert"); + } else if (charsetEntry == 2) { + throw new Error("Need to support CFFExpertSubsetCharset"); + } else { + aStream.pos = charsetEntry; + var charset = readCharset(aStream, charStrings); } + + // Read Encoding data + log("Reading encoding data"); } }; -// + +// XXX var xhr = new XMLHttpRequest(); xhr.open("GET", "titi.cff", false); xhr.mozResponseType = xhr.responseType = "arraybuffer"; diff --git a/cffStandardStrings.js b/cffStandardStrings.js index 1604b5fdd..ab71947ec 100644 --- a/cffStandardStrings.js +++ b/cffStandardStrings.js @@ -65,7 +65,7 @@ var CFFStrings = [ "asciicircum", "underscore", "quoteleft", - "95 asciitilde", + "a", "b", "c", "d", @@ -550,3 +550,150 @@ var CFFDictOps = { name: "nominalWidthX" } }; + +var CFFDictCommands = { + "1": { + name: "hstem" + }, + "3": { + name: "vstem" + }, + "4": { + name: "vmoveto" + }, + "5": { + name: "rlineto" + }, + "6": { + name: "hlineto" + }, + "7": { + name: "vlineto" + }, + "8": { + name: "rrcurveto" + }, + "10": { + name: "callsubr" + }, + "11": { + name: "return" + }, + "12": { + "3": { + name: "and" + }, + "4": { + name: "or" + }, + "5": { + name: "not" + }, + "9": { + name: "abs" + }, + "10": { + name: "add" + }, + "11": { + name: "div" + }, + "12": { + name: "sub" + }, + "14": { + name: "neg" + }, + "15": { + name: "eq" + }, + "18": { + name: "drop" + }, + "20": { + name: "put" + }, + "21": { + name: "get" + }, + "22": { + name: "ifelse" + }, + "23": { + name: "random" + }, + "24": { + name: "mul" + }, + "26": { + name: "sqrt" + }, + "27": { + name: "dup" + }, + "28": { + name: "exch" + }, + "29": { + name: "index" + }, + "30": { + name: "roll" + }, + "34": { + name: "hflex" + }, + "35": { + name: "flex" + }, + "36": { + name: "hflex1" + }, + "37": { + name: "flex1" + } + }, + "14": { + name: "endchar" + }, + "18": { + name: "hstemhm" + }, + "19": { + name: "hintmask" + }, + "20": { + name: "cntrmask" + }, + "21": { + name: "rmoveto" + }, + "22": { + name: "hmoveto" + }, + "23": { + name: "vstemhm" + }, + "24": { + name: "rcurveline" + }, + "25": { + name: "rlivecurve" + }, + "26": { + name: "vvcurveto" + }, + "27": { + name: "hhcurveto" + }, + "29": { + name: "callgsubr" + }, + "30": { + name: "vhcurveto" + }, + "31": { + name: "hvcurveto" + } +}; + From 714571dc807c91ce9555e64ac5f047a59b6260d4 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 9 Jun 2011 00:26:41 +0200 Subject: [PATCH 13/72] Support reading all parts of a Type2 file --- PDFFont.js | 89 +++++++++++++++++++++++++++++++++++-------- cffStandardStrings.js | 76 ++++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 15 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 755bea9b5..a6363f940 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -664,7 +664,7 @@ var Type1Font = function(aFontName, aFontFile) { /**************************************************************************/ -function decodeType2DictData(aString, aDictionary, aHack) { +function decodeType2DictData(aString, aDictionary, aHack, aUseRealNumber) { var data = []; var value = ""; @@ -672,8 +672,6 @@ function decodeType2DictData(aString, aDictionary, aHack) { for (var i = 0; i < count; i) { value = aString[i++]; if (value <= 0) { - data.push(value); - continue; } else if (value == 28) { value = aString[i++] << 8 | aString[i++]; } else if (value == 29) { @@ -681,22 +679,72 @@ function decodeType2DictData(aString, aDictionary, aHack) { aString[i++] << 16 | aString[i++] << 8 | aString[i++]; + } else if (aUseRealNumber && value == 30) { + value = ""; + var done = false; + while (!done) { + var byte = aString[i++]; + var nibbles = [parseInt(byte / 16), parseInt(byte % 16)]; + for (var j = 0; j < nibbles.length; j++) { + var nibble = nibbles[j]; + dump(nibble + "\n"); + switch (nibble) { + case 0x0: + case 0x1: + case 0x2: + case 0x3: + case 0x4: + case 0x5: + case 0x6: + case 0x7: + case 0x8: + case 0x9: + value += nibble; + break; + case 0xA: + value += "."; + break; + case 0xB: + value += "E"; + break; + case 0xC: + value += "E-"; + break; + case 0xD: + break; + case 0xE: + value += "-"; + break; + case 0xF: + done = true; + break; + default: + error(nibble + " is unssuported"); + break; + } + } + }; + value = parseFloat(value); } else if (value < 32) { - var oldValue = value; if (value == 12) { value = aDictionary["12"][aString[i++]]; - } else { + } else if (aDictionary[value]) { value = aDictionary[value]; + } else { + error(value + " is an invalid command number"); } - if (!value) - throw new Error("This command number does not match anything : " + oldValue); value = aHack ? value.name : value; } else if (value <= 246) { value = parseInt(value) - 139; } else if (value <= 250) { - value = ((value - 247) * 256) + parseInt(aString[i++]) + 108; + value = ((value - 247) * 256) + aString[i++] + 108; } else if (value <= 254) { - value = -((value - 251) * 256) - parseInt(aString[i++]) - 108; + value = -((value - 251) * 256) - aString[i++] - 108; + } else if (value == 255) { + var byte = aString[i++]; + var high = (byte >> 1); + value = (byte - high) << 24 | aString[i++] << 16 | + aString[i++] << 8 | aString[i]; } else { throw new Error("Value should not be 255"); } @@ -780,8 +828,8 @@ var Type2Parser = function(aFilePath) { break; case "number number": font.set(token.name, { - size: stack.pop(), - offset: stack.pop() + offset: stack.pop(), + size: stack.pop() }); break; case "boolean": @@ -859,16 +907,19 @@ var Type2Parser = function(aFilePath) { for (var i = 0; i < count; i++) parseAsToken(topDict[i], CFFDictOps); + var topDictOffset = aStream.pos; + for (var p in font.map) dump(p + "::" + font.get(p)); - // Read the Subr Index + // Read the Global Subr Index that comes just after the Strings Index + // (cf. "The Compact Font Format Specification" Chapter 16) dump("Reading Subr Index"); var subrs = readIndex(aStream); // Read CharStrings Index - dump("Read CharStrings Index"); var charStringsOffset = font.get("CharStrings"); + dump("Read CharStrings Index (offset: " + charStringsOffset + ")"); aStream.pos = charStringsOffset; var charStrings = readIndex(aStream, true); @@ -885,8 +936,16 @@ var Type2Parser = function(aFilePath) { var charset = readCharset(aStream, charStrings); } - // Read Encoding data - log("Reading encoding data"); + // Reading Private Dict + var private = font.get("Private"); + log("Reading Private Dict (offset: " + private.offset + " size: " + private.size + ")"); + aStream.pos = private.offset; + + var privateDict = []; + for (var i = 0; i < private.size; i++) + privateDict.push(aStream.getByte()); + log(privateDict); + log(decodeType2DictData(privateDict, CFFDictPrivate, true, true)); } }; diff --git a/cffStandardStrings.js b/cffStandardStrings.js index ab71947ec..25301ee31 100644 --- a/cffStandardStrings.js +++ b/cffStandardStrings.js @@ -697,3 +697,79 @@ var CFFDictCommands = { } }; +var CFFDictPrivate = { + "6": { + name: "BluesValues", + operand: "delta" + }, + "7": { + name: "OtherBlues", + operand: "delta" + }, + "8": { + name: "FamilyBlues", + operand: "delta" + }, + "9": { + name: "FamilyOtherBlues", + operand: "delta" + }, + "10": { + name: "StdHW", + operand: null + }, + "11": { + name: "StdVW", + operand: null + }, + "12": { + "9": { + name: "BlueScale", + operand: 0.039625 + }, + "10": { + name: "BlueShift", + operand: 7 + }, + "11": { + name: "BlueFuzz", + operand: 1 + }, + "12": { + name: "StemSnapH", + operand: "delta" + }, + "13": { + name: "StemSnapV", + operand: "delta" + }, + "14": { + name: "ForceBold", + operand: "boolean" + }, + "17": { + name: "LanguageGroup", + operand: 0 + }, + "18": { + name: "ExpansionFactor", + operand: 0.06 + }, + "19": { + name: "initialRandomSeed", + operand: 0 + } + }, + "19": { + name: "Subrs", + operand: null + }, + "20": { + name: "defaultWidthX", + operand: 0 + }, + "21": { + name: "nominalWidthX", + operand: 0 + } +}; From fcc4ce9bec1a4f1b01138e9fc66e8b5ec3ad1b68 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 9 Jun 2011 02:37:06 +0200 Subject: [PATCH 14/72] Clean up some Type2 reader code for readibility --- PDFFont.js | 361 +++++++++++++++++++++++++----------------- cffStandardStrings.js | 209 +++++++----------------- 2 files changed, 274 insertions(+), 296 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index a6363f940..084174dad 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -33,6 +33,8 @@ var TrueTypeFont = function(aFontName, aFontFile) { }; + + var Type1Parser = function(aAsciiStream, aBinaryStream) { var lexer = new Lexer(aAsciiStream); @@ -155,6 +157,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { "31": "hcurveto" }; + // XXX Is count++ the right thing to do? Is it not i++? function decodeCharString(aStream) { var start = Date.now(); var charString = []; @@ -184,8 +187,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { } else { var byte = aStream.getByte(); var high = (byte >> 1); - value = (byte - high) * 16777216 + aStream.getByte() * 65536 + - aStream.getByte() * 256 * + aStream.getByte(); + value = (byte - high) << 24 | aStream.getByte() << 16 | + aStream.getByte() << 8 | aStream.getByte(); count += 4; } @@ -662,98 +665,230 @@ var Type1Font = function(aFontName, aFontFile) { -/**************************************************************************/ -function decodeType2DictData(aString, aDictionary, aHack, aUseRealNumber) { - var data = []; - var value = ""; + + + + + + + + +/** + * The Type2 reader code below is only used for debugging purpose since Type2 + * is only a CharString format and is never used directly as a Font file. + * + * So the code here is useful for dumping the data content of a .cff file in + * order to investigate the similarity between a Type1 CharString and a Type2 + * CharString. + */ + + +/** + * Build a charset by assigning the glyph name and the human readable form + * of the glyph data. + */ +function readCharset(aStream, aCharstrings) { + var charset = {}; + + var format = aStream.getByte(); + if (format == 0) { + charset[".notdef"] = readCharstringEncoding(aCharstrings[0]); + + var count = aCharstrings.length - 1; + for (var i = 1; i < count + 1; i++) { + var sid = aStream.getByte() << 8 | aStream.getByte(); + charset[CFFStrings[sid]] = readCharstringEncoding(aCharstrings[i]); + log(CFFStrings[sid] + "::" + charset[CFFStrings[sid]]); + } + } else if (format == 1) { + error("Charset Range are not supported"); + } else { + error("Invalid charset format"); + } + + return charset; +}; + +/** + * Take a Type2 binary charstring as input and transform it to a human + * readable representation as specified by the 'The Type 2 Charstring Format', + * chapter 3.1. + */ +function readCharstringEncoding(aString) { + var charstringTokens = []; + + var count = aString.length; + for (var i = 0; i < count; ) { + var value = aString[i++]; + var token = null; + + if (value < 0) { + continue; + } else if (value <= 11) { + token = CFFEncodingMap[value]; + } else if (value == 12) { + token = CFFEncodingMap[value][aString[i++]]; + } else if (value <= 18) { + token = CFFEncodingMap[value]; + } else if (value <= 20) { + var mask = aString[i++]; + token = CFFEncodingMap[value]; + } else if (value <= 27) { + token = CFFEncodingMap[value]; + } else if (value == 28) { + token = aString[i++] << 8 | aString[i++]; + } else if (value <= 31) { + token = CFFEncodingMap[value]; + } else if (value < 247) { + token = parseInt(value) - 139; + } else if (value < 251) { + token = ((value - 247) * 256) + aString[i++] + 108; + } else if (value < 255) { + token = -((value - 251) * 256) - aString[i++] - 108; + } else {// value == 255 + token = aString[i++] << 24 | aString[i++] << 16 | + aString[i++] << 8 | aString[i]; + } + + charstringTokens.push(token); + } + + return charstringTokens; +}; + + +/** + * Take a binary DICT Data as input and transform it into a human readable + * form as specified by 'The Compact Font Format Specification', chapter 5. + */ +function readFontDictData(aString, aMap) { + var fontDictDataTokens = []; + var count = aString.length; for (var i = 0; i < count; i) { - value = aString[i++]; - if (value <= 0) { + var value = aString[i++]; + var token = null; + + if (value == 12) { + token = aMap[value][aString[i++]]; } else if (value == 28) { - value = aString[i++] << 8 | aString[i++]; + token = aString[i++] << 8 | aString[i++]; } else if (value == 29) { - value = aString[i++] << 24 | + token = aString[i++] << 24 | aString[i++] << 16 | aString[i++] << 8 | aString[i++]; - } else if (aUseRealNumber && value == 30) { - value = ""; - var done = false; - while (!done) { + } else if (value == 30) { + token = ""; + var parsed = false; + while (!parsed) { var byte = aString[i++]; + var nibbles = [parseInt(byte / 16), parseInt(byte % 16)]; for (var j = 0; j < nibbles.length; j++) { var nibble = nibbles[j]; - dump(nibble + "\n"); switch (nibble) { - case 0x0: - case 0x1: - case 0x2: - case 0x3: - case 0x4: - case 0x5: - case 0x6: - case 0x7: - case 0x8: - case 0x9: - value += nibble; - break; case 0xA: - value += "."; + token += "."; break; case 0xB: - value += "E"; + token += "E"; break; case 0xC: - value += "E-"; + token += "E-"; break; case 0xD: break; case 0xE: - value += "-"; + token += "-"; break; case 0xF: - done = true; + parsed = true; break; default: - error(nibble + " is unssuported"); + token += nibble; break; } } }; - value = parseFloat(value); - } else if (value < 32) { - if (value == 12) { - value = aDictionary["12"][aString[i++]]; - } else if (aDictionary[value]) { - value = aDictionary[value]; - } else { - error(value + " is an invalid command number"); - } - value = aHack ? value.name : value; + token = parseFloat(token); + } else if (value <= 31) { + token = aMap[value]; } else if (value <= 246) { - value = parseInt(value) - 139; + token = parseInt(value) - 139; } else if (value <= 250) { - value = ((value - 247) * 256) + aString[i++] + 108; + token = ((value - 247) * 256) + aString[i++] + 108; } else if (value <= 254) { - value = -((value - 251) * 256) - aString[i++] - 108; + token = -((value - 251) * 256) - aString[i++] - 108; } else if (value == 255) { - var byte = aString[i++]; - var high = (byte >> 1); - value = (byte - high) << 24 | aString[i++] << 16 | - aString[i++] << 8 | aString[i]; - } else { - throw new Error("Value should not be 255"); + error("255 is not a valid DICT command"); } - data.push(value); + fontDictDataTokens.push(token); } - return data; -} + return fontDictDataTokens; +}; + + +/** + * Take a stream as input and return an array of objects. + * In CFF an INDEX is a structure with the following format: + * { + * count: 2 bytes (Number of objects stored in INDEX), + * offsize: 1 byte (Offset array element size), + * offset: [count + 1] bytes (Offsets array), + * data: - (Objects data) + * } + * + * More explanation are given in the 'CFF Font Format Specification', + * chapter 5. + */ +function readFontIndexData(aStream, aIsByte) { + var count = aStream.getByte() << 8 | aStream.getByte(); + var offsize = aStream.getByte(); + + function getNextOffset() { + switch (offsize) { + case 0: + return 0; + case 1: + return aStream.getByte(); + case 2: + return aStream.getByte() << 8 | aStream.getByte(); + case 3: + return aStream.getByte() << 16 | aStream.getByte() << 8 | + aStream.getByte(); + case 4: + return aStream.getByte() << 24 | aStream.getByte() << 16 | + aStream.getByte() << 8 | aStream.getByte(); + } + }; + + var offsets = []; + for (var i = 0; i < count + 1; i++) + offsets.push(getNextOffset()); + + log("Found " + count + " objects at offsets :" + offsets + " (offsize: " + offsize + ")"); + + // Now extract the objects + var relativeOffset = aStream.pos; + var objects = []; + for (var i = 0; i < count; i++) { + var offset = offsets[i]; + aStream.pos = relativeOffset + offset - 1; + + var data = []; + var length = offsets[i + 1] - 1; + for (var j = offset - 1; j < length; j++) + data.push(aIsByte ? aStream.getByte() : aStream.getChar()); + objects.push(data); + } + + return objects; +}; var Type2Parser = function(aFilePath) { var font = new Dict(); @@ -766,54 +901,9 @@ var Type2Parser = function(aFilePath) { log(aStr); }; - function readIndex(aStream, aIsByte) { - var count = aStream.getByte() << 8 | aStream.getByte(); - var offsize = aStream.getByte(); - var offsets = []; - for (var i = 0; i < count + 1; i++) { - switch (offsize) { - case 0: - offset = 0; - break; - case 1: - offset = aStream.getByte(); - break; - case 2: - offset = aStream.getByte() << 8 | aStream.getByte(); - break; - case 3: - offset = aStream.getByte() << 16 | aStream.getByte() << 8 | - aStream.getByte(); - break; - case 4: - offset = aStream.getByte() << 24 | aStream.getByte() << 16 | - aStream.getByte() << 8 | aStream.getByte(); - break; - default: - throw new Error("Unsupported offsize: " + offsize); - } - offsets.push(offset); - } - - dump("Found " + count + " objects at offsets :" + offsets + " (offsize: " + offsize + ")"); - var dataOffset = aStream.pos; - var objects = []; - for (var i = 0; i < count; i++) { - var offset = offsets[i]; - aStream.pos = dataOffset + offset - 1; - - var data = []; - var length = offsets[i + 1] - 1; - for (var j = offset - 1; j < length; j++) - data.push(aIsByte ? aStream.getByte() : aStream.getChar()); - //dump("object at offset " + offset + " is: " + data); - objects.push(data); - } - return objects; - }; - - function parseAsToken(aString, aDict) { - var decoded = decodeType2DictData(aString, aDict); + function parseAsToken(aString, aMap) { + var decoded = readFontDictData(aString, aMap); + log(decoded); var stack = []; var count = decoded.length; @@ -853,28 +943,6 @@ var Type2Parser = function(aFilePath) { } }; - - function readCharset(aStream, aCharStrings) { - var charset = {}; - - var format = aStream.getByte(); - if (format == 0) { - var count = aCharStrings.length - 1; - charset[".notdef"] = decodeType2DictData(aCharStrings[0], CFFDictCommands, true); - for (var i = 1; i < count + 1; i++) { - var sid = aStream.getByte() << 8 | aStream.getByte(); - var charString = decodeType2DictData(aCharStrings[i], CFFDictCommands, true); - charset[CFFStrings[sid]] = charString; - log(CFFStrings[sid] + "::" + charString); - } - } else if (format == 1) { - throw new Error("Format 1 charset are not supported"); - } else { - throw new Error("Invalid charset format"); - } - return charset; - }; - this.parse = function(aStream) { font.set("major", aStream.getByte()); font.set("minor", aStream.getByte()); @@ -886,16 +954,15 @@ var Type2Parser = function(aFilePath) { // Read the NAME Index dump("Reading Index: Names"); - font.set("Names", readIndex(aStream)); - dump(font.get("Names")); + font.set("Names", readFontIndexData(aStream)); // Read the Top Dict Index dump("Reading Index: TopDict"); - var topDict = readIndex(aStream, true); + var topDict = readFontIndexData(aStream, true); // Read the String Index dump("Reading Index: Strings"); - var strings = readIndex(aStream); + var strings = readFontIndexData(aStream); // Fill up the Strings dictionary with the new unique strings for (var i = 0; i < strings.length; i++) @@ -905,23 +972,31 @@ var Type2Parser = function(aFilePath) { var objects = []; var count = topDict.length; for (var i = 0; i < count; i++) - parseAsToken(topDict[i], CFFDictOps); - - var topDictOffset = aStream.pos; - - for (var p in font.map) - dump(p + "::" + font.get(p)); + parseAsToken(topDict[i], CFFDictDataMap); // Read the Global Subr Index that comes just after the Strings Index // (cf. "The Compact Font Format Specification" Chapter 16) - dump("Reading Subr Index"); - var subrs = readIndex(aStream); + dump("Reading Global Subr Index"); + var subrs = readFontIndexData(aStream); + + // Reading Private Dict + var private = font.get("Private"); + log("Reading Private Dict (offset: " + private.offset + " size: " + private.size + ")"); + aStream.pos = private.offset; + + var privateDict = []; + for (var i = 0; i < private.size; i++) + privateDict.push(aStream.getByte()); + parseAsToken(privateDict, CFFDictPrivateDataMap); + + for (var p in font.map) + dump(p + "::" + font.get(p)); // Read CharStrings Index var charStringsOffset = font.get("CharStrings"); dump("Read CharStrings Index (offset: " + charStringsOffset + ")"); aStream.pos = charStringsOffset; - var charStrings = readIndex(aStream, true); + var charStrings = readFontIndexData(aStream, true); var charsetEntry = font.get("charset"); @@ -936,16 +1011,6 @@ var Type2Parser = function(aFilePath) { var charset = readCharset(aStream, charStrings); } - // Reading Private Dict - var private = font.get("Private"); - log("Reading Private Dict (offset: " + private.offset + " size: " + private.size + ")"); - aStream.pos = private.offset; - - var privateDict = []; - for (var i = 0; i < private.size; i++) - privateDict.push(aStream.getByte()); - log(privateDict); - log(decodeType2DictData(privateDict, CFFDictPrivate, true, true)); } }; diff --git a/cffStandardStrings.js b/cffStandardStrings.js index 25301ee31..585c1157e 100644 --- a/cffStandardStrings.js +++ b/cffStandardStrings.js @@ -392,7 +392,66 @@ var CFFStrings = [ "Semibold" ]; -var CFFDictOps = { +var CFFEncodingMap = { + "0": "-reserved-", + "1": "hstem", + "2": "-reserved-", + "3": "vstem", + "4": "vmoveto", + "5": "rlineto", + "6": "hlineto", + "7": "vlineto", + "8": "rrcurveto", + "9": "-reserved-", + "10": "callsubr", + "11": "return", + "12": { + "3": "and", + "4": "or", + "5": "not", + "9": "abs", + "10": "add", + "11": "div", + "12": "sub", + "14": "neg", + "15": "eq", + "18": "drop", + "20": "put", + "21": "get", + "22": "ifelse", + "23": "random", + "24": "mul", + "26": "sqrt", + "27": "dup", + "28": "exch", + "29": "index", + "30": "roll", + "34": "hflex", + "35": "flex", + "36": "hflex1", + "37": "flex1" + }, + "13": "-reserved-", + "14": "endchar", + "15": "-reserved-", + "16": "-reserved-", + "17": "-reserved-", + "18": "hstemhm", + "19": "hintmask", + "20": "cntrmask", + "21": "rmoveto", + "22": "hmoveto", + "23": "vstemhm", + "24": "rcurveline", + "25": "rlivecurve", + "26": "vvcurveto", + "27": "hhcurveto", + "29": "callgsubr", + "30": "vhcurveto", + "31": "hvcurveto" +}; + +var CFFDictDataMap = { "0": { name: "version", operand: "SID" @@ -551,153 +610,7 @@ var CFFDictOps = { } }; -var CFFDictCommands = { - "1": { - name: "hstem" - }, - "3": { - name: "vstem" - }, - "4": { - name: "vmoveto" - }, - "5": { - name: "rlineto" - }, - "6": { - name: "hlineto" - }, - "7": { - name: "vlineto" - }, - "8": { - name: "rrcurveto" - }, - "10": { - name: "callsubr" - }, - "11": { - name: "return" - }, - "12": { - "3": { - name: "and" - }, - "4": { - name: "or" - }, - "5": { - name: "not" - }, - "9": { - name: "abs" - }, - "10": { - name: "add" - }, - "11": { - name: "div" - }, - "12": { - name: "sub" - }, - "14": { - name: "neg" - }, - "15": { - name: "eq" - }, - "18": { - name: "drop" - }, - "20": { - name: "put" - }, - "21": { - name: "get" - }, - "22": { - name: "ifelse" - }, - "23": { - name: "random" - }, - "24": { - name: "mul" - }, - "26": { - name: "sqrt" - }, - "27": { - name: "dup" - }, - "28": { - name: "exch" - }, - "29": { - name: "index" - }, - "30": { - name: "roll" - }, - "34": { - name: "hflex" - }, - "35": { - name: "flex" - }, - "36": { - name: "hflex1" - }, - "37": { - name: "flex1" - } - }, - "14": { - name: "endchar" - }, - "18": { - name: "hstemhm" - }, - "19": { - name: "hintmask" - }, - "20": { - name: "cntrmask" - }, - "21": { - name: "rmoveto" - }, - "22": { - name: "hmoveto" - }, - "23": { - name: "vstemhm" - }, - "24": { - name: "rcurveline" - }, - "25": { - name: "rlivecurve" - }, - "26": { - name: "vvcurveto" - }, - "27": { - name: "hhcurveto" - }, - "29": { - name: "callgsubr" - }, - "30": { - name: "vhcurveto" - }, - "31": { - name: "hvcurveto" - } -}; - -var CFFDictPrivate = { +var CFFDictPrivateDataMap = { "6": { name: "BluesValues", operand: "delta" From 74abf984d5823f56179ca8f266d3eb320003aeaf Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 10 Jun 2011 01:20:00 +0200 Subject: [PATCH 15/72] Add the beginning of a Type1 to Type2 charstring converter --- PDFFont.js | 358 ++++++++++++++++++++++++++++++++++++++++++++++++----- pdf.js | 2 +- test.html | 1 + 3 files changed, 328 insertions(+), 33 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 084174dad..5685e6874 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1,9 +1,15 @@ - -/* - * This dictionary hold the decoded fonts +/** + * This dictionary holds decoded fonts data. */ var Fonts = new Dict(); +/** + * This simple object keep a trace of the fonts that have already been decoded + * by storing a map between the name given by the PDF and the name gather from + * the font (aka the PostScript code of the font itself for Type1 font). + */ +var _Fonts = {}; + var Base64Encoder = { encode: function(aData) { @@ -16,12 +22,10 @@ var Base64Encoder = { } }; - - - var TrueTypeFont = function(aFontName, aFontFile) { - if (Fonts.get(aFontName)) + if (_Fonts[aFontName]) return; + _Fonts[aFontName] = true; //log("Loading a TrueType font: " + aFontName); var fontData = Base64Encoder.encode(aFontFile); @@ -36,7 +40,16 @@ var TrueTypeFont = function(aFontName, aFontFile) { var Type1Parser = function(aAsciiStream, aBinaryStream) { - var lexer = new Lexer(aAsciiStream); + if (IsStream(aAsciiStream)) { + var lexer = new Lexer(aAsciiStream); + } else { + var lexer = { + __data__: aAsciiStream.slice(), + getObj: function() { + return this.__data__.shift(); + } + } + } // Turn on this flag for additional debugging logs var debug = false; @@ -46,6 +59,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { log(aData); }; + // Hold the fontName as declared inside the /FontName postscript directive + // XXX This is a hack but at the moment I need it to map the name declared + // in the PDF and the name in the PS code. + var fontName = ""; + /* * Parse a whole Type1 font stream (from the first segment to the last) * assuming the 'eexec' block is binary data and fill up the 'Fonts' @@ -55,6 +73,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { this.parse = function() { if (!debug) { while (!processNextToken()) {}; + return fontName; } else { // debug mode is used to debug postcript processing setTimeout(function() { @@ -62,7 +81,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { self.parse(); }, 0); } - } + }; /* * Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence @@ -87,7 +106,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var end = Date.now(); dump("Time to decrypt string of length " + count + " is " + (end - start)); return decryptedString.slice(aDiscardNumber); - } + }; /* * CharStrings are encoded following the the CharString Encoding sequence @@ -98,7 +117,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * CharString Number Encoding: * A CharString byte containing the values from 32 through 255 inclusive * indicate an integer. These values are decoded in four ranges. - * + * * 1. A CharString byte containing a value, v, between 32 and 246 inclusive, * indicate the integer v - 139. Thus, the integer values from -107 through * 107 inclusive may be encoded in single byte. @@ -110,7 +129,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * 3. A CharString byte containing a value, v, between 251 and 254 inclusive, * indicates an integer involving the next byte, w, according to the formula: * -[(v - 251) * 256] - w - 108 - * + * * 4. A CharString containing the value 255 indicates that the next 4 bytes * are a two complement signed integer. The first of these bytes contains the * highest order bits, the second byte contains the next higher order bits @@ -157,7 +176,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { "31": "hcurveto" }; - // XXX Is count++ the right thing to do? Is it not i++? function decodeCharString(aStream) { var start = Date.now(); var charString = []; @@ -167,12 +185,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { for (var i = 0; i < count; i++) { value = aStream.getByte(); - if (value < 0) { - continue; - } else if (value < 32) { + if (value < 32) { if (value == 12) { value = charStringDictionary["12"][aStream.getByte()]; - count++; + i++; } else { value = charStringDictionary[value]; } @@ -180,16 +196,16 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { value = parseInt(value) - 139; } else if (value <= 250) { value = ((value - 247) * 256) + parseInt(aStream.getByte()) + 108; - count++; + i++; } else if (value <= 254) { value = -((value - 251) * 256) - parseInt(aStream.getByte()) - 108; - count++; + i++; } else { var byte = aStream.getByte(); var high = (byte >> 1); value = (byte - high) << 24 | aStream.getByte() << 16 | aStream.getByte() << 8 | aStream.getByte(); - count += 4; + i += 4; } charString.push(value); @@ -228,6 +244,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { return this.__innerStack__[this.__innerStack__.length - 1]; }, + get: function(aIndex) { + return this.__innerStack__[aIndex]; + }, + dump: function() { log("=== Start Dumping operandStack ==="); var str = []; @@ -345,7 +365,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { return lexer.getObj(); }; - /* * Get the next token from the executionStack and process it. * Actually the function does not process the third segment of a Type1 font @@ -531,7 +550,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var font = operandStack.pop(); var key = operandStack.pop(); dump("definefont " + font + " with key: " + key); + + // The key will be the identifier to recognize this font + fontName = key; Fonts.set(key, font); + operandStack.push(font); break; @@ -600,6 +623,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var decodedCharString = decodeCharString(charStream); dump("decodedCharString: " + decodedCharString); operandStack.push(decodedCharString); + // boolean indicating if the operation is a success or not operandStack.push(true); break; @@ -630,36 +654,305 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { } break; } - } else if (obj){ + } else if (obj) { dump("unknow: " + obj); operandStack.push(obj); + } else { // The End! + operandStack.dump(); + return true; } return false; } + + function aggregateCommand(aCommand) { + var command = aCommand; + switch (command) { + case "hstem": + case "vstem": + break; + + case "rrcurveto": + var stack = [operandStack.pop(), operandStack.pop(), + operandStack.pop(), operandStack.pop(), + operandStack.pop(), operandStack.pop()]; + var next = true; + while (next) { + var op = operandStack.peek(); + if (op == "rrcurveto") { + operandStack.pop(); + stack.push(operandStack.pop()); + stack.push(operandStack.pop()); + stack.push(operandStack.pop()); + stack.push(operandStack.pop()); + stack.push(operandStack.pop()); + stack.push(operandStack.pop()); + } else { + next = false; + } + } + break; + + case "hlineto": + case "vlineto": + var last = command; + var stack = [operandStack.pop()]; + var next = true; + while (next) { + var op = operandStack.peek(); + if (op == "vlineto" && last == "hlineto") { + operandStack.pop(); + stack.push(operandStack.pop()); + } else if (op == "hlineto" && last == "vlineto") { + operandStack.pop(); + stack.push(operandStack.pop()); + } else if (op == "rlineto" && command == "hlineto") { + operandStack.pop(); + var x = stack.pop(); + operandStack.push(0); + operandStack.push(x); + command = "rlineto"; + } else if (op == "rlineto" && command == "vlineto") { + operandStack.pop(); + operandStack.push(0); + command = "rlineto"; + } else { + next = false; + } + last = op; + } + break; + + case "rlineto": + var stack = [operandStack.pop(), operandStack.pop()]; + var next = true; + while (next) { + var op = operandStack.peek(); + if (op == "rlineto") { + operandStack.pop(); + stack.push(operandStack.pop()); + stack.push(operandStack.pop()); + } else if (op == "hlineto") { + operandStack.pop(); + stack.push(0); + stack.push(operandStack.pop()); + } else if (op == "vlineto") { + operandStack.pop(); + stack.push(operandStack.pop()); + stack.push(0); + } else { + next= false; + } + } + break; + } + + while (stack.length) + operandStack.push(stack.pop()); + operandStack.push(command); + }; + + + /* + * Flatten the commands by interpreting the postscript code and replacing + * every 'callsubr', 'callothersubr' by the real commands. + * At the moment OtherSubrs are not fully supported and only otherSubrs 0-4 + * as descrived in 'Using Subroutines' of 'Adobe Type 1 Font Format', + * chapter 8. + */ + this.flattenCharstring = function(aCharString, aDefaultWidth, aNominalWidth, aSubrs) { + var leftSidebearing = 0; + var lastPoint = 0; + while (true) { + var obj = nextInStack(); + if (IsBool(obj) || IsInt(obj) || IsNum(obj)) { + dump("Value: " + obj); + operandStack.push(obj); + } else if (IsString(obj)) { + dump("String: " + obj); + switch (obj) { + case "hsbw": + var charWidthVector = operandStack.pop(); + leftSidebearing = operandStack.pop(); + + if (charWidthVector != aDefaultWidth) + operandStack.push(charWidthVector - aNominalWidth); + break; + + case "setcurrentpoint": + case "dotsection": + case "seac": + case "sbw": + error(obj + " parsing is not implemented (yet)"); + break; + + case "vstem3": + operandStack.push("vstem"); + break; + + case "vstem": + log(obj + " is not converted (yet?)"); + operandStack.push("vstem"); + break; + + case "closepath": + case "return": + break; + + case "hlineto": + case "vlineto": + case "rlineto": + case "rrcurveto": + aggregateCommand(obj); + break; + + case "rmoveto": + var dy = operandStack.pop(); + var dx = operandStack.pop(); + + if (leftSidebearing) { + dx += leftSidebearing; + leftSidebearing = 0; + } + + operandStack.push(dx); + operandStack.push(dy); + operandStack.push("rmoveto"); + break; + + case "hstem": + case "hstem3": + var dy = operandStack.pop(); + var y = operandStack.pop(); + if (operandStack.peek() == "hstem" || + operandStack.peek() == "hstem3") + operandStack.pop(); + + operandStack.push(y - lastPoint); + lastPoint = y + dy; + + operandStack.push(dy); + operandStack.push("hstem"); + break; + + case "callsubr": + var index = operandStack.pop(); + executionStack.push(aSubrs[index].slice()); + break; + + case "callothersubr": + log("callothersubr"); + // XXX need to be improved + var index = operandStack.pop(); + var count = operandStack.pop(); + var data = operandStack.pop(); + operandStack.push(3); + operandStack.push("callothersubr"); + break; + case "endchar": + operandStack.push("endchar"); + return operandStack.__innerStack__.slice(); + case "pop": + operandStack.pop(); + break; + default: + operandStack.push(obj); + break; + } + } + } + } }; var type1hack = false; var Type1Font = function(aFontName, aFontFile) { + if (_Fonts[aFontName]) + return; + _Fonts[aFontName] = true; + // All Type1 font program should begin with the comment %! if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) error("Invalid file header"); if (!type1hack) { - type1hack= true; - var start = Date.now(); + type1hack = true; + var start = Date.now(); - var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); - var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); + var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); + var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); - this.parser = new Type1Parser(ASCIIStream, binaryStream); - this.parser.parse(); + this.parser = new Type1Parser(ASCIIStream, binaryStream); + var fontName = this.parser.parse(); + this.convertToOTF(fontName); + } +}; - var end = Date.now(); - //log("Time to parse font is:" + (end - start)); +Type1Font.prototype = { + convertToOTF: function(aFontName) { + var font = Fonts.get(aFontName); - this.convert(); + var private = font.get("Private"); + var subrs = private.get("Subrs"); + var otherSubrs = private.get("OtherSubrs"); + var charstrings = font.get("CharStrings") + + // Try to get the most used glyph width + var widths = {}; + for (var glyph in charstrings.map) { + var glyphData = charstrings.get(glyph); + var glyphWidth = glyphData[1]; + if (widths[glyphWidth]) + widths[glyphWidth]++; + else + widths[glyphWidth] = 1; + } + + var defaultWidth = 0; + var used = 0; + for (var width in widths) { + if (widths[width] > used) { + defaultWidth = width; + used = widths[width]; + } + } + log("defaultWidth to used: " + defaultWidth); + + var maxNegDistance = 0; + var maxPosDistance = 0; + for (var width in widths) { + var diff = width - defaultWidth; + if (diff < 0 && diff < maxNegDistance) { + maxNegDistance = diff; + } else if (diff > 0 && diff > maxPosDistance) { + maxPosDistance = diff; + } + } + + var nominalWidth = parseInt(defaultWidth) + (parseInt(maxPosDistance) + parseInt(maxNegDistance)) / 2; + log("nominalWidth to used: " + nominalWidth); + log("Hack nonimal:" + (nominalWidth = 615)); + + for (var glyph in charstrings.map) { + if (glyph == ".notdef") + continue; + + var glyphData = charstrings.get(glyph); + var parser = new Type1Parser(glyphData); + log("=================================== " + glyph + " =============================="); + log(charstrings.get(glyph)); + log(parser.flattenCharstring("A", defaultWidth, nominalWidth, subrs)); + log(validationData[glyph]); + } + + + /* + log(charStrings.get("A")); + log(newCharStrings.get("A")); + log(validationData["A"]); + */ + var end = Date.now(); + //log("Time to parse font is:" + (end - start)); } }; @@ -1016,6 +1309,7 @@ var Type2Parser = function(aFilePath) { // XXX +/* var xhr = new XMLHttpRequest(); xhr.open("GET", "titi.cff", false); xhr.mozResponseType = xhr.responseType = "arraybuffer"; @@ -1025,4 +1319,4 @@ var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse || xhr.responseArrayBuffer || xhr.response; var cff = new Type2Parser("titi.cff"); cff.parse(new Stream(cffData)); - +*/ diff --git a/pdf.js b/pdf.js index ea6a62f57..ef8a18861 100644 --- a/pdf.js +++ b/pdf.js @@ -2280,9 +2280,9 @@ var CanvasGraphics = (function() { var subtype = font.get("Subtype").name; switch (subtype) { case "Type1": - break; var fontDescriptor = font.get("FontDescriptor"); if (fontDescriptor.num) { + // XXX fetchIfRef looks expensive var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile")); font = new Type1Font(fontDescriptor.get("FontName").name, fontFile); diff --git a/test.html b/test.html index 5bd0ea119..83d48741e 100644 --- a/test.html +++ b/test.html @@ -6,6 +6,7 @@ + From 3834c9be08096c98fe229d58a5e3faa2a0d3368d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 10 Jun 2011 02:07:41 +0200 Subject: [PATCH 16/72] Move the Type2 utils to a new file and create a Stack object --- PDFFont.js | 630 ++++++++---------------------------------------- PDFFontUtils.js | 352 +++++++++++++++++++++++++++ 2 files changed, 457 insertions(+), 525 deletions(-) create mode 100644 PDFFontUtils.js diff --git a/PDFFont.js b/PDFFont.js index 5685e6874..407dd8b10 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -11,6 +11,47 @@ var Fonts = new Dict(); var _Fonts = {}; +var Stack = function() { + var innerStack = []; + + this.push = function(aOperand) { + innerStack.push(aOperand); + }; + + this.pop = function() { + if (!this.count()) + throw new Error("stackunderflow"); + return innerStack.pop(); + }; + + this.peek = function() { + if (!this.count()) + return null; + return innerStack[innerStack.length - 1]; + }; + + this.get = function(aIndex) { + return innerStack[aIndex]; + }; + + this.clear = function() { + innerStack = []; + }; + + this.count = function() { + return innerStack.length; + }; + + this.dump = function() { + for (var i = 0; i < this.length; i++) + log(innerStack[i]); + }; + + this.clone = function() { + return innerStack.slice(); + }; +}; + var Base64Encoder = { encode: function(aData) { var str = []; @@ -36,20 +77,8 @@ var TrueTypeFont = function(aFontName, aFontFile) { document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0); }; - - - var Type1Parser = function(aAsciiStream, aBinaryStream) { - if (IsStream(aAsciiStream)) { - var lexer = new Lexer(aAsciiStream); - } else { - var lexer = { - __data__: aAsciiStream.slice(), - getObj: function() { - return this.__data__.shift(); - } - } - } + var lexer = aAsciiStream ? new Lexer(aAsciiStream) : null; // Turn on this flag for additional debugging logs var debug = false; @@ -225,41 +254,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * operator returns one or more results, it does so by pushing them on the * operand stack. */ - var operandStack = { - __innerStack__: [], - - push: function(aOperand) { - this.__innerStack__.push(aOperand); - }, - - pop: function() { - if (!this.length) - throw new Error("stackunderflow"); - return this.__innerStack__.pop(); - }, - - peek: function() { - if (!this.length) - return null; - return this.__innerStack__[this.__innerStack__.length - 1]; - }, - - get: function(aIndex) { - return this.__innerStack__[aIndex]; - }, - - dump: function() { - log("=== Start Dumping operandStack ==="); - var str = []; - for (var i = 0; i < this.length; i++) - log(this.__innerStack__[i]); - log("=== End Dumping operandStack ==="); - }, - - get length() { - return this.__innerStack__.length; - } - }; + var operandStack = new Stack(); // Flag indicating if the topmost operand of the operandStack is an array var operandIsArray = 0; @@ -277,42 +272,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { globalDict = new Dict(), userDict = new Dict(); - var dictionaryStack = { - __innerStack__: [systemDict, globalDict, userDict], - - push: function(aDictionary) { - this.__innerStack__.push(aDictionary); - }, - - pop: function() { - if (this.__innerStack__.length == 3) - return null; - - return this.__innerStack__.pop(); - }, - - peek: function() { - if (!this.length) - return null; - return this.__innerStack__[this.__innerStack__.length - 1]; - }, - - get: function(aIndex) { - return this.__innerStack__[aIndex]; - }, - - get length() { - return this.__innerStack__.length; - }, - - dump: function() { - log("=== Start Dumping dictionaryStack ==="); - var str = []; - for (var i = 0; i < this.length; i++) - log(this.__innerStack__[i]); - log("=== End Dumping dictionaryStack ==="); - }, - }; + var dictionaryStack = new Stack(); + dictionaryStack.push(systemDict); + dictionaryStack.push(globalDict); + dictionaryStack.push(userDict); /* * The execution stack holds executable objects (mainly procedures and files) @@ -324,31 +287,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * object off the execution stack and resumes executing the suspended object * beneath it. */ - var executionStack = { - __innerStack__: [], - - push: function(aProcedure) { - this.__innerStack__.push(aProcedure); - }, - - pop: function() { - return this.__innerStack__.pop(); - }, - - peek: function() { - if (!this.length) - return null; - return this.__innerStack__[this.__innerStack__.length - 1]; - }, - - get: function(aIndex) { - return this.__innerStack__[aIndex]; - }, - - get length() { - return this.__innerStack__.length; - } - }; + var executionStack = new Stack(); /* * Return the next token in the execution stack @@ -637,7 +576,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { default: var command = null; if (IsCmd(obj)) { - for (var i = 0; i < dictionaryStack.length; i++) { + for (var i = 0; i < dictionaryStack.count(); i++) { if (command = dictionaryStack.get(i).get(obj.cmd)) { dump("found in dictionnary for " + obj.cmd + " command: " + command); executionStack.push(command.slice()); @@ -760,7 +699,11 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * as descrived in 'Using Subroutines' of 'Adobe Type 1 Font Format', * chapter 8. */ - this.flattenCharstring = function(aCharString, aDefaultWidth, aNominalWidth, aSubrs) { + this.flattenCharstring = function(aCharstring, aDefaultWidth, aNominalWidth, aSubrs) { + operandStack.clear(); + executionStack.clear(); + executionStack.push(aCharstring); + var leftSidebearing = 0; var lastPoint = 0; while (true) { @@ -791,7 +734,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { break; case "vstem": - log(obj + " is not converted (yet?)"); + //log(obj + " is not converted (yet?)"); operandStack.push("vstem"); break; @@ -841,20 +784,24 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { break; case "callothersubr": - log("callothersubr"); // XXX need to be improved var index = operandStack.pop(); var count = operandStack.pop(); var data = operandStack.pop(); + if (index != 3) + log("callothersubr for index: " + index); operandStack.push(3); operandStack.push("callothersubr"); break; + case "endchar": operandStack.push("endchar"); - return operandStack.__innerStack__.slice(); + return operandStack.clone(); + case "pop": operandStack.pop(); break; + default: operandStack.push(obj); break; @@ -885,41 +832,31 @@ var Type1Font = function(aFontName, aFontFile) { this.parser = new Type1Parser(ASCIIStream, binaryStream); var fontName = this.parser.parse(); this.convertToOTF(fontName); + var end = Date.now(); + log("Time to parse font is:" + (end - start)); } }; Type1Font.prototype = { - convertToOTF: function(aFontName) { - var font = Fonts.get(aFontName); - - var private = font.get("Private"); - var subrs = private.get("Subrs"); - var otherSubrs = private.get("OtherSubrs"); - var charstrings = font.get("CharStrings") - - // Try to get the most used glyph width - var widths = {}; - for (var glyph in charstrings.map) { - var glyphData = charstrings.get(glyph); - var glyphWidth = glyphData[1]; - if (widths[glyphWidth]) - widths[glyphWidth]++; - else - widths[glyphWidth] = 1; - } - + getDefaultWidths: function(aCharstrings) { var defaultWidth = 0; - var used = 0; - for (var width in widths) { - if (widths[width] > used) { - defaultWidth = width; - used = widths[width]; - } - } - log("defaultWidth to used: " + defaultWidth); + var defaultUsedCount = 0; - var maxNegDistance = 0; - var maxPosDistance = 0; + var widths = {}; + for (var glyph in aCharstrings.map) { + var width = aCharstrings.get(glyph)[1]; + var usedCount = (widths[width] || 0) + 1; + + if (usedCount > defaultUsedCount) { + defaultUsedCount = usedCount; + defaultWidth = width; + } + + widths[width] = usedCount; + } + defaultWidth = parseInt(defaultWidth); + + var maxNegDistance = 0, maxPosDistance = 0; for (var width in widths) { var diff = width - defaultWidth; if (diff < 0 && diff < maxNegDistance) { @@ -929,394 +866,37 @@ Type1Font.prototype = { } } - var nominalWidth = parseInt(defaultWidth) + (parseInt(maxPosDistance) + parseInt(maxNegDistance)) / 2; + return { + default: defaultWidth, + nominal: defaultWidth + (maxPosDistance + maxNegDistance) / 2 + }; + }, + + convertToOTF: function(aFontName) { + var font = Fonts.get(aFontName); + + var charstrings = font.get("CharStrings") + var defaultWidths = this.getDefaultWidths(charstrings); + var defaultWidth = defaultWidths.default; + var nominalWidth = defaultWidths.nominal; + + log("defaultWidth to used: " + defaultWidth); log("nominalWidth to used: " + nominalWidth); log("Hack nonimal:" + (nominalWidth = 615)); + + var glyphs = {}; + var subrs = font.get("Private").get("Subrs"); + var parser = new Type1Parser(); for (var glyph in charstrings.map) { - if (glyph == ".notdef") - continue; + var charstring = charstrings.get(glyph); + glyphs[glyph] = parser.flattenCharstring(charstring, defaultWidth, nominalWidth, subrs); - var glyphData = charstrings.get(glyph); - var parser = new Type1Parser(glyphData); - log("=================================== " + glyph + " =============================="); - log(charstrings.get(glyph)); - log(parser.flattenCharstring("A", defaultWidth, nominalWidth, subrs)); - log(validationData[glyph]); + //log("=================================== " + glyph + " =============================="); + //log(charstrings.get(glyph)); + //log(flattenedCharstring); + //log(validationData[glyph]); } - - - /* - log(charStrings.get("A")); - log(newCharStrings.get("A")); - log(validationData["A"]); - */ - var end = Date.now(); - //log("Time to parse font is:" + (end - start)); } }; - - - - - - - - - - - - -/** - * The Type2 reader code below is only used for debugging purpose since Type2 - * is only a CharString format and is never used directly as a Font file. - * - * So the code here is useful for dumping the data content of a .cff file in - * order to investigate the similarity between a Type1 CharString and a Type2 - * CharString. - */ - - -/** - * Build a charset by assigning the glyph name and the human readable form - * of the glyph data. - */ -function readCharset(aStream, aCharstrings) { - var charset = {}; - - var format = aStream.getByte(); - if (format == 0) { - charset[".notdef"] = readCharstringEncoding(aCharstrings[0]); - - var count = aCharstrings.length - 1; - for (var i = 1; i < count + 1; i++) { - var sid = aStream.getByte() << 8 | aStream.getByte(); - charset[CFFStrings[sid]] = readCharstringEncoding(aCharstrings[i]); - log(CFFStrings[sid] + "::" + charset[CFFStrings[sid]]); - } - } else if (format == 1) { - error("Charset Range are not supported"); - } else { - error("Invalid charset format"); - } - - return charset; -}; - -/** - * Take a Type2 binary charstring as input and transform it to a human - * readable representation as specified by the 'The Type 2 Charstring Format', - * chapter 3.1. - */ -function readCharstringEncoding(aString) { - var charstringTokens = []; - - var count = aString.length; - for (var i = 0; i < count; ) { - var value = aString[i++]; - var token = null; - - if (value < 0) { - continue; - } else if (value <= 11) { - token = CFFEncodingMap[value]; - } else if (value == 12) { - token = CFFEncodingMap[value][aString[i++]]; - } else if (value <= 18) { - token = CFFEncodingMap[value]; - } else if (value <= 20) { - var mask = aString[i++]; - token = CFFEncodingMap[value]; - } else if (value <= 27) { - token = CFFEncodingMap[value]; - } else if (value == 28) { - token = aString[i++] << 8 | aString[i++]; - } else if (value <= 31) { - token = CFFEncodingMap[value]; - } else if (value < 247) { - token = parseInt(value) - 139; - } else if (value < 251) { - token = ((value - 247) * 256) + aString[i++] + 108; - } else if (value < 255) { - token = -((value - 251) * 256) - aString[i++] - 108; - } else {// value == 255 - token = aString[i++] << 24 | aString[i++] << 16 | - aString[i++] << 8 | aString[i]; - } - - charstringTokens.push(token); - } - - return charstringTokens; -}; - - -/** - * Take a binary DICT Data as input and transform it into a human readable - * form as specified by 'The Compact Font Format Specification', chapter 5. - */ -function readFontDictData(aString, aMap) { - var fontDictDataTokens = []; - - var count = aString.length; - for (var i = 0; i < count; i) { - var value = aString[i++]; - var token = null; - - if (value == 12) { - token = aMap[value][aString[i++]]; - } else if (value == 28) { - token = aString[i++] << 8 | aString[i++]; - } else if (value == 29) { - token = aString[i++] << 24 | - aString[i++] << 16 | - aString[i++] << 8 | - aString[i++]; - } else if (value == 30) { - token = ""; - var parsed = false; - while (!parsed) { - var byte = aString[i++]; - - var nibbles = [parseInt(byte / 16), parseInt(byte % 16)]; - for (var j = 0; j < nibbles.length; j++) { - var nibble = nibbles[j]; - switch (nibble) { - case 0xA: - token += "."; - break; - case 0xB: - token += "E"; - break; - case 0xC: - token += "E-"; - break; - case 0xD: - break; - case 0xE: - token += "-"; - break; - case 0xF: - parsed = true; - break; - default: - token += nibble; - break; - } - } - }; - token = parseFloat(token); - } else if (value <= 31) { - token = aMap[value]; - } else if (value <= 246) { - token = parseInt(value) - 139; - } else if (value <= 250) { - token = ((value - 247) * 256) + aString[i++] + 108; - } else if (value <= 254) { - token = -((value - 251) * 256) - aString[i++] - 108; - } else if (value == 255) { - error("255 is not a valid DICT command"); - } - - fontDictDataTokens.push(token); - } - - return fontDictDataTokens; -}; - - -/** - * Take a stream as input and return an array of objects. - * In CFF an INDEX is a structure with the following format: - * { - * count: 2 bytes (Number of objects stored in INDEX), - * offsize: 1 byte (Offset array element size), - * offset: [count + 1] bytes (Offsets array), - * data: - (Objects data) - * } - * - * More explanation are given in the 'CFF Font Format Specification', - * chapter 5. - */ -function readFontIndexData(aStream, aIsByte) { - var count = aStream.getByte() << 8 | aStream.getByte(); - var offsize = aStream.getByte(); - - function getNextOffset() { - switch (offsize) { - case 0: - return 0; - case 1: - return aStream.getByte(); - case 2: - return aStream.getByte() << 8 | aStream.getByte(); - case 3: - return aStream.getByte() << 16 | aStream.getByte() << 8 | - aStream.getByte(); - case 4: - return aStream.getByte() << 24 | aStream.getByte() << 16 | - aStream.getByte() << 8 | aStream.getByte(); - } - }; - - var offsets = []; - for (var i = 0; i < count + 1; i++) - offsets.push(getNextOffset()); - - log("Found " + count + " objects at offsets :" + offsets + " (offsize: " + offsize + ")"); - - // Now extract the objects - var relativeOffset = aStream.pos; - var objects = []; - for (var i = 0; i < count; i++) { - var offset = offsets[i]; - aStream.pos = relativeOffset + offset - 1; - - var data = []; - var length = offsets[i + 1] - 1; - for (var j = offset - 1; j < length; j++) - data.push(aIsByte ? aStream.getByte() : aStream.getChar()); - objects.push(data); - } - - return objects; -}; - -var Type2Parser = function(aFilePath) { - var font = new Dict(); - - // Turn on this flag for additional debugging logs - var debug = true; - - function dump(aStr) { - if (debug) - log(aStr); - }; - - function parseAsToken(aString, aMap) { - var decoded = readFontDictData(aString, aMap); - log(decoded); - - var stack = []; - var count = decoded.length; - for (var i = 0; i < count; i++) { - var token = decoded[i]; - if (IsNum(token)) { - stack.push(token); - } else { - switch (token.operand) { - case "SID": - font.set(token.name, CFFStrings[stack.pop()]); - break; - case "number number": - font.set(token.name, { - offset: stack.pop(), - size: stack.pop() - }); - break; - case "boolean": - font.set(token.name, stack.pop()); - break; - case "delta": - font.set(token.name, stack.pop()); - break; - default: - if (token.operand && token.operand.length) { - var array = []; - for (var j = 0; j < token.operand.length; j++) - array.push(stack.pop()); - font.set(token.name, array); - } else { - font.set(token.name, stack.pop()); - } - break; - } - } - } - }; - - this.parse = function(aStream) { - font.set("major", aStream.getByte()); - font.set("minor", aStream.getByte()); - font.set("hdrSize", aStream.getByte()); - font.set("offsize", aStream.getByte()); - - // Move the cursor after the header - aStream.skip(font.get("hdrSize") - aStream.pos); - - // Read the NAME Index - dump("Reading Index: Names"); - font.set("Names", readFontIndexData(aStream)); - - // Read the Top Dict Index - dump("Reading Index: TopDict"); - var topDict = readFontIndexData(aStream, true); - - // Read the String Index - dump("Reading Index: Strings"); - var strings = readFontIndexData(aStream); - - // Fill up the Strings dictionary with the new unique strings - for (var i = 0; i < strings.length; i++) - CFFStrings.push(strings[i].join("")); - - // Parse the TopDict operator - var objects = []; - var count = topDict.length; - for (var i = 0; i < count; i++) - parseAsToken(topDict[i], CFFDictDataMap); - - // Read the Global Subr Index that comes just after the Strings Index - // (cf. "The Compact Font Format Specification" Chapter 16) - dump("Reading Global Subr Index"); - var subrs = readFontIndexData(aStream); - - // Reading Private Dict - var private = font.get("Private"); - log("Reading Private Dict (offset: " + private.offset + " size: " + private.size + ")"); - aStream.pos = private.offset; - - var privateDict = []; - for (var i = 0; i < private.size; i++) - privateDict.push(aStream.getByte()); - parseAsToken(privateDict, CFFDictPrivateDataMap); - - for (var p in font.map) - dump(p + "::" + font.get(p)); - - // Read CharStrings Index - var charStringsOffset = font.get("CharStrings"); - dump("Read CharStrings Index (offset: " + charStringsOffset + ")"); - aStream.pos = charStringsOffset; - var charStrings = readFontIndexData(aStream, true); - - - var charsetEntry = font.get("charset"); - if (charsetEntry == 0) { - throw new Error("Need to support CFFISOAdobeCharset"); - } else if (charsetEntry == 1) { - throw new Error("Need to support CFFExpert"); - } else if (charsetEntry == 2) { - throw new Error("Need to support CFFExpertSubsetCharset"); - } else { - aStream.pos = charsetEntry; - var charset = readCharset(aStream, charStrings); - } - - } -}; - - -// XXX -/* -var xhr = new XMLHttpRequest(); -xhr.open("GET", "titi.cff", false); -xhr.mozResponseType = xhr.responseType = "arraybuffer"; -xhr.expected = (document.URL.indexOf("file:") == 0) ? 0 : 200; -xhr.send(null); -var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse || - xhr.responseArrayBuffer || xhr.response; -var cff = new Type2Parser("titi.cff"); -cff.parse(new Stream(cffData)); -*/ diff --git a/PDFFontUtils.js b/PDFFontUtils.js new file mode 100644 index 000000000..3ae87f4ab --- /dev/null +++ b/PDFFontUtils.js @@ -0,0 +1,352 @@ +/** + * The Type2 reader code below is only used for debugging purpose since Type2 + * is only a CharString format and is never used directly as a Font file. + * + * So the code here is useful for dumping the data content of a .cff file in + * order to investigate the similarity between a Type1 CharString and a Type2 + * CharString. + */ + + +/** + * Build a charset by assigning the glyph name and the human readable form + * of the glyph data. + */ +function readCharset(aStream, aCharstrings) { + var charset = {}; + + var format = aStream.getByte(); + if (format == 0) { + charset[".notdef"] = readCharstringEncoding(aCharstrings[0]); + + var count = aCharstrings.length - 1; + for (var i = 1; i < count + 1; i++) { + var sid = aStream.getByte() << 8 | aStream.getByte(); + charset[CFFStrings[sid]] = readCharstringEncoding(aCharstrings[i]); + log(CFFStrings[sid] + "::" + charset[CFFStrings[sid]]); + } + } else if (format == 1) { + error("Charset Range are not supported"); + } else { + error("Invalid charset format"); + } + + return charset; +}; + +/** + * Take a Type2 binary charstring as input and transform it to a human + * readable representation as specified by the 'The Type 2 Charstring Format', + * chapter 3.1. + */ +function readCharstringEncoding(aString) { + var charstringTokens = []; + + var count = aString.length; + for (var i = 0; i < count; ) { + var value = aString[i++]; + var token = null; + + if (value < 0) { + continue; + } else if (value <= 11) { + token = CFFEncodingMap[value]; + } else if (value == 12) { + token = CFFEncodingMap[value][aString[i++]]; + } else if (value <= 18) { + token = CFFEncodingMap[value]; + } else if (value <= 20) { + var mask = aString[i++]; + token = CFFEncodingMap[value]; + } else if (value <= 27) { + token = CFFEncodingMap[value]; + } else if (value == 28) { + token = aString[i++] << 8 | aString[i++]; + } else if (value <= 31) { + token = CFFEncodingMap[value]; + } else if (value < 247) { + token = parseInt(value) - 139; + } else if (value < 251) { + token = ((value - 247) * 256) + aString[i++] + 108; + } else if (value < 255) { + token = -((value - 251) * 256) - aString[i++] - 108; + } else {// value == 255 + token = aString[i++] << 24 | aString[i++] << 16 | + aString[i++] << 8 | aString[i]; + } + + charstringTokens.push(token); + } + + return charstringTokens; +}; + + +/** + * Take a binary DICT Data as input and transform it into a human readable + * form as specified by 'The Compact Font Format Specification', chapter 5. + */ +function readFontDictData(aString, aMap) { + var fontDictDataTokens = []; + + var count = aString.length; + for (var i = 0; i < count; i) { + var value = aString[i++]; + var token = null; + + if (value == 12) { + token = aMap[value][aString[i++]]; + } else if (value == 28) { + token = aString[i++] << 8 | aString[i++]; + } else if (value == 29) { + token = aString[i++] << 24 | + aString[i++] << 16 | + aString[i++] << 8 | + aString[i++]; + } else if (value == 30) { + token = ""; + var parsed = false; + while (!parsed) { + var byte = aString[i++]; + + var nibbles = [parseInt(byte / 16), parseInt(byte % 16)]; + for (var j = 0; j < nibbles.length; j++) { + var nibble = nibbles[j]; + switch (nibble) { + case 0xA: + token += "."; + break; + case 0xB: + token += "E"; + break; + case 0xC: + token += "E-"; + break; + case 0xD: + break; + case 0xE: + token += "-"; + break; + case 0xF: + parsed = true; + break; + default: + token += nibble; + break; + } + } + }; + token = parseFloat(token); + } else if (value <= 31) { + token = aMap[value]; + } else if (value <= 246) { + token = parseInt(value) - 139; + } else if (value <= 250) { + token = ((value - 247) * 256) + aString[i++] + 108; + } else if (value <= 254) { + token = -((value - 251) * 256) - aString[i++] - 108; + } else if (value == 255) { + error("255 is not a valid DICT command"); + } + + fontDictDataTokens.push(token); + } + + return fontDictDataTokens; +}; + + +/** + * Take a stream as input and return an array of objects. + * In CFF an INDEX is a structure with the following format: + * { + * count: 2 bytes (Number of objects stored in INDEX), + * offsize: 1 byte (Offset array element size), + * offset: [count + 1] bytes (Offsets array), + * data: - (Objects data) + * } + * + * More explanation are given in the 'CFF Font Format Specification', + * chapter 5. + */ +function readFontIndexData(aStream, aIsByte) { + var count = aStream.getByte() << 8 | aStream.getByte(); + var offsize = aStream.getByte(); + + function getNextOffset() { + switch (offsize) { + case 0: + return 0; + case 1: + return aStream.getByte(); + case 2: + return aStream.getByte() << 8 | aStream.getByte(); + case 3: + return aStream.getByte() << 16 | aStream.getByte() << 8 | + aStream.getByte(); + case 4: + return aStream.getByte() << 24 | aStream.getByte() << 16 | + aStream.getByte() << 8 | aStream.getByte(); + } + }; + + var offsets = []; + for (var i = 0; i < count + 1; i++) + offsets.push(getNextOffset()); + + log("Found " + count + " objects at offsets :" + offsets + " (offsize: " + offsize + ")"); + + // Now extract the objects + var relativeOffset = aStream.pos; + var objects = []; + for (var i = 0; i < count; i++) { + var offset = offsets[i]; + aStream.pos = relativeOffset + offset - 1; + + var data = []; + var length = offsets[i + 1] - 1; + for (var j = offset - 1; j < length; j++) + data.push(aIsByte ? aStream.getByte() : aStream.getChar()); + objects.push(data); + } + + return objects; +}; + +var Type2Parser = function(aFilePath) { + var font = new Dict(); + + // Turn on this flag for additional debugging logs + var debug = true; + + function dump(aStr) { + if (debug) + log(aStr); + }; + + function parseAsToken(aString, aMap) { + var decoded = readFontDictData(aString, aMap); + log(decoded); + + var stack = []; + var count = decoded.length; + for (var i = 0; i < count; i++) { + var token = decoded[i]; + if (IsNum(token)) { + stack.push(token); + } else { + switch (token.operand) { + case "SID": + font.set(token.name, CFFStrings[stack.pop()]); + break; + case "number number": + font.set(token.name, { + offset: stack.pop(), + size: stack.pop() + }); + break; + case "boolean": + font.set(token.name, stack.pop()); + break; + case "delta": + font.set(token.name, stack.pop()); + break; + default: + if (token.operand && token.operand.length) { + var array = []; + for (var j = 0; j < token.operand.length; j++) + array.push(stack.pop()); + font.set(token.name, array); + } else { + font.set(token.name, stack.pop()); + } + break; + } + } + } + }; + + this.parse = function(aStream) { + font.set("major", aStream.getByte()); + font.set("minor", aStream.getByte()); + font.set("hdrSize", aStream.getByte()); + font.set("offsize", aStream.getByte()); + + // Move the cursor after the header + aStream.skip(font.get("hdrSize") - aStream.pos); + + // Read the NAME Index + dump("Reading Index: Names"); + font.set("Names", readFontIndexData(aStream)); + + // Read the Top Dict Index + dump("Reading Index: TopDict"); + var topDict = readFontIndexData(aStream, true); + + // Read the String Index + dump("Reading Index: Strings"); + var strings = readFontIndexData(aStream); + + // Fill up the Strings dictionary with the new unique strings + for (var i = 0; i < strings.length; i++) + CFFStrings.push(strings[i].join("")); + + // Parse the TopDict operator + var objects = []; + var count = topDict.length; + for (var i = 0; i < count; i++) + parseAsToken(topDict[i], CFFDictDataMap); + + // Read the Global Subr Index that comes just after the Strings Index + // (cf. "The Compact Font Format Specification" Chapter 16) + dump("Reading Global Subr Index"); + var subrs = readFontIndexData(aStream); + + // Reading Private Dict + var private = font.get("Private"); + log("Reading Private Dict (offset: " + private.offset + " size: " + private.size + ")"); + aStream.pos = private.offset; + + var privateDict = []; + for (var i = 0; i < private.size; i++) + privateDict.push(aStream.getByte()); + parseAsToken(privateDict, CFFDictPrivateDataMap); + + for (var p in font.map) + dump(p + "::" + font.get(p)); + + // Read CharStrings Index + var charStringsOffset = font.get("CharStrings"); + dump("Read CharStrings Index (offset: " + charStringsOffset + ")"); + aStream.pos = charStringsOffset; + var charStrings = readFontIndexData(aStream, true); + + + var charsetEntry = font.get("charset"); + if (charsetEntry == 0) { + throw new Error("Need to support CFFISOAdobeCharset"); + } else if (charsetEntry == 1) { + throw new Error("Need to support CFFExpert"); + } else if (charsetEntry == 2) { + throw new Error("Need to support CFFExpertSubsetCharset"); + } else { + aStream.pos = charsetEntry; + var charset = readCharset(aStream, charStrings); + } + + } +}; + + +// XXX +/* +var xhr = new XMLHttpRequest(); +xhr.open("GET", "titi.cff", false); +xhr.mozResponseType = xhr.responseType = "arraybuffer"; +xhr.expected = (document.URL.indexOf("file:") == 0) ? 0 : 200; +xhr.send(null); +var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse || + xhr.responseArrayBuffer || xhr.response; +var cff = new Type2Parser("titi.cff"); +cff.parse(new Stream(cffData)); +*/ From 18b2e3c6f874b850e3e8974df43c2df8dcba43e6 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 10 Jun 2011 06:12:59 +0200 Subject: [PATCH 17/72] Add the beginning of a CFF encoder --- PDFFont.js | 209 +++++++++++++++++++++++++++++++++++++++++++++++- PDFFontUtils.js | 19 ++--- t2data.js | 60 ++++++++++++++ test.html | 1 + 4 files changed, 279 insertions(+), 10 deletions(-) create mode 100644 t2data.js diff --git a/PDFFont.js b/PDFFont.js index 407dd8b10..c001c825e 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -57,7 +57,7 @@ var Base64Encoder = { var str = []; var count = aData.length; for (var i = 0; i < count; i++) - str.push(aData.getChar()); + str.push(aData.getChar ? aData.getChar : String.fromCharCode(aData[i])); return window.btoa(str.join("")); } @@ -897,6 +897,213 @@ Type1Font.prototype = { //log(flattenedCharstring); //log(validationData[glyph]); } + + // Create a CFF font data + var cff = new Uint8Array(20000); + var currentOffset = 0; + + // Font header (major version, minor version, header size, offset size) + var header = [0x01, 0x00, 0x04, 0x04]; + currentOffset += header.length; + cff.set(header); + + // Names Index + var nameIndex = this.createCFFIndexHeader([aFontName]); + cff.set(nameIndex, currentOffset); + currentOffset += nameIndex.length; + + //Top Dict Index + var topDictIndex = [ + 0x00, 0x01, 0x01, 0x01, 0x29, + 248, 27, 0, // version + 248, 28, 1, // Notice + 248, 29, 2, // FullName + 248, 30, 3, // FamilyName + 248, 20, 4, // Weigth + 82, 251, 98, 250, 105, 249, 72, 5, // FontBBox + 248, 136, 15, // charset (offset: 500) + 28, 0, 0, 16, // Encoding (offset: 600) + 248, 236, 17, // CharStrings + 28, 0, 55, 28, 15, 160, 18 // Private (offset: 4000) + ]; + cff.set(topDictIndex, currentOffset); + currentOffset += topDictIndex.length; + + // Strings Index + var stringsIndex = [ + 0x00, 0x04, 0x01, + 0x01, 0x05, 0x06, 0x07, 0x08, + 0x31, 0x2E, 0x030, 0x35, // 1.05 + 0x2B, // + + 0x28, // ( + 0x29 // ) + ]; + cff.set(stringsIndex, currentOffset); + currentOffset += stringsIndex.length; + + + // Global Subrs Index + var globalSubrsIndex = [ + 0x00, 0x00, 0x00 + ]; + cff.set(globalSubrsIndex, currentOffset); + currentOffset += globalSubrsIndex.length; + + // Fill the space between this and the charset by '1' + var empty = new Array(500 - currentOffset); + for (var i = 0; i < empty.length; i++) + empty[i] = 0x01; + cff.set(empty, currentOffset); + currentOffset += empty.length; + + //Declare the letter 'C' + var charset = [ + 0x00, 0x00, 0x42 + ]; + cff.set(charset, currentOffset); + currentOffset += charset.length; + + // Fill the space between this and the charstrings data by '1' + var empty = new Array(600 - currentOffset); + for (var i = 0; i < empty.length; i++) + empty[i] = 0x01; + cff.set(empty, currentOffset); + currentOffset += empty.length; + + + // Encode the glyph and add it to the FUX + var charStringsIndex = [ + 0x00, 0x02, 0x01, 0x01, 0x03, 0x05, + 0x40, 0x0E, + 0xAF, 0x0E + ]; + cff.set(charStringsIndex, currentOffset); + currentOffset += charStringsIndex.length; + + // Fill the space between this and the private dict data by '1' + var empty = new Array(4000 - currentOffset); + for (var i = 0; i < empty.length; i++) + empty[i] = 0x01; + cff.set(empty, currentOffset); + currentOffset += empty.length; + + // Private Data + var privateData = [ + 248, 136, 20, + 248, 251, 21, + 119, 159, 248, 97, 159, 247, 87, 159, 6, + 30, 10, 3, 150, 37, 255, 12, 9, + 139, 12, 10, + 172, 10, + 172, 150, 143, 146, 150, 146, 12, 12, + 247, 32, 11, + 247, 10, 161, 147, 154, 150, 143, 12, 13, + 139, 12, 14, + 28, 0, 55, 19 + ]; + cff.set(privateData, currentOffset); + currentOffset += privateData.length; + + // Dump shit at the end of the file + var shit = [ + 0x00, 0x01, 0x01, 0x01, + 0x13, 0x5D, 0x65, 0x64, + 0x5E, 0x5B, 0xAF, 0x66, + 0xBA, 0xBB, 0xB1, 0xB0, + 0xB9, 0xBA, 0x65, 0xB2, + 0x5C, 0x1F, 0x0B + ]; + cff.set(shit, currentOffset); + currentOffset += shit.length; + + var file = new Uint8Array(cff, 0, currentOffset); + var parser = new Type2Parser(); + parser.parse(new Stream(file)); + + var file64 = Base64Encoder.encode(file); + console.log(file64); + + var data = []; + for (var i = 0; i < currentOffset; i++) + data.push(cff[i]); + log(data); + }, + + createCFFIndexHeader: function(aObjects, aIsByte) { + var data = []; + + // First 2 bytes contains the number of objects contained into this index + var count = aObjects.length; + var bytes = integerToBytes(count, 2); + for (var i = 0; i < bytes.length; i++) + data.push(bytes[i]); + + // Next byte contains the offset size use to reference object in the file + // Actually we're using 0x04 to be sure to be able to store everything + // without thinking of it while coding. + data.push(0x04); + + // Add another offset after this one because we need a new offset + var relativeOffset = 1; + for (var i = 0; i < count + 1; i++) { + var bytes = integerToBytes(relativeOffset, 4); + for (var j = 0; j < bytes.length; j++) + data.push(bytes[j]); + + if (aObjects[i]) + relativeOffset += aObjects[i].length; + } + + for (var i =0; i < count; i++) { + for (var j = 0; j < aObjects[i].length; j++) + data.push(aIsByte ? aObjects[i][j] : aObjects[i][j].charCodeAt(0)); + } + return data; } }; +function integerToBytes(aValue, aBytesCount) { + var bytes = []; + + do { + bytes[--aBytesCount] = (aValue & 0xFF); + aValue = aValue >> 8; + } while (aBytesCount && aValue > 0); + + return bytes; +}; + +function encodeNumber(aValue) { + var x = 0; + if (aValue >= -107 && aValue <= 107) { + return [aValue + 139]; + } else if (aValue >= 108 && aValue <= 1131) { + x = aValue - 108; + return [ + integerToBytes(x / 256 + 247, 1), + x % 256 + ]; + } else if (aValue >= -1131 && aValue <= -108) { + x = Math.abs(aValue) - 108; + return [ + integerToBytes(x / 256 + 251, 1), + x % 256 + ]; + } else if (aValue >= -32768 && aValue <= 32767) { + return [ + 28, + integerToBytes(aValue >> 8, 1), + integerToBytes(aValue, 1) + ]; + } else if (aValue >= (-2147483647-1) && aValue <= 2147483647) { + return [ + 0xFF, + integerToBytes(aValue >> 24, 1), + integerToBytes(aValue >> 16, 1), + integerToBytes(aValue >> 8, 1), + integerToBytes(aValue, 1) + ]; + } else { + error("Value: " + aValue + " is not allowed"); + } +} diff --git a/PDFFontUtils.js b/PDFFontUtils.js index 3ae87f4ab..02c588217 100644 --- a/PDFFontUtils.js +++ b/PDFFontUtils.js @@ -22,6 +22,7 @@ function readCharset(aStream, aCharstrings) { var count = aCharstrings.length - 1; for (var i = 1; i < count + 1; i++) { var sid = aStream.getByte() << 8 | aStream.getByte(); + log(sid); charset[CFFStrings[sid]] = readCharstringEncoding(aCharstrings[i]); log(CFFStrings[sid] + "::" + charset[CFFStrings[sid]]); } @@ -282,10 +283,12 @@ var Type2Parser = function(aFilePath) { // Read the Top Dict Index dump("Reading Index: TopDict"); var topDict = readFontIndexData(aStream, true); + log(topDict); // Read the String Index dump("Reading Index: Strings"); var strings = readFontIndexData(aStream); + log(strings); // Fill up the Strings dictionary with the new unique strings for (var i = 0; i < strings.length; i++) @@ -310,6 +313,7 @@ var Type2Parser = function(aFilePath) { var privateDict = []; for (var i = 0; i < private.size; i++) privateDict.push(aStream.getByte()); + dump("private:" + privateDict); parseAsToken(privateDict, CFFDictPrivateDataMap); for (var p in font.map) @@ -321,25 +325,23 @@ var Type2Parser = function(aFilePath) { aStream.pos = charStringsOffset; var charStrings = readFontIndexData(aStream, true); - + // Read Charset + dump("Read Charset for " + charStrings.length + " glyphs"); var charsetEntry = font.get("charset"); if (charsetEntry == 0) { - throw new Error("Need to support CFFISOAdobeCharset"); + error("Need to support CFFISOAdobeCharset"); } else if (charsetEntry == 1) { - throw new Error("Need to support CFFExpert"); + error("Need to support CFFExpert"); } else if (charsetEntry == 2) { - throw new Error("Need to support CFFExpertSubsetCharset"); + error("Need to support CFFExpertSubsetCharset"); } else { aStream.pos = charsetEntry; var charset = readCharset(aStream, charStrings); } - } }; -// XXX -/* var xhr = new XMLHttpRequest(); xhr.open("GET", "titi.cff", false); xhr.mozResponseType = xhr.responseType = "arraybuffer"; @@ -348,5 +350,4 @@ xhr.send(null); var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse || xhr.responseArrayBuffer || xhr.response; var cff = new Type2Parser("titi.cff"); -cff.parse(new Stream(cffData)); -*/ +//cff.parse(new Stream(cffData)); diff --git a/t2data.js b/t2data.js new file mode 100644 index 000000000..6e5f52344 --- /dev/null +++ b/t2data.js @@ -0,0 +1,60 @@ +var validationData = { +hyphen: ["-282","171","116","hstem","44","243","vstem","287","287","rmoveto","-243","-116","243","hlineto","endchar"], +period: ["-365","-13","20","hstem","125","156","rmoveto","-107","callsubr","endchar"], +zero: ["-13","28","645","28","hstem","24","158","136","158","vstem","250","688","rmoveto","-65","0","-66","-44","-41","-72","-32","-55","-22","-94","0","-83","rrcurveto","-206","94","-147","133","129","96","149","201","200","-97","151","-129","vhcurveto","68","-466","rmoveto","0","-59","-8","-78","-9","-27","-11","-29","-16","-14","-23","0","rrcurveto","-50","-19","59","148","hvcurveto","229","vlineto","151","19","58","48","49","20","-62","-147","vhcurveto","endchar"], +one: ["0","24","529","26","89","20","hstem","186","148","vstem","317","688","rmoveto","-252","-109","0","-26","rlineto","9","3","8","3","3","2","27","10","25","7","13","0","rrcurveto","25","11","-22","-47","hvcurveto","-385","vlineto","0","-82","-20","-17","-99","-1","rrcurveto","-24","375","24","vlineto","-91","2","-17","15","0","77","rrcurveto","570","vlineto","endchar"], +two: ["0","133","441","114","hstem","300","136","vstem","478","211","rmoveto","-24","hlineto","-29","-72","-12","-6","-102","0","rrcurveto","-151","0","162","154","rlineto","79","75","35","67","0","76","0","107","-77","76","-108","0","-50","0","-48","-20","-35","-36","-39","-39","-20","-34","-28","-75","rrcurveto","28","hlineto","31","62","37","28","54","0","44","0","33","-19","22","-36","12","-21","8","-28","0","-23","0","-44","-19","-55","-32","-49","-51","-78","-35","-42","-146","-156","rrcurveto","-23","416","vlineto","endchar"], +three: ["-14","50","571","81","hstemhm","283","146","-75","114","hintmask","58","523","rmoveto","38","61","33","23","52","0","63","0","39","-40","0","-63","0","-66","-35","-35","-95","-30","rrcurveto","-17","vlineto","83","-29","33","-17","36","-34","rrcurveto","hintmask","31","-29","18","-45","0","-48","0","-72","-38","-46","-60","0","-24","0","-19","11","-33","32","-39","39","-30","16","-30","0","rrcurveto","-38","-27","-24","-34","-54","61","-36","92","167","132","114","146","hvcurveto","0","46","-15","43","-29","35","-21","24","-17","12","-40","18","rrcurveto","hintmask","64","39","19","29","0","59","0","86","-61","51","-102","0","-99","0","-75","-50","-55","-103","rrcurveto","endchar"], +four: ["0","20","124","111","413","20","hstemhm","267","145","-143","143","hintmask","412","255","rmoveto","433","-80","vlineto","-59","-77","rlineto","-128","-167","-66","-92","-60","-95","rrcurveto","-113","248","-144","vlineto","hintmask","145","144","63","111","hlineto","-206","hmoveto","-209","0","209","297","rlineto","endchar"], +five: ["-8","61","496","127","hstem","373","78","vstem","149","549","rmoveto","278","0","43","127","-322","0","-97","-345","rlineto","95","-7","41","-6","44","-14","89","-28","53","-57","0","-66","0","-56","-44","-44","-56","0","-23","0","-27","12","-40","30","-43","32","-30","13","-26","0","rrcurveto","-36","-26","-25","-35","-53","58","-35","89","165","117","100","141","hvcurveto","0","104","-64","80","-109","31","-38","11","-31","4","-82","5","rrcurveto","endchar"], +six: ["-13","27","362","45","247","20","hstem","28","156","139","152","vstem","470","688","rmoveto","-133","-19","-62","-22","-76","-52","-112","-77","-59","-112","0","-133","rrcurveto","-173","92","-113","140","124","91","99","136","121","-73","78","-114","vhcurveto","-32","0","-22","-4","-30","-13","37","147","83","81","146","30","rrcurveto","-237","-286","rmoveto","67","23","-55","-159","-118","-12","-30","-49","hvcurveto","-23","0","-18","10","-10","19","-17","33","-10","73","0","93","0","62","6","57","7","5","8","6","14","4","14","0","rrcurveto","endchar"], +seven: ["0","20","519","137","hstem","477","676","rmoveto","-416","0","-44","-222","25","0","rlineto","19","64","24","21","55","0","rrcurveto","207","0","-200","-539","95","0","rlineto","endchar"], +eight: ["-13","32","640","29","hstemhm","28","121","-110","129","165","139","-133","119","hintmask","178","324","rmoveto","-57","-19","-23","-13","-26","-25","-29","-28","-15","-38","0","-44","rrcurveto","-102","85","-68","127","139","93","80","120","vhcurveto","0","87","-43","62","-107","68","rrcurveto","hintmask","96","34","40","41","0","64","rrcurveto","89","-76","56","-119","-135","-89","-70","-105","vhcurveto","0","-75","40","-55","99","-59","rrcurveto","120","98","rmoveto","-84","43","-46","55","0","56","rrcurveto","47","36","36","47","56","32","-45","-78","vhcurveto","0","-47","-8","-23","-33","-44","rrcurveto","hintmask","-98","-116","rmoveto","109","-73","24","-32","0","-74","rrcurveto","-67","-33","-41","-54","-61","-36","53","89","vhcurveto","0","55","11","31","40","59","rrcurveto","endchar"], +nine: ["-13","20","247","45","362","27","hstem","26","152","139","156","vstem","31","-13","rmoveto","140","20","68","25","78","59","102","77","54","107","0","127","rrcurveto","173","-92","113","-140","-125","-90","-99","-137","-118","73","-80","109","vhcurveto","38","0","26","5","27","14","-43","-152","-81","-79","-144","-29","rrcurveto","278","315","rmoveto","-1","-13","-1","-5","-3","-2","-7","-5","-19","-4","-15","0","rrcurveto","-62","-23","59","157","116","12","30","49","hvcurveto","25","0","17","-11","11","-23","15","-28","10","-70","0","-71","0","-27","-2","-32","-3","-45","rrcurveto","endchar"], +colon: ["-282","-13","169","147","169","hstem","166","156","rmoveto","-107","callsubr","316","vmoveto","-46","-38","-38","-45","-49","36","-37","48","47","38","38","46","47","-38","38","-47","hvcurveto","endchar"], +equal: ["-45","107","88","116","88","hstem","537","399","rmoveto","-504","-88","504","hlineto","-116","vmoveto","-504","-88","504","hlineto","endchar"], +A: ["107","0","25","172","39","223","20","191","20","hstem","689","25","rmoveto","-4","hlineto","-39","0","-16","21","-50","118","rrcurveto","-223","526","-28","0","-222","-548","rlineto","-38","-93","-12","-15","-48","-9","rrcurveto","-25","203","25","vlineto","-59","4","-23","11","0","26","0","13","8","25","21","54","rrcurveto","15","39","225","0","rlineto","34","-78","12","-36","0","-22","0","-21","-13","-10","-35","-2","-5","0","-13","-1","-14","-2","rrcurveto","-25","324","vlineto","-500","236","rmoveto","94","243","101","-243","rlineto","endchar"], +B: ["52","0","25","-25","32","614","30","-25","25","hstemhm","104","160","156","165","-138","172","hintmask","16","676","rmoveto","hintmask","-25","vlineto","69","-4","19","-16","0","-51","rrcurveto","-484","vlineto","0","-51","-15","-12","-73","-8","rrcurveto","hintmask","-25","322","vlineto","169","112","75","113","hvcurveto","0","45","-19","41","-35","31","-35","30","-34","15","-70","15","rrcurveto","hintmask","116","34","43","41","0","76","rrcurveto","103","-93","57","-167","vhcurveto","-61","-333","rmoveto","30","hlineto","hintmask","103","50","-54","-110","-97","-41","-50","-81","-44","-17","17","44","hvcurveto","504","vmoveto","36","15","13","39","vhcurveto","hintmask","70","32","-43","-94","hvcurveto","0","-105","-33","-28","-123","-3","rrcurveto","endchar"], +C: ["107","-19","48","629","33","hstem","49","177","vstem","657","152","rmoveto","-59","-62","-32","-24","-49","-19","-29","-12","-33","-6","-28","0","-66","0","-63","35","-29","52","-29","53","-14","72","0","100","0","207","64","110","120","0","47","0","43","-18","43","-38","rrcurveto","43","-39","23","-33","35","-76","rrcurveto","25","234","-27","hlineto","-14","-35","-11","-11","-19","0","-9","0","-15","4","-22","10","-59","24","-48","11","-48","0","rrcurveto","-199","-149","-154","-204","-206","147","-146","207","hvcurveto","71","0","59","17","55","37","32","22","22","20","45","50","rrcurveto","endchar"], +D: ["107","0","25","-25","35","607","34","-25","25","hstemhm","97","162","257","174","hintmask","97","91","rmoveto","0","-44","-25","-20","-58","-2","rrcurveto","hintmask","-25","316","vlineto","216","144","138","207","206","-140","125","-231","hvcurveto","-305","hlineto","hintmask","-25","vlineto","64","-6","19","-15","0","-46","rrcurveto","hintmask","162","23","rmoveto","23","20","12","39","vhcurveto","72","0","51","-33","34","-69","27","-53","14","-73","0","-84","0","-95","-20","-86","-32","-45","-34","-46","-48","-23","-65","0","rrcurveto","-45","-13","13","45","hvcurveto","endchar"], +E: ["52","0","25","-25","31","611","34","-25","25","hstemhm","104","162","hintmask","593","676","rmoveto","-577","hlineto","hintmask","-25","vlineto","69","-4","19","-16","0","-51","rrcurveto","-484","vlineto","0","-52","-14","-11","-74","-8","rrcurveto","hintmask","-25","585","vlineto","40","208","-28","0","rlineto","-31","-67","-20","-28","-36","-29","-46","-37","-55","-16","-76","0","rrcurveto","-64","-19","13","43","hvcurveto","242","vlineto","109","0","41","-39","12","-116","rrcurveto","26","338","-26","hlineto","-15","-114","-39","-36","-108","1","rrcurveto","232","vlineto","38","13","9","52","vhcurveto","163","0","49","-34","25","-133","rrcurveto","25","hlineto","endchar"], +F: ["-4","0","25","617","34","-25","25","hstemhm","104","162","hintmask","583","676","rmoveto","-567","hlineto","hintmask","-25","vlineto","69","-4","19","-15","0","-52","rrcurveto","-484","vlineto","0","-52","-14","-11","-74","-8","rrcurveto","-25","360","25","vlineto","-92","4","-18","12","0","55","rrcurveto","233","vlineto","102","-2","37","-37","14","-116","rrcurveto","25","338","-25","hlineto","-18","-114","-35","-35","-100","0","rrcurveto","232","vlineto","hintmask","37","13","10","51","vhcurveto","92","0","59","-17","31","-35","22","-25","11","-27","14","-63","rrcurveto","24","hlineto","endchar"], +G: ["163","-19","33","644","33","hstem","37","177","299","156","vstem","755","287","rmoveto","-343","-25","hlineto","86","-5","15","-11","0","-54","rrcurveto","-105","vlineto","-51","-29","-22","-67","vhcurveto","-63","0","-42","19","-33","43","-44","56","-21","86","0","120","0","209","63","111","121","0","46","0","43","-18","44","-38","43","-38","23","-34","35","-76","rrcurveto","25","234","-27","hlineto","-14","-35","-11","-11","-19","0","-9","0","-15","4","-22","10","-59","24","-48","11","-48","0","rrcurveto","-199","-149","-154","-206","-206","146","-144","210","hvcurveto","103","0","109","24","64","38","rrcurveto","127","vlineto","0","72","11","12","75","8","rrcurveto","endchar"], +I: ["-226","0","25","626","25","hstem","113","162","vstem","113","96","rmoveto","0","-50","-18","-14","-75","-7","rrcurveto","-25","350","25","vlineto","-76","5","-19","14","0","52","rrcurveto","484","vlineto","0","52","21","16","74","3","rrcurveto","25","-350","-25","vlineto","72","-5","21","-15","0","-51","rrcurveto","endchar"], +J: ["-96","33","714","25","hstem","3","116","109","162","vstem","390","559","rmoveto","0","72","14","15","75","5","rrcurveto","25","-352","-25","vlineto","81","-3","20","-14","0","-54","rrcurveto","-556","vlineto","-62","-19","-25","-45","-27","-18","13","19","vhcurveto","0","8","3","6","9","12","12","15","3","8","0","15","rrcurveto","40","-34","35","-39","-37","-33","-34","-38","vhcurveto","0","-41","28","-45","39","-23","26","-14","40","-9","40","0","rrcurveto","140","74","76","143","hvcurveto","endchar"], +L: ["52","0","25","-25","31","620","25","hstemhm","105","162","hintmask","638","227","rmoveto","-29","hlineto","-33","-78","-20","-31","-35","-32","-41","-37","-56","-18","-77","0","rrcurveto","-61","-19","13","43","hvcurveto","472","vlineto","0","75","14","13","87","4","rrcurveto","25","-349","-25","vlineto","68","-4","18","-16","0","-51","rrcurveto","-484","vlineto","hintmask","0","-51","-13","-11","-73","-9","rrcurveto","hintmask","-25","578","vlineto","endchar"], +M: ["329","0","25","626","25","hstem","105","42","531","155","vstem","678","609","rmoveto","-509","vlineto","0","-56","-14","-12","-75","-7","rrcurveto","-25","332","25","vlineto","-78","10","-10","11","0","71","rrcurveto","442","vlineto","0","72","15","15","73","5","rrcurveto","25","-252","vlineto","-200","-472","-200","472","-253","0","0","-25","rlineto","73","-6","16","-13","0","-52","rrcurveto","-475","vlineto","0","-60","-13","-12","-78","-8","rrcurveto","-25","234","25","vlineto","-82","6","-19","19","0","74","rrcurveto","0","470","252","-594","27","0","rlineto","endchar"], +N: ["107","-18","20","-2","25","626","25","hstemhm","104","44","431","44","hintmask","230","676","rmoveto","-211","-25","hlineto","20","0","17","-15","48","-57","rrcurveto","-474","vlineto","0","-57","-15","-14","-73","-9","rrcurveto","-25","227","25","vlineto","-77","9","-18","19","0","71","rrcurveto","0","402","rlineto","hintmask","447","-544","28","0","0","589","rlineto","0","57","13","13","65","10","rrcurveto","25","-215","-25","vlineto","74","-7","19","-21","0","-71","rrcurveto","-305","vlineto","endchar"], +O: ["163","-19","33","644","33","hstem","35","177","354","177","vstem","393","691","rmoveto","-209","-149","-148","-208","-207","147","-147","207","207","147","148","207","204","-149","151","-201","hvcurveto","-1","-33","rmoveto","110","64","-121","-208","-205","-62","-110","-115","-115","-62","110","202","216","63","116","117","hvcurveto","endchar"], +P: ["-4","0","25","616","35","-25","25","hstemhm","100","162","166","172","hintmask","262","303","rmoveto","135","1","34","3","44","18","81","31","44","61","0","77","rrcurveto","116","-95","66","-167","vhcurveto","-322","hlineto","hintmask","-25","vlineto","70","-6","14","-16","0","-70","rrcurveto","-442","vlineto","0","-62","-13","-20","-47","-6","-3","0","-10","-2","-11","-2","rrcurveto","-25","334","25","vlineto","-79","10","-9","9","0","73","rrcurveto","hintmask","489","vmoveto","23","17","12","33","83","33","-44","-108","vhcurveto","0","-63","-12","-38","-26","-23","-24","-20","-33","-7","-71","0","rrcurveto","endchar"], +R: ["107","0","25","617","34","-25","25","hstemhm","114","162","183","171","hintmask","715","25","rmoveto","-18","0","-13","6","-10","13","rrcurveto","-201","285","rlineto","59","19","25","13","28","25","29","27","16","40","0","45","rrcurveto","115","-99","63","-183","vhcurveto","-322","hlineto","hintmask","-25","vlineto","74","-5","14","-15","0","-72","rrcurveto","-442","vlineto","0","-73","-10","-10","-78","-9","rrcurveto","-25","338","25","vlineto","-78","10","-10","11","0","71","rrcurveto","196","27","vlineto","207","-313","205","0","rlineto","hintmask","-439","600","rmoveto","0","6","4","15","3","6","6","10","16","5","25","0","92","0","37","-43","0","-105","0","-64","-15","-39","-32","-21","-26","-17","-34","-7","-76","-1","rrcurveto","endchar"], +S: ["-59","-19","33","646","31","-19","20","hstemhm","44","109","241","119","hintmask","484","475","rmoveto","217","-30","vlineto","-7","-26","-8","-8","-17","0","-9","0","-11","3","-22","8","rrcurveto","hintmask","-47","16","-32","6","-38","0","-136","0","-83","-77","0","-126","0","-88","52","-63","119","-57","rrcurveto","67","-32","rlineto","88","-42","24","-26","0","-52","0","-69","-49","-45","-77","0","-58","0","-50","25","-38","49","-27","37","-14","33","-17","70","rrcurveto","-29","-247","29","hlineto","6","25","8","9","16","0","8","0","11","-3","22","-7","50","-17","37","-7","43","0","148","0","100","85","0","126","0","75","-46","75","-64","32","rrcurveto","-147","73","rlineto","-81","40","-22","25","0","48","0","62","42","38","68","0","45","0","42","-19","36","-37","34","-35","16","-29","20","-65","rrcurveto","endchar"], +T: ["52","0","25","631","20","hstem","253","162","vstem","253","117","rmoveto","0","-74","-11","-11","-86","-7","rrcurveto","-25","357","25","vlineto","-87","6","-11","10","0","76","rrcurveto","527","vlineto","123","-4","52","-46","17","-119","rrcurveto","29","0","-2","201","-600","0","-3","-201","29","0","rlineto","17","119","52","46","124","4","rrcurveto","endchar"], +V: ["107","-18","20","654","20","hstem","701","676","rmoveto","-213","-25","hlineto","71","-5","15","-7","0","-32","0","-16","-3","-12","-17","-43","rrcurveto","-127","-329","-138","334","rlineto","-19","45","-4","12","0","15","0","23","15","11","38","2","5","0","13","1","15","1","rrcurveto","25","-336","-25","vlineto","50","-7","9","-8","25","-55","rrcurveto","256","-599","27","0","228","587","rlineto","24","62","14","13","52","7","rrcurveto","endchar"], +W: ["385","-15","20","437","20","194","20","hstem","981","676","rmoveto","-182","-25","hlineto","54","-3","15","-10","0","-31","0","-13","-2","-15","-5","-14","rrcurveto","-112","-343","-108","336","rlineto","-10","32","-4","15","0","9","0","24","15","9","44","3","2","0","5","0","6","1","rrcurveto","25","-312","-25","vlineto","41","-2","20","-9","11","-24","rrcurveto","35","-96","-118","-308","-120","364","rlineto","-5","16","-2","8","0","9","0","29","12","9","52","4","rrcurveto","25","-294","-25","vlineto","42","-6","10","-9","17","-49","rrcurveto","212","-602","28","0","186","477","171","-477","27","0","200","602","rlineto","13","40","23","21","33","3","rrcurveto","endchar"], +a: ["-14","20","435","32","hstemhm","25","146","122","138","-137","137","hintmask","473","64","rmoveto","-10","-10","rlineto","-3","-3","-3","-1","-5","0","rrcurveto","-14","-7","9","16","hvcurveto","261","vlineto","84","-76","53","-122","-113","-76","-51","-75","-41","24","-25","41","40","28","24","34","vhcurveto","0","14","-6","13","-12","16","-9","10","-3","6","0","6","rrcurveto","21","28","16","35","vhcurveto","hintmask","58","26","-27","-61","hvcurveto","-73","vlineto","-119","-36","-49","-20","-37","-25","-43","-30","-21","-34","0","-43","0","-61","47","-45","64","0","57","0","46","20","55","50","11","-51","22","-19","49","0","43","0","31","16","38","41","rrcurveto","hintmask","-195","57","rmoveto","-27","-31","-20","-12","-24","0","-30","0","-21","27","0","40","0","58","42","42","80","21","rrcurveto","endchar"], +b: ["-59","-14","32","401","54","179","24","hstem","72","139","163","147","vstem","211","676","rmoveto","-194","-24","hlineto","46","-9","9","-9","0","-40","rrcurveto","-607","12","vlineto","79","56","rlineto","46","-41","36","-16","50","0","rrcurveto","133","93","104","149","138","-77","96","-111","hvcurveto","-48","0","-37","-17","-37","-39","rrcurveto","-57","vmoveto","17","43","20","16","33","0","rrcurveto","62","31","-67","-131","-138","-30","-65","-64","-42","-27","31","48","hvcurveto","endchar"], +c: ["-171","-14","67","389","31","hstem","25","141","vstem","412","109","rmoveto","-37","-42","-26","-14","-41","0","rrcurveto","-87","-55","87","136","103","32","63","52","hvcurveto","16","0","15","-8","6","-11","5","-9","0","0","0","-42","1","-49","18","-23","37","0","rrcurveto","42","26","24","39","62","-67","48","-88","-136","-100","-106","-144","-138","90","-99","124","hvcurveto","77","0","56","31","58","74","rrcurveto","endchar"], +d: ["-59","-14","56","-22","23","374","56","179","24","hstemhm","25","148","163","139","hintmask","339","-13","rmoveto","46","13","25","5","62","7","rrcurveto","62","8","0","23","rlineto","-46","3","-13","13","0","42","rrcurveto","575","-215","-24","vlineto","67","-5","9","-7","0","-46","rrcurveto","-183","vlineto","-43","46","-30","16","-46","0","rrcurveto","-110","-82","-107","-145","hvcurveto","hintmask","-136","76","-99","105","vhcurveto","53","0","33","16","47","50","rrcurveto","-3","60","rmoveto","0","-6","-10","-17","-12","-13","-20","-23","-21","-11","-22","0","rrcurveto","-53","-25","60","127","129","27","59","58","hvcurveto","33","0","31","-24","14","-38","rrcurveto","endchar"], +e: ["-171","-14","72","187","37","160","31","hstem","402","125","rmoveto","-40","-49","-31","-18","-43","0","-39","0","-29","18","-21","35","-18","32","-8","33","-4","69","rrcurveto","252","hlineto","-6","84","-15","47","-32","38","-33","39","-46","20","-55","0","rrcurveto","-125","-84","-99","-146","-146","82","-96","124","hvcurveto","81","0","49","32","65","93","rrcurveto","-262","171","rmoveto","3","120","18","40","49","0","28","0","19","-16","8","-31","5","-19","2","-28","2","-51","rrcurveto","-15","vlineto","endchar"], +f: ["-282","0","24","393","44","199","31","hstem","71","139","vstem","71","417","rmoveto","-333","vlineto","0","-43","-11","-12","-46","-5","rrcurveto","-24","278","24","vlineto","-70","2","-12","12","0","65","rrcurveto","314","87","44","-87","122","vlineto","55","15","22","35","18","11","-7","-12","vhcurveto","0","-4","-3","-7","-6","-10","-9","-15","-4","-11","0","-10","rrcurveto","-31","26","-24","34","37","25","25","37","59","-59","41","-84","vhcurveto","-67","0","-52","-24","-27","-44","-22","-35","-7","-41","0","-86","rrcurveto","-57","-44","hlineto","endchar"], +g: ["-206","32","122","120","87","27","216","53","-9","31","hstemhm","28","88","-79","137","132","135","-37","79","hintmask","-reserved-","482","398","rmoveto","53","-130","vlineto","hintmask","-reserved-","-44","16","-28","6","-40","0","-119","0","-84","-66","0","-95","0","-34","13","-33","23","-28","22","-24","19","-13","47","-20","-79","-27","-40","-38","0","-52","0","-41","18","-18","64","-23","rrcurveto","hintmask","-11","-63","-9","-33","-25","0","-41","rrcurveto","-58","75","-34","126","166","88","52","99","77","-62","46","-102","vhcurveto","-65","hlineto","-80","-20","7","28","30","28","22","40","hvcurveto","45","-1","rlineto","47","0","25","6","34","20","rrcurveto","hintmask","-reserved-","45","26","23","41","0","53","0","40","-12","30","-28","28","rrcurveto","-77","-450","rmoveto","hintmask","-11","54","26","-17","-33","-46","-55","-26","-99","-88","-46","23","44","hvcurveto","0","21","7","11","28","23","rrcurveto","hintmask","-reserved-","89","494","rmoveto","47","19","-38","-93","-93","-19","-36","-47","-48","-18","36","93","hvcurveto","94","19","37","47","vhcurveto","endchar"], +h: ["-59","0","24","382","67","179","24","hstem","69","139","138","139","vstem","208","676","rmoveto","-192","-24","hlineto","46","-9","7","-8","0","-41","rrcurveto","-510","vlineto","0","-42","-8","-9","-45","-9","rrcurveto","-24","241","24","vlineto","-37","5","-12","15","0","37","rrcurveto","267","vlineto","0","4","7","10","10","9","22","23","24","12","23","0","rrcurveto","35","17","-27","-56","hvcurveto","-242","vlineto","0","-37","-13","-16","-34","-4","rrcurveto","-24","235","24","vlineto","-36","3","-13","15","0","42","rrcurveto","248","vlineto","86","-53","55","-82","vhcurveto","-53","0","-37","-20","-52","-58","rrcurveto","endchar"], +i: ["-337","0","24","413","24","75","155","hstemhm","60","155","-146","139","hintmask","208","461","rmoveto","-192","-24","hlineto","44","-9","9","-9","0","-41","rrcurveto","-294","vlineto","0","-41","-7","-8","-46","-11","rrcurveto","-24","239","24","vlineto","-35","5","-12","14","0","38","rrcurveto","hintmask","-70","610","rmoveto","-43","-35","-35","-42","-45","33","-33","44","44","34","33","44","hvcurveto","43","-34","35","-43","vhcurveto","endchar"], +j: ["-282","-203","31","609","24","75","155","hstemhm","108","155","-142","139","hintmask","260","461","rmoveto","-202","-24","hlineto","51","-4","12","-12","0","-43","rrcurveto","-474","vlineto","-53","-15","-23","-33","-19","-13","7","10","vhcurveto","0","5","3","7","6","10","10","16","4","11","0","10","rrcurveto","30","-27","24","-32","-37","-25","-25","-36","-59","58","-41","82","vhcurveto","69","0","53","27","28","49","19","32","8","38","0","59","rrcurveto","hintmask","-75","689","rmoveto","-43","-34","-35","-42","-45","32","-33","45","43","35","34","43","hvcurveto","43","-35","35","-43","vhcurveto","endchar"], +k: ["-59","0","24","628","24","hstem","70","139","vstem","513","461","rmoveto","-214","-23","hlineto","11","-2","10","-1","3","-1","24","-3","11","-7","0","-13","0","-9","-10","-18","-11","-11","rrcurveto","-128","-128","0","431","-187","0","0","-24","rlineto","34","-3","14","-17","0","-38","rrcurveto","-510","vlineto","0","-39","-15","-18","-33","-3","rrcurveto","-24","239","24","vlineto","-47","7","-5","6","0","47","rrcurveto","0","114","23","24","95","-134","rlineto","17","-25","7","-12","0","-8","0","-12","-14","-6","-28","-1","rrcurveto","-24","234","24","vlineto","-11","0","-5","3","-9","12","rrcurveto","-194","268","rlineto","100","105","26","18","63","8","rrcurveto","endchar"], +l: ["-337","0","24","628","24","hstem","67","139","vstem","206","676","rmoveto","-190","-24","hlineto","35","-3","16","-18","0","-37","rrcurveto","-510","vlineto","0","-37","-17","-20","-34","-3","rrcurveto","-24","239","24","vlineto","-33","1","-16","19","0","40","rrcurveto","endchar"], +m: ["218","0","24","382","67","-36","24","hstemhm","71","139","138","139","138","139","hintmask","207","461","rmoveto","-191","-24","hlineto","44","-6","11","-12","0","-41","rrcurveto","-294","vlineto","0","-41","-11","-11","-44","-8","rrcurveto","-24","240","24","vlineto","-35","5","-11","14","0","38","rrcurveto","267","vlineto","0","6","16","19","13","10","rrcurveto","hintmask","21","16","17","7","17","0","rrcurveto","39","15","-23","-60","hvcurveto","-242","vlineto","0","-41","-11","-13","-37","-3","rrcurveto","-24","234","24","vlineto","-35","4","-12","15","0","38","rrcurveto","267","vlineto","0","5","16","19","13","10","22","17","17","7","17","0","rrcurveto","38","15","-24","-59","hvcurveto","-242","vlineto","0","-42","-11","-12","-38","-3","rrcurveto","-24","238","24","vlineto","-39","2","-11","12","0","43","rrcurveto","251","vlineto","86","-53","55","-82","vhcurveto","-57","0","-38","-23","-52","-64","-30","63","-35","24","-63","0","-63","0","-46","-27","-38","-60","rrcurveto","endchar"], +n: ["-59","0","24","382","67","-36","24","hstemhm","74","139","138","139","hintmask","212","461","rmoveto","-191","-24","hlineto","44","-7","9","-10","0","-42","rrcurveto","-294","vlineto","0","-42","-8","-9","-45","-9","rrcurveto","-24","241","24","vlineto","-37","5","-12","15","0","37","rrcurveto","267","vlineto","0","4","7","10","10","9","rrcurveto","hintmask","22","23","24","12","23","0","rrcurveto","35","17","-27","-56","hvcurveto","-242","vlineto","0","-37","-13","-16","-34","-4","rrcurveto","-24","235","24","vlineto","-39","3","-10","12","0","42","rrcurveto","251","vlineto","86","-53","55","-82","vhcurveto","-60","0","-46","-28","-37","-59","rrcurveto","endchar"], +o: ["-14","31","425","31","hstem","25","147","157","147","vstem","251","473","rmoveto","-127","-99","-105","-136","-143","95","-103","131","129","96","104","139","140","-96","104","-129","hvcurveto","1","-31","rmoveto","58","19","-55","-164","-154","-20","-52","-58","-59","-20","51","146","176","18","52","62","hvcurveto","endchar"], +p: ["-59","-205","21","-21","24","-24","205","-13","55","374","57","-36","24","hstemhm","75","139","162","148","hintmask","212","461","rmoveto","-191","-24","hlineto","44","-7","10","-10","0","-42","rrcurveto","-501","vlineto","0","-41","-8","-9","-48","-8","rrcurveto","hintmask","-24","273","vlineto","hintmask","21","vlineto","-61","3","-17","20","0","67","rrcurveto","141","vlineto","48","-47","26","-13","44","0","rrcurveto","112","80","104","148","139","-75","95","-111","hvcurveto","-59","0","-36","-23","-31","-58","rrcurveto","hintmask","2","-46","rmoveto","0","7","9","16","12","13","20","22","23","12","22","0","rrcurveto","52","24","-60","-131","-124","-28","-59","-57","hvcurveto","-33","0","-29","23","-15","38","rrcurveto","endchar"], +r: ["-171","0","24","413","24","-8","20","hstemhm","83","139","hintmask","218","461","rmoveto","-189","-24","hlineto","43","-6","11","-13","0","-40","rrcurveto","-294","vlineto","0","-41","-10","-11","-44","-8","rrcurveto","-24","266","24","vlineto","-61","4","-12","13","0","62","rrcurveto","189","vlineto","52","28","43","33","vhcurveto","8","0","9","-7","11","-16","19","-27","15","-9","26","0","rrcurveto","37","26","28","39","hvcurveto","hintmask","45","-34","33","-47","vhcurveto","-50","0","-38","-27","-47","-67","rrcurveto","endchar"], +s: ["-226","-14","34","420","33","hstem","27","100","138","96","vstem","340","326","rmoveto","145","-22","vlineto","-6","-15","-6","-5","-12","0","-6","0","-10","2","-16","5","-33","11","-23","4","-22","0","-91","0","-66","-62","0","-84","0","-66","41","-46","101","-43","68","-30","28","-25","0","-32","rrcurveto","-39","-30","-26","-45","vhcurveto","-70","0","-46","45","-21","87","rrcurveto","-28","-165","25","hlineto","11","21","6","7","9","0","5","0","8","-2","10","-4","27","-12","53","-11","28","0","91","0","63","62","0","90","0","71","-39","43","-99","42","-68","29","-28","25","0","34","rrcurveto","33","28","25","38","vhcurveto","26","0","27","-11","22","-21","21","-20","11","-18","15","-44","rrcurveto","endchar"], +t: ["-282","-12","71","358","44","hstem","72","139","vstem","305","461","rmoveto","-94","169","-25","hlineto","-61","-86","-40","-45","-65","-55","rrcurveto","-27","52","-324","vlineto","-65","43","-40","69","vhcurveto","67","0","40","30","41","82","rrcurveto","-25","11","rlineto","-20","-38","-16","-14","-21","0","rrcurveto","-28","-11","17","40","hvcurveto","301","94","vlineto","endchar"], +u: ["-59","-14","65","-31","23","394","24","hstemhm","65","139","138","139","hintmask","343","-13","rmoveto","43","15","24","4","65","7","rrcurveto","62","7","0","23","rlineto","-44","2","-12","13","0","43","rrcurveto","360","-201","-24","vlineto","50","-4","12","-12","0","-43","rrcurveto","-283","vlineto","hintmask","-33","-33","-21","-11","-28","0","rrcurveto","-41","-15","20","51","hvcurveto","339","-188","-24","vlineto","41","-8","8","-9","0","-42","rrcurveto","-252","vlineto","-88","50","-52","83","vhcurveto","52","0","35","16","58","50","rrcurveto","endchar"], +v: ["-14","20","435","20","hstem","485","461","rmoveto","-151","-24","hlineto","43","-2","12","-7","0","-24","0","-11","-4","-16","-7","-18","rrcurveto","-72","-182","-79","203","rlineto","-7","19","-2","6","0","6","0","15","10","7","25","2","2","0","8","1","8","1","rrcurveto","24","-250","-24","vlineto","23","-3","6","-3","6","-9","2","1","47","-100","16","-41","rrcurveto","120","-296","26","0","160","396","rlineto","19","44","8","8","31","3","rrcurveto","endchar"], +w: ["107","-14","20","435","20","hstem","707","461","rmoveto","-135","-24","hlineto","37","-4","11","-8","0","-23","0","-12","-13","-37","-31","-79","-15","-39","-8","-21","-12","-34","-10","41","-3","12","-21","74","-20","69","-7","28","0","9","0","16","10","5","38","3","rrcurveto","24","-234","-24","vlineto","39","-4","1","-1","19","-66","rrcurveto","6","-19","-68","-171","-24","64","rlineto","-9","21","-7","19","-5","14","-28","70","-11","32","0","13","0","16","10","8","28","4","rrcurveto","24","-222","-24","vlineto","26","-5","4","-6","27","-66","rrcurveto","148","-374","24","0","125","310","102","-310","23","0","155","401","rlineto","16","37","8","8","26","5","rrcurveto","endchar"], +x: ["0","20","421","20","hstem","484","24","rmoveto","-16","5","-7","4","-7","11","rrcurveto","-148","228","101","126","rlineto","19","23","20","11","31","5","rrcurveto","24","-168","-24","vlineto","9","-1","8","-1","3","0","23","-1","8","-6","0","-15","0","-15","-10","-17","-28","-32","-6","-6","-15","-19","-15","-21","-6","7","-4","6","-3","4","-32","42","-26","43","0","13","rrcurveto","0","12","14","6","33","1","rrcurveto","24","-250","-24","vlineto","26","-4","6","-5","20","-30","rrcurveto","128","-197","rlineto","-29","-37","-27","-33","-9","-14","-56","-74","-20","-17","-37","-2","rrcurveto","-24","169","24","vlineto","-36","2","-14","7","0","15","0","15","26","42","38","46","2","3","7","8","7","9","rrcurveto","42","-63","rlineto","23","-33","10","-19","0","-12","0","-12","-14","-6","-31","-2","rrcurveto","-24","241","vlineto","endchar"], +y: ["-205","57","589","20","hstem","16","119","vstem","480","461","rmoveto","-151","-24","hlineto","43","-2","12","-7","0","-24","0","-11","-2","-9","-9","-25","rrcurveto","-68","-192","-72","185","rlineto","-20","51","0","0","0","8","0","14","12","9","25","2","rrcurveto","16","1","0","24","-250","0","0","-24","rlineto","23","-3","6","-3","6","-9","3","0","45","-98","17","-42","rrcurveto","120","-295","-18","-53","rlineto","-17","-50","-26","-32","-23","0","-9","0","-8","8","0","9","0","1","0","2","1","3","1","5","1","5","0","4","rrcurveto","29","-24","20","-34","-37","-27","-26","-38","-46","40","-33","57","vhcurveto","34","0","29","12","21","21","21","23","20","40","35","94","rrcurveto","149","397","rlineto","17","42","10","10","31","3","rrcurveto","endchar"], +z: ["-171","0","32","409","20","hstem","420","160","rmoveto","-28","hlineto","-9","-32","-8","-18","-16","-21","-32","-44","-33","-13","-80","0","rrcurveto","-29","0","231","403","0","26","-371","0","-7","-142","26","0","rlineto","25","95","26","16","140","-1","rrcurveto","-234","-404","0","-25","383","0","rlineto","endchar"] +}; diff --git a/test.html b/test.html index 83d48741e..b70351f2f 100644 --- a/test.html +++ b/test.html @@ -7,6 +7,7 @@ + From da69361ae057158ab6d233c2ee0a2437011374a4 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 10 Jun 2011 08:40:28 +0200 Subject: [PATCH 18/72] Add the beginning of a working CFF font encoder --- PDFFont.js | 78 +++++++++++++++++++++++++++++++++++++++---------- PDFFontUtils.js | 6 ++-- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index c001c825e..e3ddd213d 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -202,7 +202,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { "21": "rmoveto", "22": "hmoveto", "30": "vhcurveto", - "31": "hcurveto" + "31": "hvcurveto" }; function decodeCharString(aStream) { @@ -914,7 +914,7 @@ Type1Font.prototype = { //Top Dict Index var topDictIndex = [ - 0x00, 0x01, 0x01, 0x01, 0x29, + 0x00, 0x01, 0x01, 0x01, 0x2A, 248, 27, 0, // version 248, 28, 1, // Notice 248, 29, 2, // FullName @@ -922,9 +922,9 @@ Type1Font.prototype = { 248, 20, 4, // Weigth 82, 251, 98, 250, 105, 249, 72, 5, // FontBBox 248, 136, 15, // charset (offset: 500) - 28, 0, 0, 16, // Encoding (offset: 600) - 248, 236, 17, // CharStrings - 28, 0, 55, 28, 15, 160, 18 // Private (offset: 4000) + 28, 0, 0, 16, // Encoding + 28, 7, 208, 17, // CharStrings (offset: 2000) + 28, 0, 55, 28, 39, 16, 18 // Private (offset: 10000) ]; cff.set(topDictIndex, currentOffset); currentOffset += topDictIndex.length; @@ -956,32 +956,74 @@ Type1Font.prototype = { cff.set(empty, currentOffset); currentOffset += empty.length; - //Declare the letter 'C' + //Declare the letters var charset = [ - 0x00, 0x00, 0x42 + 0x00 ]; + var limit = 30; + for (var glyph in charstrings.map) { + if (!limit--) + break; + var index = CFFStrings.indexOf(glyph); + var bytes = integerToBytes(index, 2); + charset.push(bytes[0]); + charset.push(bytes[1]); + } cff.set(charset, currentOffset); currentOffset += charset.length; // Fill the space between this and the charstrings data by '1' - var empty = new Array(600 - currentOffset); + var empty = new Array(2000 - currentOffset); for (var i = 0; i < empty.length; i++) empty[i] = 0x01; cff.set(empty, currentOffset); currentOffset += empty.length; + var getNumFor = { + "hstem": 1, + "vstem": 3, + "vmoveto": 4, + "rlineto": 5, + "hlineto": 6, + "vlineto": 7, + "rrcurveto": 8, + "endchar": 14, + "rmoveto": 21, + "vhcurveto": 30, + "hvcurveto": 31, + }; + // Encode the glyph and add it to the FUX - var charStringsIndex = [ - 0x00, 0x02, 0x01, 0x01, 0x03, 0x05, - 0x40, 0x0E, - 0xAF, 0x0E - ]; - cff.set(charStringsIndex, currentOffset); + var r = [[0x40, 0xEA]]; + var limit = 30; + for (var glyph in glyphs) { + if (!limit--) + break; + var data = glyphs[glyph].slice(); + var charstring = []; + for (var i = 0; i < data.length; i++) { + var c = data[i]; + if (!IsNum(c)) { + var token = getNumFor[c]; + if (!token) + error(c); + charstring.push(token); + } else { + var bytes = encodeNumber(c); + for (var j = 0; j < bytes.length; j++) + charstring.push(bytes[j]); + } + } + r.push(charstring); + } + + var charStringsIndex = this.createCFFIndexHeader(r, true); + cff.set(charStringsIndex.join(" ").split(" "), currentOffset); currentOffset += charStringsIndex.length; // Fill the space between this and the private dict data by '1' - var empty = new Array(4000 - currentOffset); + var empty = new Array(10000 - currentOffset); for (var i = 0; i < empty.length; i++) empty[i] = 0x01; cff.set(empty, currentOffset); @@ -1018,6 +1060,7 @@ Type1Font.prototype = { var file = new Uint8Array(cff, 0, currentOffset); var parser = new Type2Parser(); + log("parse"); parser.parse(new Stream(file)); var file64 = Base64Encoder.encode(file); @@ -1064,6 +1107,8 @@ Type1Font.prototype = { function integerToBytes(aValue, aBytesCount) { var bytes = []; + for (var i = 0; i < aBytesCount; i++) + bytes[i] = 0x00; do { bytes[--aBytesCount] = (aValue & 0xFF); @@ -1106,4 +1151,5 @@ function encodeNumber(aValue) { } else { error("Value: " + aValue + " is not allowed"); } -} +}; + diff --git a/PDFFontUtils.js b/PDFFontUtils.js index 02c588217..16d1150ab 100644 --- a/PDFFontUtils.js +++ b/PDFFontUtils.js @@ -22,9 +22,8 @@ function readCharset(aStream, aCharstrings) { var count = aCharstrings.length - 1; for (var i = 1; i < count + 1; i++) { var sid = aStream.getByte() << 8 | aStream.getByte(); - log(sid); charset[CFFStrings[sid]] = readCharstringEncoding(aCharstrings[i]); - log(CFFStrings[sid] + "::" + charset[CFFStrings[sid]]); + //log(CFFStrings[sid] + "::" + charset[CFFStrings[sid]]); } } else if (format == 1) { error("Charset Range are not supported"); @@ -218,7 +217,7 @@ var Type2Parser = function(aFilePath) { var font = new Dict(); // Turn on this flag for additional debugging logs - var debug = true; + var debug = false; function dump(aStr) { if (debug) @@ -227,7 +226,6 @@ var Type2Parser = function(aFilePath) { function parseAsToken(aString, aMap) { var decoded = readFontDictData(aString, aMap); - log(decoded); var stack = []; var count = decoded.length; From 4191485e1f4fcc1c5b6df6c3469df1933c19e95d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 10 Jun 2011 18:38:57 +0200 Subject: [PATCH 19/72] Add a debug writeToFile function and remove aggregations for Type2 fonts --- PDFFont.js | 55 ++++++++++++++----------------------------------- PDFFontUtils.js | 23 +++++++++++++++++++++ 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index e3ddd213d..b81b2b2f4 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -729,24 +729,18 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { error(obj + " parsing is not implemented (yet)"); break; - case "vstem3": - operandStack.push("vstem"); - break; - - case "vstem": - //log(obj + " is not converted (yet?)"); - operandStack.push("vstem"); - break; - case "closepath": case "return": break; - case "hlineto": - case "vlineto": - case "rlineto": - case "rrcurveto": - aggregateCommand(obj); + case "vstem3": + case "vstem": + operandStack.push("vstem"); + break; + + case "hstem": + case "hstem3": + operandStack.push("hstem"); break; case "rmoveto": @@ -763,20 +757,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { operandStack.push("rmoveto"); break; - case "hstem": - case "hstem3": - var dy = operandStack.pop(); - var y = operandStack.pop(); - if (operandStack.peek() == "hstem" || - operandStack.peek() == "hstem3") - operandStack.pop(); - - operandStack.push(y - lastPoint); - lastPoint = y + dy; - - operandStack.push(dy); - operandStack.push("hstem"); - break; case "callsubr": var index = operandStack.pop(); @@ -960,10 +940,7 @@ Type1Font.prototype = { var charset = [ 0x00 ]; - var limit = 30; for (var glyph in charstrings.map) { - if (!limit--) - break; var index = CFFStrings.indexOf(glyph); var bytes = integerToBytes(index, 2); charset.push(bytes[0]); @@ -990,16 +967,14 @@ Type1Font.prototype = { "rrcurveto": 8, "endchar": 14, "rmoveto": 21, + "hmoveto": 22, "vhcurveto": 30, "hvcurveto": 31, }; // Encode the glyph and add it to the FUX var r = [[0x40, 0xEA]]; - var limit = 30; for (var glyph in glyphs) { - if (!limit--) - break; var data = glyphs[glyph].slice(); var charstring = []; for (var i = 0; i < data.length; i++) { @@ -1060,16 +1035,18 @@ Type1Font.prototype = { var file = new Uint8Array(cff, 0, currentOffset); var parser = new Type2Parser(); - log("parse"); - parser.parse(new Stream(file)); - var file64 = Base64Encoder.encode(file); - console.log(file64); + + log("==================== debug ===================="); + log("== parse"); + parser.parse(new Stream(file)); var data = []; for (var i = 0; i < currentOffset; i++) data.push(cff[i]); - log(data); + + log("== write to file"); + writeToFile(data, "/tmp/pdf.js.cff"); }, createCFFIndexHeader: function(aObjects, aIsByte) { diff --git a/PDFFontUtils.js b/PDFFontUtils.js index 16d1150ab..e3b6a5a07 100644 --- a/PDFFontUtils.js +++ b/PDFFontUtils.js @@ -349,3 +349,26 @@ var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse || xhr.responseArrayBuffer || xhr.response; var cff = new Type2Parser("titi.cff"); //cff.parse(new Stream(cffData)); + + +/** + * Write to a file (works only on Firefox in privilege mode"); + */ + function writeToFile(aBytes, aFilePath) { + netscape.security.PrivilegeManager.enablePrivilege("UniversalXPConnect"); + var Cc = Components.classes, + Ci = Components.interfaces; + var file = Cc['@mozilla.org/file/local;1'].createInstance(Ci.nsILocalFile); + file.initWithPath(aFilePath); + + var stream = Cc["@mozilla.org/network/file-output-stream;1"] + .createInstance(Ci.nsIFileOutputStream); + stream.init(file, 0x04 | 0x08 | 0x20, 0600, 0); + + var bos = Cc["@mozilla.org/binaryoutputstream;1"] + .createInstance(Ci.nsIBinaryOutputStream); + bos.setOutputStream(stream); + bos.writeByteArray(aBytes, aBytes.length); + stream.close(); + }; + From ffa52f9dbd157802f583698cc8db1d48e09f2662 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 10 Jun 2011 18:46:26 +0200 Subject: [PATCH 20/72] Remove the aggregate commands and do some methods dance --- PDFFont.js | 263 +++++++++++++++++------------------------------------ 1 file changed, 85 insertions(+), 178 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index b81b2b2f4..d24681c77 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -604,94 +604,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { return false; } - function aggregateCommand(aCommand) { - var command = aCommand; - switch (command) { - case "hstem": - case "vstem": - break; - - case "rrcurveto": - var stack = [operandStack.pop(), operandStack.pop(), - operandStack.pop(), operandStack.pop(), - operandStack.pop(), operandStack.pop()]; - var next = true; - while (next) { - var op = operandStack.peek(); - if (op == "rrcurveto") { - operandStack.pop(); - stack.push(operandStack.pop()); - stack.push(operandStack.pop()); - stack.push(operandStack.pop()); - stack.push(operandStack.pop()); - stack.push(operandStack.pop()); - stack.push(operandStack.pop()); - } else { - next = false; - } - } - break; - - case "hlineto": - case "vlineto": - var last = command; - var stack = [operandStack.pop()]; - var next = true; - while (next) { - var op = operandStack.peek(); - if (op == "vlineto" && last == "hlineto") { - operandStack.pop(); - stack.push(operandStack.pop()); - } else if (op == "hlineto" && last == "vlineto") { - operandStack.pop(); - stack.push(operandStack.pop()); - } else if (op == "rlineto" && command == "hlineto") { - operandStack.pop(); - var x = stack.pop(); - operandStack.push(0); - operandStack.push(x); - command = "rlineto"; - } else if (op == "rlineto" && command == "vlineto") { - operandStack.pop(); - operandStack.push(0); - command = "rlineto"; - } else { - next = false; - } - last = op; - } - break; - - case "rlineto": - var stack = [operandStack.pop(), operandStack.pop()]; - var next = true; - while (next) { - var op = operandStack.peek(); - if (op == "rlineto") { - operandStack.pop(); - stack.push(operandStack.pop()); - stack.push(operandStack.pop()); - } else if (op == "hlineto") { - operandStack.pop(); - stack.push(0); - stack.push(operandStack.pop()); - } else if (op == "vlineto") { - operandStack.pop(); - stack.push(operandStack.pop()); - stack.push(0); - } else { - next= false; - } - } - break; - } - - while (stack.length) - operandStack.push(stack.pop()); - operandStack.push(command); - }; - - /* * Flatten the commands by interpreting the postscript code and replacing * every 'callsubr', 'callothersubr' by the real commands. @@ -852,6 +764,87 @@ Type1Font.prototype = { }; }, + + createCFFIndexHeader: function(aObjects, aIsByte) { + var data = []; + + // First 2 bytes contains the number of objects contained into this index + var count = aObjects.length; + var bytes = this.integerToBytes(count, 2); + for (var i = 0; i < bytes.length; i++) + data.push(bytes[i]); + + // Next byte contains the offset size use to reference object in the file + // Actually we're using 0x04 to be sure to be able to store everything + // without thinking of it while coding. + data.push(0x04); + + // Add another offset after this one because we need a new offset + var relativeOffset = 1; + for (var i = 0; i < count + 1; i++) { + var bytes = this.integerToBytes(relativeOffset, 4); + for (var j = 0; j < bytes.length; j++) + data.push(bytes[j]); + + if (aObjects[i]) + relativeOffset += aObjects[i].length; + } + + for (var i =0; i < count; i++) { + for (var j = 0; j < aObjects[i].length; j++) + data.push(aIsByte ? aObjects[i][j] : aObjects[i][j].charCodeAt(0)); + } + return data; + }, + + integerToBytes: function(aValue, aBytesCount) { + var bytes = []; + for (var i = 0; i < aBytesCount; i++) + bytes[i] = 0x00; + + do { + bytes[--aBytesCount] = (aValue & 0xFF); + aValue = aValue >> 8; + } while (aBytesCount && aValue > 0); + + return bytes; + }, + + encodeNumber: function(aValue) { + var x = 0; + if (aValue >= -107 && aValue <= 107) { + return [aValue + 139]; + } else if (aValue >= 108 && aValue <= 1131) { + x = aValue - 108; + return [ + this.integerToBytes(x / 256 + 247, 1), + x % 256 + ]; + } else if (aValue >= -1131 && aValue <= -108) { + x = Math.abs(aValue) - 108; + return [ + this.integerToBytes(x / 256 + 251, 1), + x % 256 + ]; + } else if (aValue >= -32768 && aValue <= 32767) { + return [ + 28, + integerToBytes(aValue >> 8, 1), + integerToBytes(aValue, 1) + ]; + } else if (aValue >= (-2147483647-1) && aValue <= 2147483647) { + return [ + 0xFF, + integerToBytes(aValue >> 24, 1), + integerToBytes(aValue >> 16, 1), + integerToBytes(aValue >> 8, 1), + integerToBytes(aValue, 1) + ]; + } else { + error("Value: " + aValue + " is not allowed"); + } + }, + convertToOTF: function(aFontName) { var font = Fonts.get(aFontName); @@ -871,11 +864,6 @@ Type1Font.prototype = { for (var glyph in charstrings.map) { var charstring = charstrings.get(glyph); glyphs[glyph] = parser.flattenCharstring(charstring, defaultWidth, nominalWidth, subrs); - - //log("=================================== " + glyph + " =============================="); - //log(charstrings.get(glyph)); - //log(flattenedCharstring); - //log(validationData[glyph]); } // Create a CFF font data @@ -942,7 +930,7 @@ Type1Font.prototype = { ]; for (var glyph in charstrings.map) { var index = CFFStrings.indexOf(glyph); - var bytes = integerToBytes(index, 2); + var bytes = this.integerToBytes(index, 2); charset.push(bytes[0]); charset.push(bytes[1]); } @@ -985,7 +973,7 @@ Type1Font.prototype = { error(c); charstring.push(token); } else { - var bytes = encodeNumber(c); + var bytes = this.encodeNumber(c); for (var j = 0; j < bytes.length; j++) charstring.push(bytes[j]); } @@ -1033,12 +1021,11 @@ Type1Font.prototype = { cff.set(shit, currentOffset); currentOffset += shit.length; - var file = new Uint8Array(cff, 0, currentOffset); - var parser = new Type2Parser(); - log("==================== debug ===================="); log("== parse"); + var file = new Uint8Array(cff, 0, currentOffset); + var parser = new Type2Parser(); parser.parse(new Stream(file)); var data = []; @@ -1047,86 +1034,6 @@ Type1Font.prototype = { log("== write to file"); writeToFile(data, "/tmp/pdf.js.cff"); - }, - - createCFFIndexHeader: function(aObjects, aIsByte) { - var data = []; - - // First 2 bytes contains the number of objects contained into this index - var count = aObjects.length; - var bytes = integerToBytes(count, 2); - for (var i = 0; i < bytes.length; i++) - data.push(bytes[i]); - - // Next byte contains the offset size use to reference object in the file - // Actually we're using 0x04 to be sure to be able to store everything - // without thinking of it while coding. - data.push(0x04); - - // Add another offset after this one because we need a new offset - var relativeOffset = 1; - for (var i = 0; i < count + 1; i++) { - var bytes = integerToBytes(relativeOffset, 4); - for (var j = 0; j < bytes.length; j++) - data.push(bytes[j]); - - if (aObjects[i]) - relativeOffset += aObjects[i].length; - } - - for (var i =0; i < count; i++) { - for (var j = 0; j < aObjects[i].length; j++) - data.push(aIsByte ? aObjects[i][j] : aObjects[i][j].charCodeAt(0)); - } - return data; - } -}; - -function integerToBytes(aValue, aBytesCount) { - var bytes = []; - for (var i = 0; i < aBytesCount; i++) - bytes[i] = 0x00; - - do { - bytes[--aBytesCount] = (aValue & 0xFF); - aValue = aValue >> 8; - } while (aBytesCount && aValue > 0); - - return bytes; -}; - -function encodeNumber(aValue) { - var x = 0; - if (aValue >= -107 && aValue <= 107) { - return [aValue + 139]; - } else if (aValue >= 108 && aValue <= 1131) { - x = aValue - 108; - return [ - integerToBytes(x / 256 + 247, 1), - x % 256 - ]; - } else if (aValue >= -1131 && aValue <= -108) { - x = Math.abs(aValue) - 108; - return [ - integerToBytes(x / 256 + 251, 1), - x % 256 - ]; - } else if (aValue >= -32768 && aValue <= 32767) { - return [ - 28, - integerToBytes(aValue >> 8, 1), - integerToBytes(aValue, 1) - ]; - } else if (aValue >= (-2147483647-1) && aValue <= 2147483647) { - return [ - 0xFF, - integerToBytes(aValue >> 24, 1), - integerToBytes(aValue >> 16, 1), - integerToBytes(aValue >> 8, 1), - integerToBytes(aValue, 1) - ]; - } else { - error("Value: " + aValue + " is not allowed"); } }; From d1d66211644eee43292a874ec076b6bfb0169a6a Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 10 Jun 2011 19:27:34 +0200 Subject: [PATCH 21/72] Remove some useless bits of code --- PDFFont.js | 90 +++++++++++++++++++++++++----------------------------- 1 file changed, 41 insertions(+), 49 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index d24681c77..1344b0456 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -611,7 +611,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * as descrived in 'Using Subroutines' of 'Adobe Type 1 Font Format', * chapter 8. */ - this.flattenCharstring = function(aCharstring, aDefaultWidth, aNominalWidth, aSubrs) { + this.flattenCharstring = function(aCharstring, aDefaultWidth, aSubrs) { operandStack.clear(); executionStack.clear(); executionStack.push(aCharstring); @@ -631,7 +631,21 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { leftSidebearing = operandStack.pop(); if (charWidthVector != aDefaultWidth) - operandStack.push(charWidthVector - aNominalWidth); + operandStack.push(charWidthVector - aDefaultWidth); + break; + + case "rmoveto": + var dy = operandStack.pop(); + var dx = operandStack.pop(); + + if (leftSidebearing) { + dx += leftSidebearing; + leftSidebearing = 0; + } + + operandStack.push(dx); + operandStack.push(dy); + operandStack.push("rmoveto"); break; case "setcurrentpoint": @@ -655,21 +669,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { operandStack.push("hstem"); break; - case "rmoveto": - var dy = operandStack.pop(); - var dx = operandStack.pop(); - - if (leftSidebearing) { - dx += leftSidebearing; - leftSidebearing = 0; - } - - operandStack.push(dx); - operandStack.push(dy); - operandStack.push("rmoveto"); - break; - - case "callsubr": var index = operandStack.pop(); executionStack.push(aSubrs[index].slice()); @@ -723,14 +722,16 @@ var Type1Font = function(aFontName, aFontFile) { this.parser = new Type1Parser(ASCIIStream, binaryStream); var fontName = this.parser.parse(); - this.convertToOTF(fontName); + var font = Fonts.get(fontName); + this.convertToOTF(this.convertToCFF(font), font); + var end = Date.now(); log("Time to parse font is:" + (end - start)); } }; Type1Font.prototype = { - getDefaultWidths: function(aCharstrings) { + getDefaultWidth: function(aCharstrings) { var defaultWidth = 0; var defaultUsedCount = 0; @@ -746,25 +747,9 @@ Type1Font.prototype = { widths[width] = usedCount; } - defaultWidth = parseInt(defaultWidth); - - var maxNegDistance = 0, maxPosDistance = 0; - for (var width in widths) { - var diff = width - defaultWidth; - if (diff < 0 && diff < maxNegDistance) { - maxNegDistance = diff; - } else if (diff > 0 && diff > maxPosDistance) { - maxPosDistance = diff; - } - } - - return { - default: defaultWidth, - nominal: defaultWidth + (maxPosDistance + maxNegDistance) / 2 - }; + return parseInt(defaultWidth); }, - createCFFIndexHeader: function(aObjects, aIsByte) { var data = []; @@ -845,27 +830,27 @@ Type1Font.prototype = { } }, - convertToOTF: function(aFontName) { - var font = Fonts.get(aFontName); - - var charstrings = font.get("CharStrings") - var defaultWidths = this.getDefaultWidths(charstrings); - var defaultWidth = defaultWidths.default; - var nominalWidth = defaultWidths.nominal; + convertToCFF: function(aFont) { + var charstrings = aFont.get("CharStrings") + var defaultWidth = this.getDefaultWidth(charstrings); log("defaultWidth to used: " + defaultWidth); - log("nominalWidth to used: " + nominalWidth); - log("Hack nonimal:" + (nominalWidth = 615)); + var charstringsCount = 0; + var charstringsDataLength = 0; var glyphs = {}; - var subrs = font.get("Private").get("Subrs"); + var subrs = aFont.get("Private").get("Subrs"); var parser = new Type1Parser(); for (var glyph in charstrings.map) { var charstring = charstrings.get(glyph); - glyphs[glyph] = parser.flattenCharstring(charstring, defaultWidth, nominalWidth, subrs); + glyphs[glyph] = parser.flattenCharstring(charstring, defaultWidth, subrs); + charstringsCount++; + charstringsDataLength += glyphs[glyph].length; } + log("There is " + charstringsCount + " glyphs (size: " + charstringsDataLength + ")"); + // Create a CFF font data var cff = new Uint8Array(20000); var currentOffset = 0; @@ -876,7 +861,7 @@ Type1Font.prototype = { cff.set(header); // Names Index - var nameIndex = this.createCFFIndexHeader([aFontName]); + var nameIndex = this.createCFFIndexHeader([aFont.get("FontName")]); cff.set(nameIndex, currentOffset); currentOffset += nameIndex.length; @@ -995,7 +980,7 @@ Type1Font.prototype = { // Private Data var privateData = [ 248, 136, 20, - 248, 251, 21, + 248, 136, 21, 119, 159, 248, 97, 159, 247, 87, 159, 6, 30, 10, 3, 150, 37, 255, 12, 9, 139, 12, 10, @@ -1023,10 +1008,12 @@ Type1Font.prototype = { log("==================== debug ===================="); + /* log("== parse"); var file = new Uint8Array(cff, 0, currentOffset); var parser = new Type2Parser(); parser.parse(new Stream(file)); + */ var data = []; for (var i = 0; i < currentOffset; i++) @@ -1034,6 +1021,11 @@ Type1Font.prototype = { log("== write to file"); writeToFile(data, "/tmp/pdf.js.cff"); + + return data; + }, + + convertToOTF: function(aData, aFont) { } }; From a73ffc2d30ba513dcd1f02452f368f982db89edc Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 10 Jun 2011 20:45:42 +0200 Subject: [PATCH 22/72] Do not add empty spaces anymore --- PDFFont.js | 180 +++++++++++++++++++++++------------------------- PDFFontUtils.js | 5 +- 2 files changed, 91 insertions(+), 94 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 1344b0456..b616075e4 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -755,6 +755,9 @@ Type1Font.prototype = { // First 2 bytes contains the number of objects contained into this index var count = aObjects.length; + if (count ==0) + return [0x00, 0x00, 0x00]; + var bytes = this.integerToBytes(count, 2); for (var i = 0; i < bytes.length; i++) data.push(bytes[i]); @@ -777,7 +780,7 @@ Type1Font.prototype = { for (var i =0; i < count; i++) { for (var j = 0; j < aObjects[i].length; j++) - data.push(aIsByte ? aObjects[i][j] : aObjects[i][j].charCodeAt(0)); + data.push(aIsByte ? aObjects[i][j] : aObjects[i].charCodeAt(j)); } return data; }, @@ -797,33 +800,20 @@ Type1Font.prototype = { encodeNumber: function(aValue) { var x = 0; - if (aValue >= -107 && aValue <= 107) { - return [aValue + 139]; - } else if (aValue >= 108 && aValue <= 1131) { - x = aValue - 108; - return [ - this.integerToBytes(x / 256 + 247, 1), - x % 256 - ]; - } else if (aValue >= -1131 && aValue <= -108) { - x = Math.abs(aValue) - 108; - return [ - this.integerToBytes(x / 256 + 251, 1), - x % 256 - ]; - } else if (aValue >= -32768 && aValue <= 32767) { + // XXX we don't really care about Type2 optimization here... + if (aValue >= -32768 && aValue <= 32767) { return [ 28, - integerToBytes(aValue >> 8, 1), - integerToBytes(aValue, 1) + this.integerToBytes(aValue >> 8, 1), + this.integerToBytes(aValue, 1) ]; } else if (aValue >= (-2147483647-1) && aValue <= 2147483647) { return [ 0xFF, - integerToBytes(aValue >> 24, 1), - integerToBytes(aValue >> 16, 1), - integerToBytes(aValue >> 8, 1), - integerToBytes(aValue, 1) + this.integerToBytes(aValue >> 24, 1), + this.integerToBytes(aValue >> 16, 1), + this.integerToBytes(aValue >> 8, 1), + this.integerToBytes(aValue, 1) ]; } else { error("Value: " + aValue + " is not allowed"); @@ -834,11 +824,8 @@ Type1Font.prototype = { var charstrings = aFont.get("CharStrings") var defaultWidth = this.getDefaultWidth(charstrings); - log("defaultWidth to used: " + defaultWidth); - var charstringsCount = 0; var charstringsDataLength = 0; - var glyphs = {}; var subrs = aFont.get("Private").get("Subrs"); var parser = new Type1Parser(); @@ -848,7 +835,6 @@ Type1Font.prototype = { charstringsCount++; charstringsDataLength += glyphs[glyph].length; } - log("There is " + charstringsCount + " glyphs (size: " + charstringsDataLength + ")"); // Create a CFF font data @@ -865,71 +851,33 @@ Type1Font.prototype = { cff.set(nameIndex, currentOffset); currentOffset += nameIndex.length; - //Top Dict Index - var topDictIndex = [ - 0x00, 0x01, 0x01, 0x01, 0x2A, - 248, 27, 0, // version - 248, 28, 1, // Notice - 248, 29, 2, // FullName - 248, 30, 3, // FamilyName - 248, 20, 4, // Weigth - 82, 251, 98, 250, 105, 249, 72, 5, // FontBBox - 248, 136, 15, // charset (offset: 500) - 28, 0, 0, 16, // Encoding - 28, 7, 208, 17, // CharStrings (offset: 2000) - 28, 0, 55, 28, 39, 16, 18 // Private (offset: 10000) - ]; - cff.set(topDictIndex, currentOffset); - currentOffset += topDictIndex.length; + // Calculate strings before writing the TopDICT index in order + // to calculate correct relative offsets for storing 'charset' + // and 'charstrings' data + var fontInfo = aFont.get("FontInfo"); + var version = fontInfo.get("version"); + var notice = fontInfo.get("Notice"); + var fullName = fontInfo.get("FullName"); + var familyName = fontInfo.get("FamilyName"); + var weight = fontInfo.get("Weight"); + var strings = [version, notice, fullName, + familyName, weight]; + var stringsIndex = this.createCFFIndexHeader(strings); + var stringsDataLength = stringsIndex.length; - // Strings Index - var stringsIndex = [ - 0x00, 0x04, 0x01, - 0x01, 0x05, 0x06, 0x07, 0x08, - 0x31, 0x2E, 0x030, 0x35, // 1.05 - 0x2B, // + - 0x28, // ( - 0x29 // ) - ]; - cff.set(stringsIndex, currentOffset); - currentOffset += stringsIndex.length; + // Create the global subroutines index + var globalSubrsIndex = this.createCFFIndexHeader([]); - - // Global Subrs Index - var globalSubrsIndex = [ - 0x00, 0x00, 0x00 - ]; - cff.set(globalSubrsIndex, currentOffset); - currentOffset += globalSubrsIndex.length; - - // Fill the space between this and the charset by '1' - var empty = new Array(500 - currentOffset); - for (var i = 0; i < empty.length; i++) - empty[i] = 0x01; - cff.set(empty, currentOffset); - currentOffset += empty.length; - - //Declare the letters - var charset = [ - 0x00 - ]; - for (var glyph in charstrings.map) { + // Fill the charset header (first byte is the encoding) + var charset = [0x00]; + for (var glyph in glyphs) { var index = CFFStrings.indexOf(glyph); var bytes = this.integerToBytes(index, 2); charset.push(bytes[0]); charset.push(bytes[1]); } - cff.set(charset, currentOffset); - currentOffset += charset.length; - - // Fill the space between this and the charstrings data by '1' - var empty = new Array(2000 - currentOffset); - for (var i = 0; i < empty.length; i++) - empty[i] = 0x01; - cff.set(empty, currentOffset); - currentOffset += empty.length; - + // Convert charstrings var getNumFor = { "hstem": 1, "vstem": 3, @@ -966,16 +914,64 @@ Type1Font.prototype = { r.push(charstring); } - var charStringsIndex = this.createCFFIndexHeader(r, true); - cff.set(charStringsIndex.join(" ").split(" "), currentOffset); - currentOffset += charStringsIndex.length; + var charstringsIndex = this.createCFFIndexHeader(r, true); + charstringsIndex = charstringsIndex.join(" ").split(" "); // XXX why? - // Fill the space between this and the private dict data by '1' - var empty = new Array(10000 - currentOffset); - for (var i = 0; i < empty.length; i++) - empty[i] = 0x01; - cff.set(empty, currentOffset); - currentOffset += empty.length; + + var fontBBox = aFont.get("FontBBox"); + + //Top Dict Index + var topDictIndex = [ + 0x00, 0x01, 0x01, 0x01, 0x30, + 248, 27, 0, // version + 248, 28, 1, // Notice + 248, 29, 2, // FullName + 248, 30, 3, // FamilyName + 248, 31, 4, // Weight + ]; + + for (var i = 0; i < fontBBox.length; i++) + topDictIndex = topDictIndex.concat(this.encodeNumber(fontBBox[i])); + topDictIndex.push(5) // FontBBox; + + var charsetOffset = currentOffset + + (topDictIndex.length + (4 + 4 + 4 + 7)) + + stringsIndex.length + + globalSubrsIndex.length; + topDictIndex = topDictIndex.concat(this.encodeNumber(charsetOffset)); + topDictIndex.push(15); // charset + + topDictIndex = topDictIndex.concat([28, 0, 0, 16]) // Encoding + + var charstringsOffset = charsetOffset + (charstringsCount * 2) + 1; + topDictIndex = topDictIndex.concat(this.encodeNumber(charstringsOffset)); + topDictIndex.push(17); // charstrings + + topDictIndex = topDictIndex.concat([28, 0, 55]) + var privateOffset = charstringsOffset + charstringsIndex.length; + topDictIndex = topDictIndex.concat(this.encodeNumber(privateOffset)); + topDictIndex.push(18); // Private + topDictIndex = topDictIndex.join(" ").split(" "); + + // Top Dict Index + cff.set(topDictIndex, currentOffset); + currentOffset += topDictIndex.length; + + // Strings Index + cff.set(stringsIndex, currentOffset); + currentOffset += stringsIndex.length; + + // Global Subrs Index + cff.set(globalSubrsIndex, currentOffset); + currentOffset += globalSubrsIndex.length; + + // Charset Index + cff.set(charset, currentOffset); + currentOffset += charset.length; + + // Fill charstrings data + cff.set(charstringsIndex, currentOffset); + currentOffset += charstringsIndex.length; // Private Data var privateData = [ diff --git a/PDFFontUtils.js b/PDFFontUtils.js index e3b6a5a07..36dc2b421 100644 --- a/PDFFontUtils.js +++ b/PDFFontUtils.js @@ -277,16 +277,17 @@ var Type2Parser = function(aFilePath) { // Read the NAME Index dump("Reading Index: Names"); font.set("Names", readFontIndexData(aStream)); + log("Names: " + font.get("Names")); // Read the Top Dict Index dump("Reading Index: TopDict"); var topDict = readFontIndexData(aStream, true); - log(topDict); + log("TopDict: " + topDict); // Read the String Index dump("Reading Index: Strings"); var strings = readFontIndexData(aStream); - log(strings); + log("strings: " + strings); // Fill up the Strings dictionary with the new unique strings for (var i = 0; i < strings.length; i++) From 865b39336765007d82bedf967dddf195aa8ef106 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sat, 11 Jun 2011 03:25:58 +0200 Subject: [PATCH 23/72] Add a basic non-working OTF generator --- PDFFont.js | 244 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 243 insertions(+), 1 deletion(-) diff --git a/PDFFont.js b/PDFFont.js index b616075e4..3dc4d4957 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -798,6 +798,13 @@ Type1Font.prototype = { return bytes; }, + bytesToInteger: function(aBytesArray) { + var value = 0; + for (var i = 0; i < aBytesArray.length; i++) + value = (value << 8) + aBytesArray[i]; + return value; + }, + encodeNumber: function(aValue) { var x = 0; // XXX we don't really care about Type2 optimization here... @@ -835,7 +842,7 @@ Type1Font.prototype = { charstringsCount++; charstringsDataLength += glyphs[glyph].length; } - log("There is " + charstringsCount + " glyphs (size: " + charstringsDataLength + ")"); + //log("There is " + charstringsCount + " glyphs (size: " + charstringsDataLength + ")"); // Create a CFF font data var cff = new Uint8Array(20000); @@ -1021,7 +1028,242 @@ Type1Font.prototype = { return data; }, + + createOpenTypeHeader: function(aNumTables) { + // sfnt version (4 bytes) + var version = [0x4F, 0x54, 0x54, 0X4F]; + + // numTables (2 bytes) + var numTables = aNumTables; + + // searchRange (2bytes) + // XXX oh man this is dirty, there's probably something obvious to do to + // quickly get the maximum power of 2 value... + var maxPower = 0; + var value = numTables; + while (value >= 2) { + value /= 2; + maxPower++; + } + + value = 2; + for (var i = 1; i < maxPower; i++) + value *= 2; + var searchRange = value * 16; + + // entrySelector (2 bytes) + var entrySelector = Math.log(value) / Math.log(2); + + // rangeShift (2 bytes) + var rangeShift = numTables * 16 - searchRange; + + return [].concat(version, + this.integerToBytes(numTables, 2), + this.integerToBytes(searchRange, 2), + this.integerToBytes(entrySelector, 2), + this.integerToBytes(rangeShift, 2)); + }, + + createTableEntry: function(aTag, aOffset, aData) { + // tag + var tag = [ + aTag.charCodeAt(0), + aTag.charCodeAt(1), + aTag.charCodeAt(2), + aTag.charCodeAt(3) + ]; + + // offset + var offset = aOffset; + + // length + var length = aData.length; + + // checksum + var checksum = this.bytesToInteger(tag) + offset + length; + + return [].concat(tag, + this.integerToBytes(checksum, 4), + this.integerToBytes(offset, 4), + this.integerToBytes(length, 4)); + }, + convertToOTF: function(aData, aFont) { + var otf = new Uint8Array(20000); + var currentOffset = 0; + + var header = this.createOpenTypeHeader(9); + otf.set(header, currentOffset); + currentOffset += header.length; + + var tablesLength = 9 * 16; + var virtualOffset = tablesLength + currentOffset; + var tableEntry = this.createTableEntry("CFF ", tablesLength + currentOffset, aData); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + + virtualOffset += aData.length; + + var OS2 = [ + 0x00, 0x03, // version + 0x02, 0x24, // xAvgCharWidth + 0x01, 0xF4, // usWeightClass + 0x00, 0x05, // usWidthClass + 0x00, 0x00, // fstype + 0x02, 0x8A, // ySubscriptXSize + 0x02, 0xBB, // ySubscriptYSize + 0x00, 0x00, // ySubscriptXOffset + 0x00, 0x8C, // ySubscriptYOffset + 0x02, 0x8A, // ySuperScriptXSize + 0x02, 0xBB, // ySuperScriptYSize + 0x00, 0x00, // ySuperScriptXOffset + 0x01, 0xDF, // ySuperScriptYOffset + 0x00, 0x31, // yStrikeOutSize + 0x01, 0x02, // yStrikeOutPosition + 0x00, 0x00, // sFamilyClass + 0x02, 0x00, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Panose + 0x00, 0x00, 0x00, 0x01, // ulUnicodeRange1 (Bits 0-31) + 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange2 (Bits 32-63) + 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange3 (Bits 64-95) + 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange4 (Bits 96-127) + 0x47, 0x49, 0x60, 0x20, // achVendID + 0x00, 0x20, // fsSelection + 0x00, 0x2D, // usFirstCharIndex + 0x00, 0x7A, // usLastCharIndex + 0x03, // sTypoAscender + 0x20, // sTypeDescender + 0x00, 0x38, // sTypoLineGap + 0x00, 0x5A, // usWinAscent + 0x02, 0xB4, // usWinDescent + 0x00, 0xCE, 0x00, 0x00, // ulCodePageRange1 (Bits 0-31) + 0x00, 0x01, 0x00, 0x00, // ulCodePageRange2 (Bits 32-63) + 0x00, // sxHeight + 0x00, // sCapHeight + 0x01, // usDefaultChar + 0xCD, // usBreakChar + 0x02 // usMaxContext + ]; + + var tableEntry = this.createTableEntry("OS/2", virtualOffset, OS2); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += OS2.length; + + var cmap = [ + 0x00, 0x00, // version + 0x00, 0x00 // numTables + ]; + + var tableEntry = this.createTableEntry("cmap", virtualOffset, cmap); + //otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += cmap.length; + + + var name = [ + 0x00, 0x00, // format + 0x00, 0x00, // Number of names Record + 0x00, 0x00 // Storage + ]; + + var tableEntry = this.createTableEntry("name", virtualOffset, name); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += name.length; + + + var hmtx = [ + 0x01, 0xF4, 0x00, + 0x00 + ]; + var tableEntry = this.createTableEntry("hmtx", virtualOffset, hmtx); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += hmtx.length; + + var maxp = [ + 0x00, 0x00, 0x50, 0x00, // Version number + 0x00, 0x01 // Nums of glyphs + ]; + var tableEntry = this.createTableEntry("maxp", virtualOffset, maxp); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += maxp.length; + + var head = [ + 0x00, 0x01, 0x00, 0x00, // Version number + 0x00, 0x00, 0x50, 0x00, // fontRevision + 0x00, 0x00, 0x00, 0x00, // checksumAdjustement + 0x5F, 0x0F, 0x3C, 0xF5, // magicNumber + 0x00, 0x00, // Flags + 0x00, 0x00, // unitsPerEM + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // created + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // modified + 0x00, 0x00, // xMin + 0x00, 0x00, // yMin + 0x00, 0x00, // xMax + 0x00, 0x00, // yMax + 0x00, 0x00, // macStyle + 0x00, 0x00, // lowestRecPPEM + 0x00, 0x00, // fontDirectionHint + 0x00, 0x00, // indexToLocFormat + 0x00, 0x00 // glyphDataFormat + ]; + var tableEntry = this.createTableEntry("head", virtualOffset, head); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += head.length; + + + var hhea = [ + 0x00, 0x01, 0x00, 0x00, // Version number + 0x00, 0x00, // Typographic Ascent + 0x00, 0x00, // Typographic Descent + 0x00, 0x00, // Line Gap + 0x01, 0xF4, // advanceWidthMax + 0x00, 0x00, // minLeftSidebearing + 0x00, 0x00, // minRightSidebearing + 0x00, 0x00, // xMaxExtent + 0x00, 0x00, // caretSlopeRise + 0x00, 0x00, // caretOffset + 0x00, 0x00, // -reserved- + 0x00, 0x00, // -reserved- + 0x00, 0x00, // -reserved- + 0x00, 0x00, // -reserved- + 0x00, 0x00, // metricDataFormat + 0x00, 0x01 // numberOfHMetrics + ]; + var tableEntry = this.createTableEntry("hhea", virtualOffset, hhea); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += hhea.length; + + + var post = [ + 0x00, 0x03, 0x00, 0x00, // Version number + 0x00, 0x00, 0x01, 0x00, // italicAngle + 0x00, 0x00, // underlinePosition + 0x00, 0x00, // underlineThickness + 0x00, 0x00, 0x00, 0x01, // isFixedPitch + 0x00, 0x00, 0x00, 0x00, // minMemType42 + 0x00, 0x00, 0x00, 0x00, // maxMemType42 + 0x00, 0x00, 0x00, 0x00, // minMemType1 + 0x00, 0x00, 0x00, 0x00 // maxMemType1 + ]; + var tableEntry = this.createTableEntry("post", virtualOffset, post); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += post.length; + + // Set the CFF data + otf.set(aData, currentOffset); + currentOffset += aData.length; + + var data = []; + for (var i = 0; i < currentOffset; i++) + data.push(otf[i]); + + writeToFile(data, "/tmp/pdf.js.otf"); } }; From 1c30621906d6cef79407215a1ad79d1374b06992 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sat, 11 Jun 2011 22:08:30 +0200 Subject: [PATCH 24/72] OTF does not complain anymore about a duplicate .notdef --- PDFFont.js | 183 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 123 insertions(+), 60 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 3dc4d4957..34011a9cf 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -736,8 +736,8 @@ Type1Font.prototype = { var defaultUsedCount = 0; var widths = {}; - for (var glyph in aCharstrings.map) { - var width = aCharstrings.get(glyph)[1]; + for (var i = 0; i < aCharstrings.length; i++) { + var width = aCharstrings[i].charstring[1]; var usedCount = (widths[width] || 0) + 1; if (usedCount > defaultUsedCount) { @@ -827,20 +827,45 @@ Type1Font.prototype = { } }, + getOrderedCharStrings: function(aFont) { + var dict = aFont.get("CharStrings") + var charstrings = []; + for (var glyph in dict.map) { + charstrings.push({ + glyph: glyph, + charstring: dict.map[glyph].slice() + }); + } + + charstrings.sort(function(a, b) { + return CFFStrings.indexOf(a.glyph) > CFFStrings.indexOf(b.glyph); + }); + charstrings.shift(); + + return charstrings; + }, + convertToCFF: function(aFont) { - var charstrings = aFont.get("CharStrings") + var charstrings = this.getOrderedCharStrings(aFont); var defaultWidth = this.getDefaultWidth(charstrings); var charstringsCount = 0; var charstringsDataLength = 0; - var glyphs = {}; + var glyphs = []; + var glyphsChecker = {}; var subrs = aFont.get("Private").get("Subrs"); var parser = new Type1Parser(); - for (var glyph in charstrings.map) { - var charstring = charstrings.get(glyph); - glyphs[glyph] = parser.flattenCharstring(charstring, defaultWidth, subrs); + for (var i = 0; i < charstrings.length; i++) { + var charstring = charstrings[i].charstring; + var glyph = charstrings[i].glyph; + if (glyphsChecker[glyph]) + error("glyphs already exists!"); + glyphsChecker[glyph] = true; + + var flattened = parser.flattenCharstring(charstring, defaultWidth, subrs); + glyphs.push(flattened); charstringsCount++; - charstringsDataLength += glyphs[glyph].length; + charstringsDataLength += flattened.length; } //log("There is " + charstringsCount + " glyphs (size: " + charstringsDataLength + ")"); @@ -877,8 +902,8 @@ Type1Font.prototype = { // Fill the charset header (first byte is the encoding) var charset = [0x00]; - for (var glyph in glyphs) { - var index = CFFStrings.indexOf(glyph); + for (var i = 0; i < glyphs.length; i++) { + var index = CFFStrings.indexOf(charstrings[i].glyph); var bytes = this.integerToBytes(index, 2); charset.push(bytes[0]); charset.push(bytes[1]); @@ -902,11 +927,11 @@ Type1Font.prototype = { // Encode the glyph and add it to the FUX var r = [[0x40, 0xEA]]; - for (var glyph in glyphs) { - var data = glyphs[glyph].slice(); + for (var i = 0; i < glyphs.length; i++) { + var data = glyphs[i].slice(); var charstring = []; - for (var i = 0; i < data.length; i++) { - var c = data[i]; + for (var j = 0; j < data.length; j++) { + var c = data[j]; if (!IsNum(c)) { var token = getNumFor[c]; if (!token) @@ -914,8 +939,8 @@ Type1Font.prototype = { charstring.push(token); } else { var bytes = this.encodeNumber(c); - for (var j = 0; j < bytes.length; j++) - charstring.push(bytes[j]); + for (var k = 0; k < bytes.length; k++) + charstring.push(bytes[k]); } } r.push(charstring); @@ -1092,16 +1117,16 @@ Type1Font.prototype = { var otf = new Uint8Array(20000); var currentOffset = 0; - var header = this.createOpenTypeHeader(9); + var numTables = 9; + var header = this.createOpenTypeHeader(numTables); otf.set(header, currentOffset); currentOffset += header.length; - var tablesLength = 9 * 16; - var virtualOffset = tablesLength + currentOffset; - var tableEntry = this.createTableEntry("CFF ", tablesLength + currentOffset, aData); + var baseOffset = numTables * (4 * 4) + currentOffset; + var virtualOffset = baseOffset; + var tableEntry = this.createTableEntry("CFF ", baseOffset, aData); otf.set(tableEntry, currentOffset); currentOffset += tableEntry.length; - virtualOffset += aData.length; var OS2 = [ @@ -1130,18 +1155,18 @@ Type1Font.prototype = { 0x00, 0x20, // fsSelection 0x00, 0x2D, // usFirstCharIndex 0x00, 0x7A, // usLastCharIndex - 0x03, // sTypoAscender - 0x20, // sTypeDescender + 0x00, 0x03, // sTypoAscender + 0x00, 0x20, // sTypeDescender 0x00, 0x38, // sTypoLineGap 0x00, 0x5A, // usWinAscent 0x02, 0xB4, // usWinDescent 0x00, 0xCE, 0x00, 0x00, // ulCodePageRange1 (Bits 0-31) 0x00, 0x01, 0x00, 0x00, // ulCodePageRange2 (Bits 32-63) - 0x00, // sxHeight - 0x00, // sCapHeight - 0x01, // usDefaultChar - 0xCD, // usBreakChar - 0x02 // usMaxContext + 0x00, 0x00, // sxHeight + 0x00, 0x00, // sCapHeight + 0x00, 0x01, // usDefaultChar + 0x00, 0xCD, // usBreakChar + 0x00, 0x02 // usMaxContext ]; var tableEntry = this.createTableEntry("OS/2", virtualOffset, OS2); @@ -1149,46 +1174,28 @@ Type1Font.prototype = { currentOffset += tableEntry.length; virtualOffset += OS2.length; + /** CMAP */ + var cmap = [ 0x00, 0x00, // version - 0x00, 0x00 // numTables + 0x00, 0x01, // numTables + 0x00, 0x03, // platformID + 0x00, 0x00, // encodingID + 0x00, 0x00, 0x00, 0x00, //offset + 0x00, 0x00, // format + 0x00, 0x40, // length + 0x00, 0x00, // language + 0x45, 0x46, 0x00, 0x45 ]; var tableEntry = this.createTableEntry("cmap", virtualOffset, cmap); - //otf.set(tableEntry, currentOffset); + otf.set(tableEntry, currentOffset); currentOffset += tableEntry.length; virtualOffset += cmap.length; + log(currentOffset + "::" + virtualOffset); - var name = [ - 0x00, 0x00, // format - 0x00, 0x00, // Number of names Record - 0x00, 0x00 // Storage - ]; - - var tableEntry = this.createTableEntry("name", virtualOffset, name); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += name.length; - - - var hmtx = [ - 0x01, 0xF4, 0x00, - 0x00 - ]; - var tableEntry = this.createTableEntry("hmtx", virtualOffset, hmtx); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += hmtx.length; - - var maxp = [ - 0x00, 0x00, 0x50, 0x00, // Version number - 0x00, 0x01 // Nums of glyphs - ]; - var tableEntry = this.createTableEntry("maxp", virtualOffset, maxp); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += maxp.length; + /** HEAD */ var head = [ 0x00, 0x01, 0x00, 0x00, // Version number @@ -1215,16 +1222,19 @@ Type1Font.prototype = { virtualOffset += head.length; + /** HHEA */ + var hhea = [ 0x00, 0x01, 0x00, 0x00, // Version number 0x00, 0x00, // Typographic Ascent 0x00, 0x00, // Typographic Descent 0x00, 0x00, // Line Gap - 0x01, 0xF4, // advanceWidthMax + 0xFF, 0xFF, // advanceWidthMax 0x00, 0x00, // minLeftSidebearing 0x00, 0x00, // minRightSidebearing 0x00, 0x00, // xMaxExtent 0x00, 0x00, // caretSlopeRise + 0x00, 0x00, // caretSlopeRun 0x00, 0x00, // caretOffset 0x00, 0x00, // -reserved- 0x00, 0x00, // -reserved- @@ -1238,6 +1248,52 @@ Type1Font.prototype = { currentOffset += tableEntry.length; virtualOffset += hhea.length; + /** HMTX */ + + var charstrings = this.getOrderedCharStrings(aFont); + var hmtx = [0x01, 0xF4, 0x00, 0x00]; + for (var i = 0; i < charstrings.length; i++) { + var charstring = charstrings[i].charstring; + var width = this.integerToBytes(charstring[1], 2); + var lsb = this.integerToBytes(charstring[0], 2); + hmtx.push(width[0]); + hmtx.push(width[1]); + hmtx.push(lsb[0]); + hmtx.push(lsb[1]); + } + + var tableEntry = this.createTableEntry("hmtx", virtualOffset, hmtx); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += hmtx.length; + + + /** MAXP */ + + var maxp = [ + 0x00, 0x00, 0x50, 0x00, // Version number + ].concat(this.integerToBytes(charstrings.length, 2)); // Num of glyphs + + var tableEntry = this.createTableEntry("maxp", virtualOffset, maxp); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += maxp.length; + + + /** NAME */ + + var name = [ + 0x00, 0x00, // format + 0x00, 0x00, // Number of names Record + 0x00, 0x00 // Storage + ]; + var tableEntry = this.createTableEntry("name", virtualOffset, name); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += name.length; + + + /** POST */ var post = [ 0x00, 0x03, 0x00, 0x00, // Version number @@ -1259,6 +1315,13 @@ Type1Font.prototype = { otf.set(aData, currentOffset); currentOffset += aData.length; + var tables = [OS2, cmap, hmtx, head, hhea, maxp, name, post]; + for (var i = 0; i < tables.length; i++) { + var table = tables[i]; + otf.set(table, currentOffset); + currentOffset += table.length; + } + var data = []; for (var i = 0; i < currentOffset; i++) data.push(otf[i]); From 667acb089c50d70f6638fef9b269c90ce3732102 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sat, 11 Jun 2011 22:36:18 +0200 Subject: [PATCH 25/72] OTF does not complain anymore about missize between the CFF data and the htmx table --- PDFFont.js | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 34011a9cf..33dd97866 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1124,7 +1124,7 @@ Type1Font.prototype = { var baseOffset = numTables * (4 * 4) + currentOffset; var virtualOffset = baseOffset; - var tableEntry = this.createTableEntry("CFF ", baseOffset, aData); + var tableEntry = this.createTableEntry("CFF ", virtualOffset, aData); otf.set(tableEntry, currentOffset); currentOffset += tableEntry.length; virtualOffset += aData.length; @@ -1192,7 +1192,6 @@ Type1Font.prototype = { otf.set(tableEntry, currentOffset); currentOffset += tableEntry.length; virtualOffset += cmap.length; - log(currentOffset + "::" + virtualOffset); /** HEAD */ @@ -1223,6 +1222,7 @@ Type1Font.prototype = { /** HHEA */ + var charstrings = this.getOrderedCharStrings(aFont); var hhea = [ 0x00, 0x01, 0x00, 0x00, // Version number @@ -1240,9 +1240,9 @@ Type1Font.prototype = { 0x00, 0x00, // -reserved- 0x00, 0x00, // -reserved- 0x00, 0x00, // -reserved- - 0x00, 0x00, // metricDataFormat - 0x00, 0x01 // numberOfHMetrics + 0x00, 0x00 // metricDataFormat ]; + hhea = hhea.concat(this.encodeNumber(charstrings.length, 2)); // numberOfHMetrics var tableEntry = this.createTableEntry("hhea", virtualOffset, hhea); otf.set(tableEntry, currentOffset); currentOffset += tableEntry.length; @@ -1250,16 +1250,13 @@ Type1Font.prototype = { /** HMTX */ - var charstrings = this.getOrderedCharStrings(aFont); var hmtx = [0x01, 0xF4, 0x00, 0x00]; for (var i = 0; i < charstrings.length; i++) { var charstring = charstrings[i].charstring; + log(charstrings[i].glyph + " " + charstring[1] + " :: " + charstring); var width = this.integerToBytes(charstring[1], 2); var lsb = this.integerToBytes(charstring[0], 2); - hmtx.push(width[0]); - hmtx.push(width[1]); - hmtx.push(lsb[0]); - hmtx.push(lsb[1]); + hmtx = hmtx.concat(width, lsb); } var tableEntry = this.createTableEntry("hmtx", virtualOffset, hmtx); @@ -1315,12 +1312,13 @@ Type1Font.prototype = { otf.set(aData, currentOffset); currentOffset += aData.length; - var tables = [OS2, cmap, hmtx, head, hhea, maxp, name, post]; + var tables = [OS2, cmap, head, hhea, hmtx, maxp, name, post]; for (var i = 0; i < tables.length; i++) { var table = tables[i]; otf.set(table, currentOffset); currentOffset += table.length; } + log(currentOffset + "::" + virtualOffset + "\n"); var data = []; for (var i = 0; i < currentOffset; i++) From e0beef4e252049273703efa4c6f8cc90662bffaf Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sun, 12 Jun 2011 00:54:47 +0200 Subject: [PATCH 26/72] No more errors for an empty unicode table --- PDFFont.js | 28 +- glyphlist.js | 4283 ++++++++++++++++++++++++++++++++++++++++++++++++++ test.html | 1 + 3 files changed, 4304 insertions(+), 8 deletions(-) create mode 100644 glyphlist.js diff --git a/PDFFont.js b/PDFFont.js index 33dd97866..0eb29d323 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1175,19 +1175,33 @@ Type1Font.prototype = { virtualOffset += OS2.length; /** CMAP */ + var charstrings = this.getOrderedCharStrings(aFont); var cmap = [ 0x00, 0x00, // version 0x00, 0x01, // numTables - 0x00, 0x03, // platformID + 0x00, 0x01, // platformID 0x00, 0x00, // encodingID - 0x00, 0x00, 0x00, 0x00, //offset - 0x00, 0x00, // format - 0x00, 0x40, // length - 0x00, 0x00, // language - 0x45, 0x46, 0x00, 0x45 + 0x00, 0x00, 0x00, 0x0C, //offset + 0x00, 0x00, + 0x01, 0x06, + 0x00, 0x00 ]; + var data = []; + for (var i = 0; i < 262; i++) { + data.push(0x00); + } + + for (var i = 0; i < charstrings.length; i++) { + var pos = GlyphsUnicode[charstrings[i].glyph]; + var b1 = parseInt("0x" + pos[0] + pos[1]); + var b2 = parseInt("0x" + pos[2] + pos[3]); + var pos = this.bytesToInteger([b1, b2]); + data[pos] = i + 1; + } + cmap = cmap.concat(data); + var tableEntry = this.createTableEntry("cmap", virtualOffset, cmap); otf.set(tableEntry, currentOffset); currentOffset += tableEntry.length; @@ -1222,7 +1236,6 @@ Type1Font.prototype = { /** HHEA */ - var charstrings = this.getOrderedCharStrings(aFont); var hhea = [ 0x00, 0x01, 0x00, 0x00, // Version number @@ -1253,7 +1266,6 @@ Type1Font.prototype = { var hmtx = [0x01, 0xF4, 0x00, 0x00]; for (var i = 0; i < charstrings.length; i++) { var charstring = charstrings[i].charstring; - log(charstrings[i].glyph + " " + charstring[1] + " :: " + charstring); var width = this.integerToBytes(charstring[1], 2); var lsb = this.integerToBytes(charstring[0], 2); hmtx = hmtx.concat(width, lsb); diff --git a/glyphlist.js b/glyphlist.js new file mode 100644 index 000000000..de07af4a5 --- /dev/null +++ b/glyphlist.js @@ -0,0 +1,4283 @@ +var GlyphsUnicode = { + A: "0041", + AE: "00C6", + AEacute: "01FC", + AEmacron: "01E2", + AEsmall: "F7E6", + Aacute: "00C1", + Aacutesmall: "F7E1", + Abreve: "0102", + Abreveacute: "1EAE", + Abrevecyrillic: "04D0", + Abrevedotbelow: "1EB6", + Abrevegrave: "1EB0", + Abrevehookabove: "1EB2", + Abrevetilde: "1EB4", + Acaron: "01CD", + Acircle: "24B6", + Acircumflex: "00C2", + Acircumflexacute: "1EA4", + Acircumflexdotbelow: "1EAC", + Acircumflexgrave: "1EA6", + Acircumflexhookabove: "1EA8", + Acircumflexsmall: "F7E2", + Acircumflextilde: "1EAA", + Acute: "F6C9", + Acutesmall: "F7B4", + Acyrillic: "0410", + Adblgrave: "0200", + Adieresis: "00C4", + Adieresiscyrillic: "04D2", + Adieresismacron: "01DE", + Adieresissmall: "F7E4", + Adotbelow: "1EA0", + Adotmacron: "01E0", + Agrave: "00C0", + Agravesmall: "F7E0", + Ahookabove: "1EA2", + Aiecyrillic: "04D4", + Ainvertedbreve: "0202", + Alpha: "0391", + Alphatonos: "0386", + Amacron: "0100", + Amonospace: "FF21", + Aogonek: "0104", + Aring: "00C5", + Aringacute: "01FA", + Aringbelow: "1E00", + Aringsmall: "F7E5", + Asmall: "F761", + Atilde: "00C3", + Atildesmall: "F7E3", + Aybarmenian: "0531", + B: "0042", + Bcircle: "24B7", + Bdotaccent: "1E02", + Bdotbelow: "1E04", + Becyrillic: "0411", + Benarmenian: "0532", + Beta: "0392", + Bhook: "0181", + Blinebelow: "1E06", + Bmonospace: "FF22", + Brevesmall: "F6F4", + Bsmall: "F762", + Btopbar: "0182", + C: "0043", + Caarmenian: "053E", + Cacute: "0106", + Caron: "F6CA", + Caronsmall: "F6F5", + Ccaron: "010C", + Ccedilla: "00C7", + Ccedillaacute: "1E08", + Ccedillasmall: "F7E7", + Ccircle: "24B8", + Ccircumflex: "0108", + Cdot: "010A", + Cdotaccent: "010A", + Cedillasmall: "F7B8", + Chaarmenian: "0549", + Cheabkhasiancyrillic: "04BC", + Checyrillic: "0427", + Chedescenderabkhasiancyrillic: "04BE", + Chedescendercyrillic: "04B6", + Chedieresiscyrillic: "04F4", + Cheharmenian: "0543", + Chekhakassiancyrillic: "04CB", + Cheverticalstrokecyrillic: "04B8", + Chi: "03A7", + Chook: "0187", + Circumflexsmall: "F6F6", + Cmonospace: "FF23", + Coarmenian: "0551", + Csmall: "F763", + D: "0044", + DZ: "01F1", + DZcaron: "01C4", + Daarmenian: "0534", + Dafrican: "0189", + Dcaron: "010E", + Dcedilla: "1E10", + Dcircle: "24B9", + Dcircumflexbelow: "1E12", + Dcroat: "0110", + Ddotaccent: "1E0A", + Ddotbelow: "1E0C", + Decyrillic: "0414", + Deicoptic: "03EE", + Delta: "2206", + Deltagreek: "0394", + Dhook: "018A", + Dieresis: "F6CB", + DieresisAcute: "F6CC", + DieresisGrave: "F6CD", + Dieresissmall: "F7A8", + Digammagreek: "03DC", + Djecyrillic: "0402", + Dlinebelow: "1E0E", + Dmonospace: "FF24", + Dotaccentsmall: "F6F7", + Dslash: "0110", + Dsmall: "F764", + Dtopbar: "018B", + Dz: "01F2", + Dzcaron: "01C5", + Dzeabkhasiancyrillic: "04E0", + Dzecyrillic: "0405", + Dzhecyrillic: "040F", + E: "0045", + Eacute: "00C9", + Eacutesmall: "F7E9", + Ebreve: "0114", + Ecaron: "011A", + Ecedillabreve: "1E1C", + Echarmenian: "0535", + Ecircle: "24BA", + Ecircumflex: "00CA", + Ecircumflexacute: "1EBE", + Ecircumflexbelow: "1E18", + Ecircumflexdotbelow: "1EC6", + Ecircumflexgrave: "1EC0", + Ecircumflexhookabove: "1EC2", + Ecircumflexsmall: "F7EA", + Ecircumflextilde: "1EC4", + Ecyrillic: "0404", + Edblgrave: "0204", + Edieresis: "00CB", + Edieresissmall: "F7EB", + Edot: "0116", + Edotaccent: "0116", + Edotbelow: "1EB8", + Efcyrillic: "0424", + Egrave: "00C8", + Egravesmall: "F7E8", + Eharmenian: "0537", + Ehookabove: "1EBA", + Eightroman: "2167", + Einvertedbreve: "0206", + Eiotifiedcyrillic: "0464", + Elcyrillic: "041B", + Elevenroman: "216A", + Emacron: "0112", + Emacronacute: "1E16", + Emacrongrave: "1E14", + Emcyrillic: "041C", + Emonospace: "FF25", + Encyrillic: "041D", + Endescendercyrillic: "04A2", + Eng: "014A", + Enghecyrillic: "04A4", + Enhookcyrillic: "04C7", + Eogonek: "0118", + Eopen: "0190", + Epsilon: "0395", + Epsilontonos: "0388", + Ercyrillic: "0420", + Ereversed: "018E", + Ereversedcyrillic: "042D", + Escyrillic: "0421", + Esdescendercyrillic: "04AA", + Esh: "01A9", + Esmall: "F765", + Eta: "0397", + Etarmenian: "0538", + Etatonos: "0389", + Eth: "00D0", + Ethsmall: "F7F0", + Etilde: "1EBC", + Etildebelow: "1E1A", + Euro: "20AC", + Ezh: "01B7", + Ezhcaron: "01EE", + Ezhreversed: "01B8", + F: "0046", + Fcircle: "24BB", + Fdotaccent: "1E1E", + Feharmenian: "0556", + Feicoptic: "03E4", + Fhook: "0191", + Fitacyrillic: "0472", + Fiveroman: "2164", + Fmonospace: "FF26", + Fourroman: "2163", + Fsmall: "F766", + G: "0047", + GBsquare: "3387", + Gacute: "01F4", + Gamma: "0393", + Gammaafrican: "0194", + Gangiacoptic: "03EA", + Gbreve: "011E", + Gcaron: "01E6", + Gcedilla: "0122", + Gcircle: "24BC", + Gcircumflex: "011C", + Gcommaaccent: "0122", + Gdot: "0120", + Gdotaccent: "0120", + Gecyrillic: "0413", + Ghadarmenian: "0542", + Ghemiddlehookcyrillic: "0494", + Ghestrokecyrillic: "0492", + Gheupturncyrillic: "0490", + Ghook: "0193", + Gimarmenian: "0533", + Gjecyrillic: "0403", + Gmacron: "1E20", + Gmonospace: "FF27", + Grave: "F6CE", + Gravesmall: "F760", + Gsmall: "F767", + Gsmallhook: "029B", + Gstroke: "01E4", + H: "0048", + H18533: "25CF", + H18543: "25AA", + H18551: "25AB", + H22073: "25A1", + HPsquare: "33CB", + Haabkhasiancyrillic: "04A8", + Hadescendercyrillic: "04B2", + Hardsigncyrillic: "042A", + Hbar: "0126", + Hbrevebelow: "1E2A", + Hcedilla: "1E28", + Hcircle: "24BD", + Hcircumflex: "0124", + Hdieresis: "1E26", + Hdotaccent: "1E22", + Hdotbelow: "1E24", + Hmonospace: "FF28", + Hoarmenian: "0540", + Horicoptic: "03E8", + Hsmall: "F768", + Hungarumlaut: "F6CF", + Hungarumlautsmall: "F6F8", + Hzsquare: "3390", + I: "0049", + IAcyrillic: "042F", + IJ: "0132", + IUcyrillic: "042E", + Iacute: "00CD", + Iacutesmall: "F7ED", + Ibreve: "012C", + Icaron: "01CF", + Icircle: "24BE", + Icircumflex: "00CE", + Icircumflexsmall: "F7EE", + Icyrillic: "0406", + Idblgrave: "0208", + Idieresis: "00CF", + Idieresisacute: "1E2E", + Idieresiscyrillic: "04E4", + Idieresissmall: "F7EF", + Idot: "0130", + Idotaccent: "0130", + Idotbelow: "1ECA", + Iebrevecyrillic: "04D6", + Iecyrillic: "0415", + Ifraktur: "2111", + Igrave: "00CC", + Igravesmall: "F7EC", + Ihookabove: "1EC8", + Iicyrillic: "0418", + Iinvertedbreve: "020A", + Iishortcyrillic: "0419", + Imacron: "012A", + Imacroncyrillic: "04E2", + Imonospace: "FF29", + Iniarmenian: "053B", + Iocyrillic: "0401", + Iogonek: "012E", + Iota: "0399", + Iotaafrican: "0196", + Iotadieresis: "03AA", + Iotatonos: "038A", + Ismall: "F769", + Istroke: "0197", + Itilde: "0128", + Itildebelow: "1E2C", + Izhitsacyrillic: "0474", + Izhitsadblgravecyrillic: "0476", + J: "004A", + Jaarmenian: "0541", + Jcircle: "24BF", + Jcircumflex: "0134", + Jecyrillic: "0408", + Jheharmenian: "054B", + Jmonospace: "FF2A", + Jsmall: "F76A", + K: "004B", + KBsquare: "3385", + KKsquare: "33CD", + Kabashkircyrillic: "04A0", + Kacute: "1E30", + Kacyrillic: "041A", + Kadescendercyrillic: "049A", + Kahookcyrillic: "04C3", + Kappa: "039A", + Kastrokecyrillic: "049E", + Kaverticalstrokecyrillic: "049C", + Kcaron: "01E8", + Kcedilla: "0136", + Kcircle: "24C0", + Kcommaaccent: "0136", + Kdotbelow: "1E32", + Keharmenian: "0554", + Kenarmenian: "053F", + Khacyrillic: "0425", + Kheicoptic: "03E6", + Khook: "0198", + Kjecyrillic: "040C", + Klinebelow: "1E34", + Kmonospace: "FF2B", + Koppacyrillic: "0480", + Koppagreek: "03DE", + Ksicyrillic: "046E", + Ksmall: "F76B", + L: "004C", + LJ: "01C7", + LL: "F6BF", + Lacute: "0139", + Lambda: "039B", + Lcaron: "013D", + Lcedilla: "013B", + Lcircle: "24C1", + Lcircumflexbelow: "1E3C", + Lcommaaccent: "013B", + Ldot: "013F", + Ldotaccent: "013F", + Ldotbelow: "1E36", + Ldotbelowmacron: "1E38", + Liwnarmenian: "053C", + Lj: "01C8", + Ljecyrillic: "0409", + Llinebelow: "1E3A", + Lmonospace: "FF2C", + Lslash: "0141", + Lslashsmall: "F6F9", + Lsmall: "F76C", + M: "004D", + MBsquare: "3386", + Macron: "F6D0", + Macronsmall: "F7AF", + Macute: "1E3E", + Mcircle: "24C2", + Mdotaccent: "1E40", + Mdotbelow: "1E42", + Menarmenian: "0544", + Mmonospace: "FF2D", + Msmall: "F76D", + Mturned: "019C", + Mu: "039C", + N: "004E", + NJ: "01CA", + Nacute: "0143", + Ncaron: "0147", + Ncedilla: "0145", + Ncircle: "24C3", + Ncircumflexbelow: "1E4A", + Ncommaaccent: "0145", + Ndotaccent: "1E44", + Ndotbelow: "1E46", + Nhookleft: "019D", + Nineroman: "2168", + Nj: "01CB", + Njecyrillic: "040A", + Nlinebelow: "1E48", + Nmonospace: "FF2E", + Nowarmenian: "0546", + Nsmall: "F76E", + Ntilde: "00D1", + Ntildesmall: "F7F1", + Nu: "039D", + O: "004F", + OE: "0152", + OEsmall: "F6FA", + Oacute: "00D3", + Oacutesmall: "F7F3", + Obarredcyrillic: "04E8", + Obarreddieresiscyrillic: "04EA", + Obreve: "014E", + Ocaron: "01D1", + Ocenteredtilde: "019F", + Ocircle: "24C4", + Ocircumflex: "00D4", + Ocircumflexacute: "1ED0", + Ocircumflexdotbelow: "1ED8", + Ocircumflexgrave: "1ED2", + Ocircumflexhookabove: "1ED4", + Ocircumflexsmall: "F7F4", + Ocircumflextilde: "1ED6", + Ocyrillic: "041E", + Odblacute: "0150", + Odblgrave: "020C", + Odieresis: "00D6", + Odieresiscyrillic: "04E6", + Odieresissmall: "F7F6", + Odotbelow: "1ECC", + Ogoneksmall: "F6FB", + Ograve: "00D2", + Ogravesmall: "F7F2", + Oharmenian: "0555", + Ohm: "2126", + Ohookabove: "1ECE", + Ohorn: "01A0", + Ohornacute: "1EDA", + Ohorndotbelow: "1EE2", + Ohorngrave: "1EDC", + Ohornhookabove: "1EDE", + Ohorntilde: "1EE0", + Ohungarumlaut: "0150", + Oi: "01A2", + Oinvertedbreve: "020E", + Omacron: "014C", + Omacronacute: "1E52", + Omacrongrave: "1E50", + Omega: "2126", + Omegacyrillic: "0460", + Omegagreek: "03A9", + Omegaroundcyrillic: "047A", + Omegatitlocyrillic: "047C", + Omegatonos: "038F", + Omicron: "039F", + Omicrontonos: "038C", + Omonospace: "FF2F", + Oneroman: "2160", + Oogonek: "01EA", + Oogonekmacron: "01EC", + Oopen: "0186", + Oslash: "00D8", + Oslashacute: "01FE", + Oslashsmall: "F7F8", + Osmall: "F76F", + Ostrokeacute: "01FE", + Otcyrillic: "047E", + Otilde: "00D5", + Otildeacute: "1E4C", + Otildedieresis: "1E4E", + Otildesmall: "F7F5", + P: "0050", + Pacute: "1E54", + Pcircle: "24C5", + Pdotaccent: "1E56", + Pecyrillic: "041F", + Peharmenian: "054A", + Pemiddlehookcyrillic: "04A6", + Phi: "03A6", + Phook: "01A4", + Pi: "03A0", + Piwrarmenian: "0553", + Pmonospace: "FF30", + Psi: "03A8", + Psicyrillic: "0470", + Psmall: "F770", + Q: "0051", + Qcircle: "24C6", + Qmonospace: "FF31", + Qsmall: "F771", + R: "0052", + Raarmenian: "054C", + Racute: "0154", + Rcaron: "0158", + Rcedilla: "0156", + Rcircle: "24C7", + Rcommaaccent: "0156", + Rdblgrave: "0210", + Rdotaccent: "1E58", + Rdotbelow: "1E5A", + Rdotbelowmacron: "1E5C", + Reharmenian: "0550", + Rfraktur: "211C", + Rho: "03A1", + Ringsmall: "F6FC", + Rinvertedbreve: "0212", + Rlinebelow: "1E5E", + Rmonospace: "FF32", + Rsmall: "F772", + Rsmallinverted: "0281", + Rsmallinvertedsuperior: "02B6", + S: "0053", + SF010000: "250C", + SF020000: "2514", + SF030000: "2510", + SF040000: "2518", + SF050000: "253C", + SF060000: "252C", + SF070000: "2534", + SF080000: "251C", + SF090000: "2524", + SF100000: "2500", + SF110000: "2502", + SF190000: "2561", + SF200000: "2562", + SF210000: "2556", + SF220000: "2555", + SF230000: "2563", + SF240000: "2551", + SF250000: "2557", + SF260000: "255D", + SF270000: "255C", + SF280000: "255B", + SF360000: "255E", + SF370000: "255F", + SF380000: "255A", + SF390000: "2554", + SF400000: "2569", + SF410000: "2566", + SF420000: "2560", + SF430000: "2550", + SF440000: "256C", + SF450000: "2567", + SF460000: "2568", + SF470000: "2564", + SF480000: "2565", + SF490000: "2559", + SF500000: "2558", + SF510000: "2552", + SF520000: "2553", + SF530000: "256B", + SF540000: "256A", + Sacute: "015A", + Sacutedotaccent: "1E64", + Sampigreek: "03E0", + Scaron: "0160", + Scarondotaccent: "1E66", + Scaronsmall: "F6FD", + Scedilla: "015E", + Schwa: "018F", + Schwacyrillic: "04D8", + Schwadieresiscyrillic: "04DA", + Scircle: "24C8", + Scircumflex: "015C", + Scommaaccent: "0218", + Sdotaccent: "1E60", + Sdotbelow: "1E62", + Sdotbelowdotaccent: "1E68", + Seharmenian: "054D", + Sevenroman: "2166", + Shaarmenian: "0547", + Shacyrillic: "0428", + Shchacyrillic: "0429", + Sheicoptic: "03E2", + Shhacyrillic: "04BA", + Shimacoptic: "03EC", + Sigma: "03A3", + Sixroman: "2165", + Smonospace: "FF33", + Softsigncyrillic: "042C", + Ssmall: "F773", + Stigmagreek: "03DA", + T: "0054", + Tau: "03A4", + Tbar: "0166", + Tcaron: "0164", + Tcedilla: "0162", + Tcircle: "24C9", + Tcircumflexbelow: "1E70", + Tcommaaccent: "0162", + Tdotaccent: "1E6A", + Tdotbelow: "1E6C", + Tecyrillic: "0422", + Tedescendercyrillic: "04AC", + Tenroman: "2169", + Tetsecyrillic: "04B4", + Theta: "0398", + Thook: "01AC", + Thorn: "00DE", + Thornsmall: "F7FE", + Threeroman: "2162", + Tildesmall: "F6FE", + Tiwnarmenian: "054F", + Tlinebelow: "1E6E", + Tmonospace: "FF34", + Toarmenian: "0539", + Tonefive: "01BC", + Tonesix: "0184", + Tonetwo: "01A7", + Tretroflexhook: "01AE", + Tsecyrillic: "0426", + Tshecyrillic: "040B", + Tsmall: "F774", + Twelveroman: "216B", + Tworoman: "2161", + U: "0055", + Uacute: "00DA", + Uacutesmall: "F7FA", + Ubreve: "016C", + Ucaron: "01D3", + Ucircle: "24CA", + Ucircumflex: "00DB", + Ucircumflexbelow: "1E76", + Ucircumflexsmall: "F7FB", + Ucyrillic: "0423", + Udblacute: "0170", + Udblgrave: "0214", + Udieresis: "00DC", + Udieresisacute: "01D7", + Udieresisbelow: "1E72", + Udieresiscaron: "01D9", + Udieresiscyrillic: "04F0", + Udieresisgrave: "01DB", + Udieresismacron: "01D5", + Udieresissmall: "F7FC", + Udotbelow: "1EE4", + Ugrave: "00D9", + Ugravesmall: "F7F9", + Uhookabove: "1EE6", + Uhorn: "01AF", + Uhornacute: "1EE8", + Uhorndotbelow: "1EF0", + Uhorngrave: "1EEA", + Uhornhookabove: "1EEC", + Uhorntilde: "1EEE", + Uhungarumlaut: "0170", + Uhungarumlautcyrillic: "04F2", + Uinvertedbreve: "0216", + Ukcyrillic: "0478", + Umacron: "016A", + Umacroncyrillic: "04EE", + Umacrondieresis: "1E7A", + Umonospace: "FF35", + Uogonek: "0172", + Upsilon: "03A5", + Upsilon1: "03D2", + Upsilonacutehooksymbolgreek: "03D3", + Upsilonafrican: "01B1", + Upsilondieresis: "03AB", + Upsilondieresishooksymbolgreek: "03D4", + Upsilonhooksymbol: "03D2", + Upsilontonos: "038E", + Uring: "016E", + Ushortcyrillic: "040E", + Usmall: "F775", + Ustraightcyrillic: "04AE", + Ustraightstrokecyrillic: "04B0", + Utilde: "0168", + Utildeacute: "1E78", + Utildebelow: "1E74", + V: "0056", + Vcircle: "24CB", + Vdotbelow: "1E7E", + Vecyrillic: "0412", + Vewarmenian: "054E", + Vhook: "01B2", + Vmonospace: "FF36", + Voarmenian: "0548", + Vsmall: "F776", + Vtilde: "1E7C", + W: "0057", + Wacute: "1E82", + Wcircle: "24CC", + Wcircumflex: "0174", + Wdieresis: "1E84", + Wdotaccent: "1E86", + Wdotbelow: "1E88", + Wgrave: "1E80", + Wmonospace: "FF37", + Wsmall: "F777", + X: "0058", + Xcircle: "24CD", + Xdieresis: "1E8C", + Xdotaccent: "1E8A", + Xeharmenian: "053D", + Xi: "039E", + Xmonospace: "FF38", + Xsmall: "F778", + Y: "0059", + Yacute: "00DD", + Yacutesmall: "F7FD", + Yatcyrillic: "0462", + Ycircle: "24CE", + Ycircumflex: "0176", + Ydieresis: "0178", + Ydieresissmall: "F7FF", + Ydotaccent: "1E8E", + Ydotbelow: "1EF4", + Yericyrillic: "042B", + Yerudieresiscyrillic: "04F8", + Ygrave: "1EF2", + Yhook: "01B3", + Yhookabove: "1EF6", + Yiarmenian: "0545", + Yicyrillic: "0407", + Yiwnarmenian: "0552", + Ymonospace: "FF39", + Ysmall: "F779", + Ytilde: "1EF8", + Yusbigcyrillic: "046A", + Yusbigiotifiedcyrillic: "046C", + Yuslittlecyrillic: "0466", + Yuslittleiotifiedcyrillic: "0468", + Z: "005A", + Zaarmenian: "0536", + Zacute: "0179", + Zcaron: "017D", + Zcaronsmall: "F6FF", + Zcircle: "24CF", + Zcircumflex: "1E90", + Zdot: "017B", + Zdotaccent: "017B", + Zdotbelow: "1E92", + Zecyrillic: "0417", + Zedescendercyrillic: "0498", + Zedieresiscyrillic: "04DE", + Zeta: "0396", + Zhearmenian: "053A", + Zhebrevecyrillic: "04C1", + Zhecyrillic: "0416", + Zhedescendercyrillic: "0496", + Zhedieresiscyrillic: "04DC", + Zlinebelow: "1E94", + Zmonospace: "FF3A", + Zsmall: "F77A", + Zstroke: "01B5", + a: "0061", + aabengali: "0986", + aacute: "00E1", + aadeva: "0906", + aagujarati: "0A86", + aagurmukhi: "0A06", + aamatragurmukhi: "0A3E", + aarusquare: "3303", + aavowelsignbengali: "09BE", + aavowelsigndeva: "093E", + aavowelsigngujarati: "0ABE", + abbreviationmarkarmenian: "055F", + abbreviationsigndeva: "0970", + abengali: "0985", + abopomofo: "311A", + abreve: "0103", + abreveacute: "1EAF", + abrevecyrillic: "04D1", + abrevedotbelow: "1EB7", + abrevegrave: "1EB1", + abrevehookabove: "1EB3", + abrevetilde: "1EB5", + acaron: "01CE", + acircle: "24D0", + acircumflex: "00E2", + acircumflexacute: "1EA5", + acircumflexdotbelow: "1EAD", + acircumflexgrave: "1EA7", + acircumflexhookabove: "1EA9", + acircumflextilde: "1EAB", + acute: "00B4", + acutebelowcmb: "0317", + acutecmb: "0301", + acutecomb: "0301", + acutedeva: "0954", + acutelowmod: "02CF", + acutetonecmb: "0341", + acyrillic: "0430", + adblgrave: "0201", + addakgurmukhi: "0A71", + adeva: "0905", + adieresis: "00E4", + adieresiscyrillic: "04D3", + adieresismacron: "01DF", + adotbelow: "1EA1", + adotmacron: "01E1", + ae: "00E6", + aeacute: "01FD", + aekorean: "3150", + aemacron: "01E3", + afii00208: "2015", + afii08941: "20A4", + afii10017: "0410", + afii10018: "0411", + afii10019: "0412", + afii10020: "0413", + afii10021: "0414", + afii10022: "0415", + afii10023: "0401", + afii10024: "0416", + afii10025: "0417", + afii10026: "0418", + afii10027: "0419", + afii10028: "041A", + afii10029: "041B", + afii10030: "041C", + afii10031: "041D", + afii10032: "041E", + afii10033: "041F", + afii10034: "0420", + afii10035: "0421", + afii10036: "0422", + afii10037: "0423", + afii10038: "0424", + afii10039: "0425", + afii10040: "0426", + afii10041: "0427", + afii10042: "0428", + afii10043: "0429", + afii10044: "042A", + afii10045: "042B", + afii10046: "042C", + afii10047: "042D", + afii10048: "042E", + afii10049: "042F", + afii10050: "0490", + afii10051: "0402", + afii10052: "0403", + afii10053: "0404", + afii10054: "0405", + afii10055: "0406", + afii10056: "0407", + afii10057: "0408", + afii10058: "0409", + afii10059: "040A", + afii10060: "040B", + afii10061: "040C", + afii10062: "040E", + afii10063: "F6C4", + afii10064: "F6C5", + afii10065: "0430", + afii10066: "0431", + afii10067: "0432", + afii10068: "0433", + afii10069: "0434", + afii10070: "0435", + afii10071: "0451", + afii10072: "0436", + afii10073: "0437", + afii10074: "0438", + afii10075: "0439", + afii10076: "043A", + afii10077: "043B", + afii10078: "043C", + afii10079: "043D", + afii10080: "043E", + afii10081: "043F", + afii10082: "0440", + afii10083: "0441", + afii10084: "0442", + afii10085: "0443", + afii10086: "0444", + afii10087: "0445", + afii10088: "0446", + afii10089: "0447", + afii10090: "0448", + afii10091: "0449", + afii10092: "044A", + afii10093: "044B", + afii10094: "044C", + afii10095: "044D", + afii10096: "044E", + afii10097: "044F", + afii10098: "0491", + afii10099: "0452", + afii10100: "0453", + afii10101: "0454", + afii10102: "0455", + afii10103: "0456", + afii10104: "0457", + afii10105: "0458", + afii10106: "0459", + afii10107: "045A", + afii10108: "045B", + afii10109: "045C", + afii10110: "045E", + afii10145: "040F", + afii10146: "0462", + afii10147: "0472", + afii10148: "0474", + afii10192: "F6C6", + afii10193: "045F", + afii10194: "0463", + afii10195: "0473", + afii10196: "0475", + afii10831: "F6C7", + afii10832: "F6C8", + afii10846: "04D9", + afii299: "200E", + afii300: "200F", + afii301: "200D", + afii57381: "066A", + afii57388: "060C", + afii57392: "0660", + afii57393: "0661", + afii57394: "0662", + afii57395: "0663", + afii57396: "0664", + afii57397: "0665", + afii57398: "0666", + afii57399: "0667", + afii57400: "0668", + afii57401: "0669", + afii57403: "061B", + afii57407: "061F", + afii57409: "0621", + afii57410: "0622", + afii57411: "0623", + afii57412: "0624", + afii57413: "0625", + afii57414: "0626", + afii57415: "0627", + afii57416: "0628", + afii57417: "0629", + afii57418: "062A", + afii57419: "062B", + afii57420: "062C", + afii57421: "062D", + afii57422: "062E", + afii57423: "062F", + afii57424: "0630", + afii57425: "0631", + afii57426: "0632", + afii57427: "0633", + afii57428: "0634", + afii57429: "0635", + afii57430: "0636", + afii57431: "0637", + afii57432: "0638", + afii57433: "0639", + afii57434: "063A", + afii57440: "0640", + afii57441: "0641", + afii57442: "0642", + afii57443: "0643", + afii57444: "0644", + afii57445: "0645", + afii57446: "0646", + afii57448: "0648", + afii57449: "0649", + afii57450: "064A", + afii57451: "064B", + afii57452: "064C", + afii57453: "064D", + afii57454: "064E", + afii57455: "064F", + afii57456: "0650", + afii57457: "0651", + afii57458: "0652", + afii57470: "0647", + afii57505: "06A4", + afii57506: "067E", + afii57507: "0686", + afii57508: "0698", + afii57509: "06AF", + afii57511: "0679", + afii57512: "0688", + afii57513: "0691", + afii57514: "06BA", + afii57519: "06D2", + afii57534: "06D5", + afii57636: "20AA", + afii57645: "05BE", + afii57658: "05C3", + afii57664: "05D0", + afii57665: "05D1", + afii57666: "05D2", + afii57667: "05D3", + afii57668: "05D4", + afii57669: "05D5", + afii57670: "05D6", + afii57671: "05D7", + afii57672: "05D8", + afii57673: "05D9", + afii57674: "05DA", + afii57675: "05DB", + afii57676: "05DC", + afii57677: "05DD", + afii57678: "05DE", + afii57679: "05DF", + afii57680: "05E0", + afii57681: "05E1", + afii57682: "05E2", + afii57683: "05E3", + afii57684: "05E4", + afii57685: "05E5", + afii57686: "05E6", + afii57687: "05E7", + afii57688: "05E8", + afii57689: "05E9", + afii57690: "05EA", + afii57694: "FB2A", + afii57695: "FB2B", + afii57700: "FB4B", + afii57705: "FB1F", + afii57716: "05F0", + afii57717: "05F1", + afii57718: "05F2", + afii57723: "FB35", + afii57793: "05B4", + afii57794: "05B5", + afii57795: "05B6", + afii57796: "05BB", + afii57797: "05B8", + afii57798: "05B7", + afii57799: "05B0", + afii57800: "05B2", + afii57801: "05B1", + afii57802: "05B3", + afii57803: "05C2", + afii57804: "05C1", + afii57806: "05B9", + afii57807: "05BC", + afii57839: "05BD", + afii57841: "05BF", + afii57842: "05C0", + afii57929: "02BC", + afii61248: "2105", + afii61289: "2113", + afii61352: "2116", + afii61573: "202C", + afii61574: "202D", + afii61575: "202E", + afii61664: "200C", + afii63167: "066D", + afii64937: "02BD", + agrave: "00E0", + agujarati: "0A85", + agurmukhi: "0A05", + ahiragana: "3042", + ahookabove: "1EA3", + aibengali: "0990", + aibopomofo: "311E", + aideva: "0910", + aiecyrillic: "04D5", + aigujarati: "0A90", + aigurmukhi: "0A10", + aimatragurmukhi: "0A48", + ainarabic: "0639", + ainfinalarabic: "FECA", + aininitialarabic: "FECB", + ainmedialarabic: "FECC", + ainvertedbreve: "0203", + aivowelsignbengali: "09C8", + aivowelsigndeva: "0948", + aivowelsigngujarati: "0AC8", + akatakana: "30A2", + akatakanahalfwidth: "FF71", + akorean: "314F", + alef: "05D0", + alefarabic: "0627", + alefdageshhebrew: "FB30", + aleffinalarabic: "FE8E", + alefhamzaabovearabic: "0623", + alefhamzaabovefinalarabic: "FE84", + alefhamzabelowarabic: "0625", + alefhamzabelowfinalarabic: "FE88", + alefhebrew: "05D0", + aleflamedhebrew: "FB4F", + alefmaddaabovearabic: "0622", + alefmaddaabovefinalarabic: "FE82", + alefmaksuraarabic: "0649", + alefmaksurafinalarabic: "FEF0", + alefmaksurainitialarabic: "FEF3", + alefmaksuramedialarabic: "FEF4", + alefpatahhebrew: "FB2E", + alefqamatshebrew: "FB2F", + aleph: "2135", + allequal: "224C", + alpha: "03B1", + alphatonos: "03AC", + amacron: "0101", + amonospace: "FF41", + ampersand: "0026", + ampersandmonospace: "FF06", + ampersandsmall: "F726", + amsquare: "33C2", + anbopomofo: "3122", + angbopomofo: "3124", + angkhankhuthai: "0E5A", + angle: "2220", + anglebracketleft: "3008", + anglebracketleftvertical: "FE3F", + anglebracketright: "3009", + anglebracketrightvertical: "FE40", + angleleft: "2329", + angleright: "232A", + angstrom: "212B", + anoteleia: "0387", + anudattadeva: "0952", + anusvarabengali: "0982", + anusvaradeva: "0902", + anusvaragujarati: "0A82", + aogonek: "0105", + apaatosquare: "3300", + aparen: "249C", + apostrophearmenian: "055A", + apostrophemod: "02BC", + apple: "F8FF", + approaches: "2250", + approxequal: "2248", + approxequalorimage: "2252", + approximatelyequal: "2245", + araeaekorean: "318E", + araeakorean: "318D", + arc: "2312", + arighthalfring: "1E9A", + aring: "00E5", + aringacute: "01FB", + aringbelow: "1E01", + arrowboth: "2194", + arrowdashdown: "21E3", + arrowdashleft: "21E0", + arrowdashright: "21E2", + arrowdashup: "21E1", + arrowdblboth: "21D4", + arrowdbldown: "21D3", + arrowdblleft: "21D0", + arrowdblright: "21D2", + arrowdblup: "21D1", + arrowdown: "2193", + arrowdownleft: "2199", + arrowdownright: "2198", + arrowdownwhite: "21E9", + arrowheaddownmod: "02C5", + arrowheadleftmod: "02C2", + arrowheadrightmod: "02C3", + arrowheadupmod: "02C4", + arrowhorizex: "F8E7", + arrowleft: "2190", + arrowleftdbl: "21D0", + arrowleftdblstroke: "21CD", + arrowleftoverright: "21C6", + arrowleftwhite: "21E6", + arrowright: "2192", + arrowrightdblstroke: "21CF", + arrowrightheavy: "279E", + arrowrightoverleft: "21C4", + arrowrightwhite: "21E8", + arrowtableft: "21E4", + arrowtabright: "21E5", + arrowup: "2191", + arrowupdn: "2195", + arrowupdnbse: "21A8", + arrowupdownbase: "21A8", + arrowupleft: "2196", + arrowupleftofdown: "21C5", + arrowupright: "2197", + arrowupwhite: "21E7", + arrowvertex: "F8E6", + asciicircum: "005E", + asciicircummonospace: "FF3E", + asciitilde: "007E", + asciitildemonospace: "FF5E", + ascript: "0251", + ascriptturned: "0252", + asmallhiragana: "3041", + asmallkatakana: "30A1", + asmallkatakanahalfwidth: "FF67", + asterisk: "002A", + asteriskaltonearabic: "066D", + asteriskarabic: "066D", + asteriskmath: "2217", + asteriskmonospace: "FF0A", + asterisksmall: "FE61", + asterism: "2042", + asuperior: "F6E9", + asymptoticallyequal: "2243", + at: "0040", + atilde: "00E3", + atmonospace: "FF20", + atsmall: "FE6B", + aturned: "0250", + aubengali: "0994", + aubopomofo: "3120", + audeva: "0914", + augujarati: "0A94", + augurmukhi: "0A14", + aulengthmarkbengali: "09D7", + aumatragurmukhi: "0A4C", + auvowelsignbengali: "09CC", + auvowelsigndeva: "094C", + auvowelsigngujarati: "0ACC", + avagrahadeva: "093D", + aybarmenian: "0561", + ayin: "05E2", + ayinaltonehebrew: "FB20", + ayinhebrew: "05E2", + b: "0062", + babengali: "09AC", + backslash: "005C", + backslashmonospace: "FF3C", + badeva: "092C", + bagujarati: "0AAC", + bagurmukhi: "0A2C", + bahiragana: "3070", + bahtthai: "0E3F", + bakatakana: "30D0", + bar: "007C", + barmonospace: "FF5C", + bbopomofo: "3105", + bcircle: "24D1", + bdotaccent: "1E03", + bdotbelow: "1E05", + beamedsixteenthnotes: "266C", + because: "2235", + becyrillic: "0431", + beharabic: "0628", + behfinalarabic: "FE90", + behinitialarabic: "FE91", + behiragana: "3079", + behmedialarabic: "FE92", + behmeeminitialarabic: "FC9F", + behmeemisolatedarabic: "FC08", + behnoonfinalarabic: "FC6D", + bekatakana: "30D9", + benarmenian: "0562", + bet: "05D1", + beta: "03B2", + betasymbolgreek: "03D0", + betdagesh: "FB31", + betdageshhebrew: "FB31", + bethebrew: "05D1", + betrafehebrew: "FB4C", + bhabengali: "09AD", + bhadeva: "092D", + bhagujarati: "0AAD", + bhagurmukhi: "0A2D", + bhook: "0253", + bihiragana: "3073", + bikatakana: "30D3", + bilabialclick: "0298", + bindigurmukhi: "0A02", + birusquare: "3331", + blackcircle: "25CF", + blackdiamond: "25C6", + blackdownpointingtriangle: "25BC", + blackleftpointingpointer: "25C4", + blackleftpointingtriangle: "25C0", + blacklenticularbracketleft: "3010", + blacklenticularbracketleftvertical: "FE3B", + blacklenticularbracketright: "3011", + blacklenticularbracketrightvertical: "FE3C", + blacklowerlefttriangle: "25E3", + blacklowerrighttriangle: "25E2", + blackrectangle: "25AC", + blackrightpointingpointer: "25BA", + blackrightpointingtriangle: "25B6", + blacksmallsquare: "25AA", + blacksmilingface: "263B", + blacksquare: "25A0", + blackstar: "2605", + blackupperlefttriangle: "25E4", + blackupperrighttriangle: "25E5", + blackuppointingsmalltriangle: "25B4", + blackuppointingtriangle: "25B2", + blank: "2423", + blinebelow: "1E07", + block: "2588", + bmonospace: "FF42", + bobaimaithai: "0E1A", + bohiragana: "307C", + bokatakana: "30DC", + bparen: "249D", + bqsquare: "33C3", + braceex: "F8F4", + braceleft: "007B", + braceleftbt: "F8F3", + braceleftmid: "F8F2", + braceleftmonospace: "FF5B", + braceleftsmall: "FE5B", + bracelefttp: "F8F1", + braceleftvertical: "FE37", + braceright: "007D", + bracerightbt: "F8FE", + bracerightmid: "F8FD", + bracerightmonospace: "FF5D", + bracerightsmall: "FE5C", + bracerighttp: "F8FC", + bracerightvertical: "FE38", + bracketleft: "005B", + bracketleftbt: "F8F0", + bracketleftex: "F8EF", + bracketleftmonospace: "FF3B", + bracketlefttp: "F8EE", + bracketright: "005D", + bracketrightbt: "F8FB", + bracketrightex: "F8FA", + bracketrightmonospace: "FF3D", + bracketrighttp: "F8F9", + breve: "02D8", + brevebelowcmb: "032E", + brevecmb: "0306", + breveinvertedbelowcmb: "032F", + breveinvertedcmb: "0311", + breveinverteddoublecmb: "0361", + bridgebelowcmb: "032A", + bridgeinvertedbelowcmb: "033A", + brokenbar: "00A6", + bstroke: "0180", + bsuperior: "F6EA", + btopbar: "0183", + buhiragana: "3076", + bukatakana: "30D6", + bullet: "2022", + bulletinverse: "25D8", + bulletoperator: "2219", + bullseye: "25CE", + c: "0063", + caarmenian: "056E", + cabengali: "099A", + cacute: "0107", + cadeva: "091A", + cagujarati: "0A9A", + cagurmukhi: "0A1A", + calsquare: "3388", + candrabindubengali: "0981", + candrabinducmb: "0310", + candrabindudeva: "0901", + candrabindugujarati: "0A81", + capslock: "21EA", + careof: "2105", + caron: "02C7", + caronbelowcmb: "032C", + caroncmb: "030C", + carriagereturn: "21B5", + cbopomofo: "3118", + ccaron: "010D", + ccedilla: "00E7", + ccedillaacute: "1E09", + ccircle: "24D2", + ccircumflex: "0109", + ccurl: "0255", + cdot: "010B", + cdotaccent: "010B", + cdsquare: "33C5", + cedilla: "00B8", + cedillacmb: "0327", + cent: "00A2", + centigrade: "2103", + centinferior: "F6DF", + centmonospace: "FFE0", + centoldstyle: "F7A2", + centsuperior: "F6E0", + chaarmenian: "0579", + chabengali: "099B", + chadeva: "091B", + chagujarati: "0A9B", + chagurmukhi: "0A1B", + chbopomofo: "3114", + cheabkhasiancyrillic: "04BD", + checkmark: "2713", + checyrillic: "0447", + chedescenderabkhasiancyrillic: "04BF", + chedescendercyrillic: "04B7", + chedieresiscyrillic: "04F5", + cheharmenian: "0573", + chekhakassiancyrillic: "04CC", + cheverticalstrokecyrillic: "04B9", + chi: "03C7", + chieuchacirclekorean: "3277", + chieuchaparenkorean: "3217", + chieuchcirclekorean: "3269", + chieuchkorean: "314A", + chieuchparenkorean: "3209", + chochangthai: "0E0A", + chochanthai: "0E08", + chochingthai: "0E09", + chochoethai: "0E0C", + chook: "0188", + cieucacirclekorean: "3276", + cieucaparenkorean: "3216", + cieuccirclekorean: "3268", + cieuckorean: "3148", + cieucparenkorean: "3208", + cieucuparenkorean: "321C", + circle: "25CB", + circlemultiply: "2297", + circleot: "2299", + circleplus: "2295", + circlepostalmark: "3036", + circlewithlefthalfblack: "25D0", + circlewithrighthalfblack: "25D1", + circumflex: "02C6", + circumflexbelowcmb: "032D", + circumflexcmb: "0302", + clear: "2327", + clickalveolar: "01C2", + clickdental: "01C0", + clicklateral: "01C1", + clickretroflex: "01C3", + club: "2663", + clubsuitblack: "2663", + clubsuitwhite: "2667", + cmcubedsquare: "33A4", + cmonospace: "FF43", + cmsquaredsquare: "33A0", + coarmenian: "0581", + colon: "003A", + colonmonetary: "20A1", + colonmonospace: "FF1A", + colonsign: "20A1", + colonsmall: "FE55", + colontriangularhalfmod: "02D1", + colontriangularmod: "02D0", + comma: "002C", + commaabovecmb: "0313", + commaaboverightcmb: "0315", + commaaccent: "F6C3", + commaarabic: "060C", + commaarmenian: "055D", + commainferior: "F6E1", + commamonospace: "FF0C", + commareversedabovecmb: "0314", + commareversedmod: "02BD", + commasmall: "FE50", + commasuperior: "F6E2", + commaturnedabovecmb: "0312", + commaturnedmod: "02BB", + compass: "263C", + congruent: "2245", + contourintegral: "222E", + control: "2303", + controlACK: "0006", + controlBEL: "0007", + controlBS: "0008", + controlCAN: "0018", + controlCR: "000D", + controlDC1: "0011", + controlDC2: "0012", + controlDC3: "0013", + controlDC4: "0014", + controlDEL: "007F", + controlDLE: "0010", + controlEM: "0019", + controlENQ: "0005", + controlEOT: "0004", + controlESC: "001B", + controlETB: "0017", + controlETX: "0003", + controlFF: "000C", + controlFS: "001C", + controlGS: "001D", + controlHT: "0009", + controlLF: "000A", + controlNAK: "0015", + controlRS: "001E", + controlSI: "000F", + controlSO: "000E", + controlSOT: "0002", + controlSTX: "0001", + controlSUB: "001A", + controlSYN: "0016", + controlUS: "001F", + controlVT: "000B", + copyright: "00A9", + copyrightsans: "F8E9", + copyrightserif: "F6D9", + cornerbracketleft: "300C", + cornerbracketlefthalfwidth: "FF62", + cornerbracketleftvertical: "FE41", + cornerbracketright: "300D", + cornerbracketrighthalfwidth: "FF63", + cornerbracketrightvertical: "FE42", + corporationsquare: "337F", + cosquare: "33C7", + coverkgsquare: "33C6", + cparen: "249E", + cruzeiro: "20A2", + cstretched: "0297", + curlyand: "22CF", + curlyor: "22CE", + currency: "00A4", + cyrBreve: "F6D1", + cyrFlex: "F6D2", + cyrbreve: "F6D4", + cyrflex: "F6D5", + d: "0064", + daarmenian: "0564", + dabengali: "09A6", + dadarabic: "0636", + dadeva: "0926", + dadfinalarabic: "FEBE", + dadinitialarabic: "FEBF", + dadmedialarabic: "FEC0", + dagesh: "05BC", + dageshhebrew: "05BC", + dagger: "2020", + daggerdbl: "2021", + dagujarati: "0AA6", + dagurmukhi: "0A26", + dahiragana: "3060", + dakatakana: "30C0", + dalarabic: "062F", + dalet: "05D3", + daletdagesh: "FB33", + daletdageshhebrew: "FB33", + dalethatafpatah: "05D3 05B2", + dalethatafpatahhebrew: "05D3 05B2", + dalethatafsegol: "05D3 05B1", + dalethatafsegolhebrew: "05D3 05B1", + dalethebrew: "05D3", + dalethiriq: "05D3 05B4", + dalethiriqhebrew: "05D3 05B4", + daletholam: "05D3 05B9", + daletholamhebrew: "05D3 05B9", + daletpatah: "05D3 05B7", + daletpatahhebrew: "05D3 05B7", + daletqamats: "05D3 05B8", + daletqamatshebrew: "05D3 05B8", + daletqubuts: "05D3 05BB", + daletqubutshebrew: "05D3 05BB", + daletsegol: "05D3 05B6", + daletsegolhebrew: "05D3 05B6", + daletsheva: "05D3 05B0", + daletshevahebrew: "05D3 05B0", + dalettsere: "05D3 05B5", + dalettserehebrew: "05D3 05B5", + dalfinalarabic: "FEAA", + dammaarabic: "064F", + dammalowarabic: "064F", + dammatanaltonearabic: "064C", + dammatanarabic: "064C", + danda: "0964", + dargahebrew: "05A7", + dargalefthebrew: "05A7", + dasiapneumatacyrilliccmb: "0485", + dblGrave: "F6D3", + dblanglebracketleft: "300A", + dblanglebracketleftvertical: "FE3D", + dblanglebracketright: "300B", + dblanglebracketrightvertical: "FE3E", + dblarchinvertedbelowcmb: "032B", + dblarrowleft: "21D4", + dblarrowright: "21D2", + dbldanda: "0965", + dblgrave: "F6D6", + dblgravecmb: "030F", + dblintegral: "222C", + dbllowline: "2017", + dbllowlinecmb: "0333", + dbloverlinecmb: "033F", + dblprimemod: "02BA", + dblverticalbar: "2016", + dblverticallineabovecmb: "030E", + dbopomofo: "3109", + dbsquare: "33C8", + dcaron: "010F", + dcedilla: "1E11", + dcircle: "24D3", + dcircumflexbelow: "1E13", + dcroat: "0111", + ddabengali: "09A1", + ddadeva: "0921", + ddagujarati: "0AA1", + ddagurmukhi: "0A21", + ddalarabic: "0688", + ddalfinalarabic: "FB89", + dddhadeva: "095C", + ddhabengali: "09A2", + ddhadeva: "0922", + ddhagujarati: "0AA2", + ddhagurmukhi: "0A22", + ddotaccent: "1E0B", + ddotbelow: "1E0D", + decimalseparatorarabic: "066B", + decimalseparatorpersian: "066B", + decyrillic: "0434", + degree: "00B0", + dehihebrew: "05AD", + dehiragana: "3067", + deicoptic: "03EF", + dekatakana: "30C7", + deleteleft: "232B", + deleteright: "2326", + delta: "03B4", + deltaturned: "018D", + denominatorminusonenumeratorbengali: "09F8", + dezh: "02A4", + dhabengali: "09A7", + dhadeva: "0927", + dhagujarati: "0AA7", + dhagurmukhi: "0A27", + dhook: "0257", + dialytikatonos: "0385", + dialytikatonoscmb: "0344", + diamond: "2666", + diamondsuitwhite: "2662", + dieresis: "00A8", + dieresisacute: "F6D7", + dieresisbelowcmb: "0324", + dieresiscmb: "0308", + dieresisgrave: "F6D8", + dieresistonos: "0385", + dihiragana: "3062", + dikatakana: "30C2", + dittomark: "3003", + divide: "00F7", + divides: "2223", + divisionslash: "2215", + djecyrillic: "0452", + dkshade: "2593", + dlinebelow: "1E0F", + dlsquare: "3397", + dmacron: "0111", + dmonospace: "FF44", + dnblock: "2584", + dochadathai: "0E0E", + dodekthai: "0E14", + dohiragana: "3069", + dokatakana: "30C9", + dollar: "0024", + dollarinferior: "F6E3", + dollarmonospace: "FF04", + dollaroldstyle: "F724", + dollarsmall: "FE69", + dollarsuperior: "F6E4", + dong: "20AB", + dorusquare: "3326", + dotaccent: "02D9", + dotaccentcmb: "0307", + dotbelowcmb: "0323", + dotbelowcomb: "0323", + dotkatakana: "30FB", + dotlessi: "0131", + dotlessj: "F6BE", + dotlessjstrokehook: "0284", + dotmath: "22C5", + dottedcircle: "25CC", + doubleyodpatah: "FB1F", + doubleyodpatahhebrew: "FB1F", + downtackbelowcmb: "031E", + downtackmod: "02D5", + dparen: "249F", + dsuperior: "F6EB", + dtail: "0256", + dtopbar: "018C", + duhiragana: "3065", + dukatakana: "30C5", + dz: "01F3", + dzaltone: "02A3", + dzcaron: "01C6", + dzcurl: "02A5", + dzeabkhasiancyrillic: "04E1", + dzecyrillic: "0455", + dzhecyrillic: "045F", + e: "0065", + eacute: "00E9", + earth: "2641", + ebengali: "098F", + ebopomofo: "311C", + ebreve: "0115", + ecandradeva: "090D", + ecandragujarati: "0A8D", + ecandravowelsigndeva: "0945", + ecandravowelsigngujarati: "0AC5", + ecaron: "011B", + ecedillabreve: "1E1D", + echarmenian: "0565", + echyiwnarmenian: "0587", + ecircle: "24D4", + ecircumflex: "00EA", + ecircumflexacute: "1EBF", + ecircumflexbelow: "1E19", + ecircumflexdotbelow: "1EC7", + ecircumflexgrave: "1EC1", + ecircumflexhookabove: "1EC3", + ecircumflextilde: "1EC5", + ecyrillic: "0454", + edblgrave: "0205", + edeva: "090F", + edieresis: "00EB", + edot: "0117", + edotaccent: "0117", + edotbelow: "1EB9", + eegurmukhi: "0A0F", + eematragurmukhi: "0A47", + efcyrillic: "0444", + egrave: "00E8", + egujarati: "0A8F", + eharmenian: "0567", + ehbopomofo: "311D", + ehiragana: "3048", + ehookabove: "1EBB", + eibopomofo: "311F", + eight: "0038", + eightarabic: "0668", + eightbengali: "09EE", + eightcircle: "2467", + eightcircleinversesansserif: "2791", + eightdeva: "096E", + eighteencircle: "2471", + eighteenparen: "2485", + eighteenperiod: "2499", + eightgujarati: "0AEE", + eightgurmukhi: "0A6E", + eighthackarabic: "0668", + eighthangzhou: "3028", + eighthnotebeamed: "266B", + eightideographicparen: "3227", + eightinferior: "2088", + eightmonospace: "FF18", + eightoldstyle: "F738", + eightparen: "247B", + eightperiod: "248F", + eightpersian: "06F8", + eightroman: "2177", + eightsuperior: "2078", + eightthai: "0E58", + einvertedbreve: "0207", + eiotifiedcyrillic: "0465", + ekatakana: "30A8", + ekatakanahalfwidth: "FF74", + ekonkargurmukhi: "0A74", + ekorean: "3154", + elcyrillic: "043B", + element: "2208", + elevencircle: "246A", + elevenparen: "247E", + elevenperiod: "2492", + elevenroman: "217A", + ellipsis: "2026", + ellipsisvertical: "22EE", + emacron: "0113", + emacronacute: "1E17", + emacrongrave: "1E15", + emcyrillic: "043C", + emdash: "2014", + emdashvertical: "FE31", + emonospace: "FF45", + emphasismarkarmenian: "055B", + emptyset: "2205", + enbopomofo: "3123", + encyrillic: "043D", + endash: "2013", + endashvertical: "FE32", + endescendercyrillic: "04A3", + eng: "014B", + engbopomofo: "3125", + enghecyrillic: "04A5", + enhookcyrillic: "04C8", + enspace: "2002", + eogonek: "0119", + eokorean: "3153", + eopen: "025B", + eopenclosed: "029A", + eopenreversed: "025C", + eopenreversedclosed: "025E", + eopenreversedhook: "025D", + eparen: "24A0", + epsilon: "03B5", + epsilontonos: "03AD", + equal: "003D", + equalmonospace: "FF1D", + equalsmall: "FE66", + equalsuperior: "207C", + equivalence: "2261", + erbopomofo: "3126", + ercyrillic: "0440", + ereversed: "0258", + ereversedcyrillic: "044D", + escyrillic: "0441", + esdescendercyrillic: "04AB", + esh: "0283", + eshcurl: "0286", + eshortdeva: "090E", + eshortvowelsigndeva: "0946", + eshreversedloop: "01AA", + eshsquatreversed: "0285", + esmallhiragana: "3047", + esmallkatakana: "30A7", + esmallkatakanahalfwidth: "FF6A", + estimated: "212E", + esuperior: "F6EC", + eta: "03B7", + etarmenian: "0568", + etatonos: "03AE", + eth: "00F0", + etilde: "1EBD", + etildebelow: "1E1B", + etnahtafoukhhebrew: "0591", + etnahtafoukhlefthebrew: "0591", + etnahtahebrew: "0591", + etnahtalefthebrew: "0591", + eturned: "01DD", + eukorean: "3161", + euro: "20AC", + evowelsignbengali: "09C7", + evowelsigndeva: "0947", + evowelsigngujarati: "0AC7", + exclam: "0021", + exclamarmenian: "055C", + exclamdbl: "203C", + exclamdown: "00A1", + exclamdownsmall: "F7A1", + exclammonospace: "FF01", + exclamsmall: "F721", + existential: "2203", + ezh: "0292", + ezhcaron: "01EF", + ezhcurl: "0293", + ezhreversed: "01B9", + ezhtail: "01BA", + f: "0066", + fadeva: "095E", + fagurmukhi: "0A5E", + fahrenheit: "2109", + fathaarabic: "064E", + fathalowarabic: "064E", + fathatanarabic: "064B", + fbopomofo: "3108", + fcircle: "24D5", + fdotaccent: "1E1F", + feharabic: "0641", + feharmenian: "0586", + fehfinalarabic: "FED2", + fehinitialarabic: "FED3", + fehmedialarabic: "FED4", + feicoptic: "03E5", + female: "2640", + ff: "FB00", + ffi: "FB03", + ffl: "FB04", + fi: "FB01", + fifteencircle: "246E", + fifteenparen: "2482", + fifteenperiod: "2496", + figuredash: "2012", + filledbox: "25A0", + filledrect: "25AC", + finalkaf: "05DA", + finalkafdagesh: "FB3A", + finalkafdageshhebrew: "FB3A", + finalkafhebrew: "05DA", + finalkafqamats: "05DA 05B8", + finalkafqamatshebrew: "05DA 05B8", + finalkafsheva: "05DA 05B0", + finalkafshevahebrew: "05DA 05B0", + finalmem: "05DD", + finalmemhebrew: "05DD", + finalnun: "05DF", + finalnunhebrew: "05DF", + finalpe: "05E3", + finalpehebrew: "05E3", + finaltsadi: "05E5", + finaltsadihebrew: "05E5", + firsttonechinese: "02C9", + fisheye: "25C9", + fitacyrillic: "0473", + five: "0035", + fivearabic: "0665", + fivebengali: "09EB", + fivecircle: "2464", + fivecircleinversesansserif: "278E", + fivedeva: "096B", + fiveeighths: "215D", + fivegujarati: "0AEB", + fivegurmukhi: "0A6B", + fivehackarabic: "0665", + fivehangzhou: "3025", + fiveideographicparen: "3224", + fiveinferior: "2085", + fivemonospace: "FF15", + fiveoldstyle: "F735", + fiveparen: "2478", + fiveperiod: "248C", + fivepersian: "06F5", + fiveroman: "2174", + fivesuperior: "2075", + fivethai: "0E55", + fl: "FB02", + florin: "0192", + fmonospace: "FF46", + fmsquare: "3399", + fofanthai: "0E1F", + fofathai: "0E1D", + fongmanthai: "0E4F", + forall: "2200", + four: "0034", + fourarabic: "0664", + fourbengali: "09EA", + fourcircle: "2463", + fourcircleinversesansserif: "278D", + fourdeva: "096A", + fourgujarati: "0AEA", + fourgurmukhi: "0A6A", + fourhackarabic: "0664", + fourhangzhou: "3024", + fourideographicparen: "3223", + fourinferior: "2084", + fourmonospace: "FF14", + fournumeratorbengali: "09F7", + fouroldstyle: "F734", + fourparen: "2477", + fourperiod: "248B", + fourpersian: "06F4", + fourroman: "2173", + foursuperior: "2074", + fourteencircle: "246D", + fourteenparen: "2481", + fourteenperiod: "2495", + fourthai: "0E54", + fourthtonechinese: "02CB", + fparen: "24A1", + fraction: "2044", + franc: "20A3", + g: "0067", + gabengali: "0997", + gacute: "01F5", + gadeva: "0917", + gafarabic: "06AF", + gaffinalarabic: "FB93", + gafinitialarabic: "FB94", + gafmedialarabic: "FB95", + gagujarati: "0A97", + gagurmukhi: "0A17", + gahiragana: "304C", + gakatakana: "30AC", + gamma: "03B3", + gammalatinsmall: "0263", + gammasuperior: "02E0", + gangiacoptic: "03EB", + gbopomofo: "310D", + gbreve: "011F", + gcaron: "01E7", + gcedilla: "0123", + gcircle: "24D6", + gcircumflex: "011D", + gcommaaccent: "0123", + gdot: "0121", + gdotaccent: "0121", + gecyrillic: "0433", + gehiragana: "3052", + gekatakana: "30B2", + geometricallyequal: "2251", + gereshaccenthebrew: "059C", + gereshhebrew: "05F3", + gereshmuqdamhebrew: "059D", + germandbls: "00DF", + gershayimaccenthebrew: "059E", + gershayimhebrew: "05F4", + getamark: "3013", + ghabengali: "0998", + ghadarmenian: "0572", + ghadeva: "0918", + ghagujarati: "0A98", + ghagurmukhi: "0A18", + ghainarabic: "063A", + ghainfinalarabic: "FECE", + ghaininitialarabic: "FECF", + ghainmedialarabic: "FED0", + ghemiddlehookcyrillic: "0495", + ghestrokecyrillic: "0493", + gheupturncyrillic: "0491", + ghhadeva: "095A", + ghhagurmukhi: "0A5A", + ghook: "0260", + ghzsquare: "3393", + gihiragana: "304E", + gikatakana: "30AE", + gimarmenian: "0563", + gimel: "05D2", + gimeldagesh: "FB32", + gimeldageshhebrew: "FB32", + gimelhebrew: "05D2", + gjecyrillic: "0453", + glottalinvertedstroke: "01BE", + glottalstop: "0294", + glottalstopinverted: "0296", + glottalstopmod: "02C0", + glottalstopreversed: "0295", + glottalstopreversedmod: "02C1", + glottalstopreversedsuperior: "02E4", + glottalstopstroke: "02A1", + glottalstopstrokereversed: "02A2", + gmacron: "1E21", + gmonospace: "FF47", + gohiragana: "3054", + gokatakana: "30B4", + gparen: "24A2", + gpasquare: "33AC", + gradient: "2207", + grave: "0060", + gravebelowcmb: "0316", + gravecmb: "0300", + gravecomb: "0300", + gravedeva: "0953", + gravelowmod: "02CE", + gravemonospace: "FF40", + gravetonecmb: "0340", + greater: "003E", + greaterequal: "2265", + greaterequalorless: "22DB", + greatermonospace: "FF1E", + greaterorequivalent: "2273", + greaterorless: "2277", + greateroverequal: "2267", + greatersmall: "FE65", + gscript: "0261", + gstroke: "01E5", + guhiragana: "3050", + guillemotleft: "00AB", + guillemotright: "00BB", + guilsinglleft: "2039", + guilsinglright: "203A", + gukatakana: "30B0", + guramusquare: "3318", + gysquare: "33C9", + h: "0068", + haabkhasiancyrillic: "04A9", + haaltonearabic: "06C1", + habengali: "09B9", + hadescendercyrillic: "04B3", + hadeva: "0939", + hagujarati: "0AB9", + hagurmukhi: "0A39", + haharabic: "062D", + hahfinalarabic: "FEA2", + hahinitialarabic: "FEA3", + hahiragana: "306F", + hahmedialarabic: "FEA4", + haitusquare: "332A", + hakatakana: "30CF", + hakatakanahalfwidth: "FF8A", + halantgurmukhi: "0A4D", + hamzaarabic: "0621", + hamzadammaarabic: "0621 064F", + hamzadammatanarabic: "0621 064C", + hamzafathaarabic: "0621 064E", + hamzafathatanarabic: "0621 064B", + hamzalowarabic: "0621", + hamzalowkasraarabic: "0621 0650", + hamzalowkasratanarabic: "0621 064D", + hamzasukunarabic: "0621 0652", + hangulfiller: "3164", + hardsigncyrillic: "044A", + harpoonleftbarbup: "21BC", + harpoonrightbarbup: "21C0", + hasquare: "33CA", + hatafpatah: "05B2", + hatafpatah16: "05B2", + hatafpatah23: "05B2", + hatafpatah2f: "05B2", + hatafpatahhebrew: "05B2", + hatafpatahnarrowhebrew: "05B2", + hatafpatahquarterhebrew: "05B2", + hatafpatahwidehebrew: "05B2", + hatafqamats: "05B3", + hatafqamats1b: "05B3", + hatafqamats28: "05B3", + hatafqamats34: "05B3", + hatafqamatshebrew: "05B3", + hatafqamatsnarrowhebrew: "05B3", + hatafqamatsquarterhebrew: "05B3", + hatafqamatswidehebrew: "05B3", + hatafsegol: "05B1", + hatafsegol17: "05B1", + hatafsegol24: "05B1", + hatafsegol30: "05B1", + hatafsegolhebrew: "05B1", + hatafsegolnarrowhebrew: "05B1", + hatafsegolquarterhebrew: "05B1", + hatafsegolwidehebrew: "05B1", + hbar: "0127", + hbopomofo: "310F", + hbrevebelow: "1E2B", + hcedilla: "1E29", + hcircle: "24D7", + hcircumflex: "0125", + hdieresis: "1E27", + hdotaccent: "1E23", + hdotbelow: "1E25", + he: "05D4", + heart: "2665", + heartsuitblack: "2665", + heartsuitwhite: "2661", + hedagesh: "FB34", + hedageshhebrew: "FB34", + hehaltonearabic: "06C1", + heharabic: "0647", + hehebrew: "05D4", + hehfinalaltonearabic: "FBA7", + hehfinalalttwoarabic: "FEEA", + hehfinalarabic: "FEEA", + hehhamzaabovefinalarabic: "FBA5", + hehhamzaaboveisolatedarabic: "FBA4", + hehinitialaltonearabic: "FBA8", + hehinitialarabic: "FEEB", + hehiragana: "3078", + hehmedialaltonearabic: "FBA9", + hehmedialarabic: "FEEC", + heiseierasquare: "337B", + hekatakana: "30D8", + hekatakanahalfwidth: "FF8D", + hekutaarusquare: "3336", + henghook: "0267", + herutusquare: "3339", + het: "05D7", + hethebrew: "05D7", + hhook: "0266", + hhooksuperior: "02B1", + hieuhacirclekorean: "327B", + hieuhaparenkorean: "321B", + hieuhcirclekorean: "326D", + hieuhkorean: "314E", + hieuhparenkorean: "320D", + hihiragana: "3072", + hikatakana: "30D2", + hikatakanahalfwidth: "FF8B", + hiriq: "05B4", + hiriq14: "05B4", + hiriq21: "05B4", + hiriq2d: "05B4", + hiriqhebrew: "05B4", + hiriqnarrowhebrew: "05B4", + hiriqquarterhebrew: "05B4", + hiriqwidehebrew: "05B4", + hlinebelow: "1E96", + hmonospace: "FF48", + hoarmenian: "0570", + hohipthai: "0E2B", + hohiragana: "307B", + hokatakana: "30DB", + hokatakanahalfwidth: "FF8E", + holam: "05B9", + holam19: "05B9", + holam26: "05B9", + holam32: "05B9", + holamhebrew: "05B9", + holamnarrowhebrew: "05B9", + holamquarterhebrew: "05B9", + holamwidehebrew: "05B9", + honokhukthai: "0E2E", + hookabovecomb: "0309", + hookcmb: "0309", + hookpalatalizedbelowcmb: "0321", + hookretroflexbelowcmb: "0322", + hoonsquare: "3342", + horicoptic: "03E9", + horizontalbar: "2015", + horncmb: "031B", + hotsprings: "2668", + house: "2302", + hparen: "24A3", + hsuperior: "02B0", + hturned: "0265", + huhiragana: "3075", + huiitosquare: "3333", + hukatakana: "30D5", + hukatakanahalfwidth: "FF8C", + hungarumlaut: "02DD", + hungarumlautcmb: "030B", + hv: "0195", + hyphen: "002D", + hypheninferior: "F6E5", + hyphenmonospace: "FF0D", + hyphensmall: "FE63", + hyphensuperior: "F6E6", + hyphentwo: "2010", + i: "0069", + iacute: "00ED", + iacyrillic: "044F", + ibengali: "0987", + ibopomofo: "3127", + ibreve: "012D", + icaron: "01D0", + icircle: "24D8", + icircumflex: "00EE", + icyrillic: "0456", + idblgrave: "0209", + ideographearthcircle: "328F", + ideographfirecircle: "328B", + ideographicallianceparen: "323F", + ideographiccallparen: "323A", + ideographiccentrecircle: "32A5", + ideographicclose: "3006", + ideographiccomma: "3001", + ideographiccommaleft: "FF64", + ideographiccongratulationparen: "3237", + ideographiccorrectcircle: "32A3", + ideographicearthparen: "322F", + ideographicenterpriseparen: "323D", + ideographicexcellentcircle: "329D", + ideographicfestivalparen: "3240", + ideographicfinancialcircle: "3296", + ideographicfinancialparen: "3236", + ideographicfireparen: "322B", + ideographichaveparen: "3232", + ideographichighcircle: "32A4", + ideographiciterationmark: "3005", + ideographiclaborcircle: "3298", + ideographiclaborparen: "3238", + ideographicleftcircle: "32A7", + ideographiclowcircle: "32A6", + ideographicmedicinecircle: "32A9", + ideographicmetalparen: "322E", + ideographicmoonparen: "322A", + ideographicnameparen: "3234", + ideographicperiod: "3002", + ideographicprintcircle: "329E", + ideographicreachparen: "3243", + ideographicrepresentparen: "3239", + ideographicresourceparen: "323E", + ideographicrightcircle: "32A8", + ideographicsecretcircle: "3299", + ideographicselfparen: "3242", + ideographicsocietyparen: "3233", + ideographicspace: "3000", + ideographicspecialparen: "3235", + ideographicstockparen: "3231", + ideographicstudyparen: "323B", + ideographicsunparen: "3230", + ideographicsuperviseparen: "323C", + ideographicwaterparen: "322C", + ideographicwoodparen: "322D", + ideographiczero: "3007", + ideographmetalcircle: "328E", + ideographmooncircle: "328A", + ideographnamecircle: "3294", + ideographsuncircle: "3290", + ideographwatercircle: "328C", + ideographwoodcircle: "328D", + ideva: "0907", + idieresis: "00EF", + idieresisacute: "1E2F", + idieresiscyrillic: "04E5", + idotbelow: "1ECB", + iebrevecyrillic: "04D7", + iecyrillic: "0435", + ieungacirclekorean: "3275", + ieungaparenkorean: "3215", + ieungcirclekorean: "3267", + ieungkorean: "3147", + ieungparenkorean: "3207", + igrave: "00EC", + igujarati: "0A87", + igurmukhi: "0A07", + ihiragana: "3044", + ihookabove: "1EC9", + iibengali: "0988", + iicyrillic: "0438", + iideva: "0908", + iigujarati: "0A88", + iigurmukhi: "0A08", + iimatragurmukhi: "0A40", + iinvertedbreve: "020B", + iishortcyrillic: "0439", + iivowelsignbengali: "09C0", + iivowelsigndeva: "0940", + iivowelsigngujarati: "0AC0", + ij: "0133", + ikatakana: "30A4", + ikatakanahalfwidth: "FF72", + ikorean: "3163", + ilde: "02DC", + iluyhebrew: "05AC", + imacron: "012B", + imacroncyrillic: "04E3", + imageorapproximatelyequal: "2253", + imatragurmukhi: "0A3F", + imonospace: "FF49", + increment: "2206", + infinity: "221E", + iniarmenian: "056B", + integral: "222B", + integralbottom: "2321", + integralbt: "2321", + integralex: "F8F5", + integraltop: "2320", + integraltp: "2320", + intersection: "2229", + intisquare: "3305", + invbullet: "25D8", + invcircle: "25D9", + invsmileface: "263B", + iocyrillic: "0451", + iogonek: "012F", + iota: "03B9", + iotadieresis: "03CA", + iotadieresistonos: "0390", + iotalatin: "0269", + iotatonos: "03AF", + iparen: "24A4", + irigurmukhi: "0A72", + ismallhiragana: "3043", + ismallkatakana: "30A3", + ismallkatakanahalfwidth: "FF68", + issharbengali: "09FA", + istroke: "0268", + isuperior: "F6ED", + iterationhiragana: "309D", + iterationkatakana: "30FD", + itilde: "0129", + itildebelow: "1E2D", + iubopomofo: "3129", + iucyrillic: "044E", + ivowelsignbengali: "09BF", + ivowelsigndeva: "093F", + ivowelsigngujarati: "0ABF", + izhitsacyrillic: "0475", + izhitsadblgravecyrillic: "0477", + j: "006A", + jaarmenian: "0571", + jabengali: "099C", + jadeva: "091C", + jagujarati: "0A9C", + jagurmukhi: "0A1C", + jbopomofo: "3110", + jcaron: "01F0", + jcircle: "24D9", + jcircumflex: "0135", + jcrossedtail: "029D", + jdotlessstroke: "025F", + jecyrillic: "0458", + jeemarabic: "062C", + jeemfinalarabic: "FE9E", + jeeminitialarabic: "FE9F", + jeemmedialarabic: "FEA0", + jeharabic: "0698", + jehfinalarabic: "FB8B", + jhabengali: "099D", + jhadeva: "091D", + jhagujarati: "0A9D", + jhagurmukhi: "0A1D", + jheharmenian: "057B", + jis: "3004", + jmonospace: "FF4A", + jparen: "24A5", + jsuperior: "02B2", + k: "006B", + kabashkircyrillic: "04A1", + kabengali: "0995", + kacute: "1E31", + kacyrillic: "043A", + kadescendercyrillic: "049B", + kadeva: "0915", + kaf: "05DB", + kafarabic: "0643", + kafdagesh: "FB3B", + kafdageshhebrew: "FB3B", + kaffinalarabic: "FEDA", + kafhebrew: "05DB", + kafinitialarabic: "FEDB", + kafmedialarabic: "FEDC", + kafrafehebrew: "FB4D", + kagujarati: "0A95", + kagurmukhi: "0A15", + kahiragana: "304B", + kahookcyrillic: "04C4", + kakatakana: "30AB", + kakatakanahalfwidth: "FF76", + kappa: "03BA", + kappasymbolgreek: "03F0", + kapyeounmieumkorean: "3171", + kapyeounphieuphkorean: "3184", + kapyeounpieupkorean: "3178", + kapyeounssangpieupkorean: "3179", + karoriisquare: "330D", + kashidaautoarabic: "0640", + kashidaautonosidebearingarabic: "0640", + kasmallkatakana: "30F5", + kasquare: "3384", + kasraarabic: "0650", + kasratanarabic: "064D", + kastrokecyrillic: "049F", + katahiraprolongmarkhalfwidth: "FF70", + kaverticalstrokecyrillic: "049D", + kbopomofo: "310E", + kcalsquare: "3389", + kcaron: "01E9", + kcedilla: "0137", + kcircle: "24DA", + kcommaaccent: "0137", + kdotbelow: "1E33", + keharmenian: "0584", + kehiragana: "3051", + kekatakana: "30B1", + kekatakanahalfwidth: "FF79", + kenarmenian: "056F", + kesmallkatakana: "30F6", + kgreenlandic: "0138", + khabengali: "0996", + khacyrillic: "0445", + khadeva: "0916", + khagujarati: "0A96", + khagurmukhi: "0A16", + khaharabic: "062E", + khahfinalarabic: "FEA6", + khahinitialarabic: "FEA7", + khahmedialarabic: "FEA8", + kheicoptic: "03E7", + khhadeva: "0959", + khhagurmukhi: "0A59", + khieukhacirclekorean: "3278", + khieukhaparenkorean: "3218", + khieukhcirclekorean: "326A", + khieukhkorean: "314B", + khieukhparenkorean: "320A", + khokhaithai: "0E02", + khokhonthai: "0E05", + khokhuatthai: "0E03", + khokhwaithai: "0E04", + khomutthai: "0E5B", + khook: "0199", + khorakhangthai: "0E06", + khzsquare: "3391", + kihiragana: "304D", + kikatakana: "30AD", + kikatakanahalfwidth: "FF77", + kiroguramusquare: "3315", + kiromeetorusquare: "3316", + kirosquare: "3314", + kiyeokacirclekorean: "326E", + kiyeokaparenkorean: "320E", + kiyeokcirclekorean: "3260", + kiyeokkorean: "3131", + kiyeokparenkorean: "3200", + kiyeoksioskorean: "3133", + kjecyrillic: "045C", + klinebelow: "1E35", + klsquare: "3398", + kmcubedsquare: "33A6", + kmonospace: "FF4B", + kmsquaredsquare: "33A2", + kohiragana: "3053", + kohmsquare: "33C0", + kokaithai: "0E01", + kokatakana: "30B3", + kokatakanahalfwidth: "FF7A", + kooposquare: "331E", + koppacyrillic: "0481", + koreanstandardsymbol: "327F", + koroniscmb: "0343", + kparen: "24A6", + kpasquare: "33AA", + ksicyrillic: "046F", + ktsquare: "33CF", + kturned: "029E", + kuhiragana: "304F", + kukatakana: "30AF", + kukatakanahalfwidth: "FF78", + kvsquare: "33B8", + kwsquare: "33BE", + l: "006C", + labengali: "09B2", + lacute: "013A", + ladeva: "0932", + lagujarati: "0AB2", + lagurmukhi: "0A32", + lakkhangyaothai: "0E45", + lamaleffinalarabic: "FEFC", + lamalefhamzaabovefinalarabic: "FEF8", + lamalefhamzaaboveisolatedarabic: "FEF7", + lamalefhamzabelowfinalarabic: "FEFA", + lamalefhamzabelowisolatedarabic: "FEF9", + lamalefisolatedarabic: "FEFB", + lamalefmaddaabovefinalarabic: "FEF6", + lamalefmaddaaboveisolatedarabic: "FEF5", + lamarabic: "0644", + lambda: "03BB", + lambdastroke: "019B", + lamed: "05DC", + lameddagesh: "FB3C", + lameddageshhebrew: "FB3C", + lamedhebrew: "05DC", + lamedholam: "05DC 05B9", + lamedholamdagesh: "05DC 05B9 05BC", + lamedholamdageshhebrew: "05DC 05B9 05BC", + lamedholamhebrew: "05DC 05B9", + lamfinalarabic: "FEDE", + lamhahinitialarabic: "FCCA", + laminitialarabic: "FEDF", + lamjeeminitialarabic: "FCC9", + lamkhahinitialarabic: "FCCB", + lamlamhehisolatedarabic: "FDF2", + lammedialarabic: "FEE0", + lammeemhahinitialarabic: "FD88", + lammeeminitialarabic: "FCCC", + lammeemjeeminitialarabic: "FEDF FEE4 FEA0", + lammeemkhahinitialarabic: "FEDF FEE4 FEA8", + largecircle: "25EF", + lbar: "019A", + lbelt: "026C", + lbopomofo: "310C", + lcaron: "013E", + lcedilla: "013C", + lcircle: "24DB", + lcircumflexbelow: "1E3D", + lcommaaccent: "013C", + ldot: "0140", + ldotaccent: "0140", + ldotbelow: "1E37", + ldotbelowmacron: "1E39", + leftangleabovecmb: "031A", + lefttackbelowcmb: "0318", + less: "003C", + lessequal: "2264", + lessequalorgreater: "22DA", + lessmonospace: "FF1C", + lessorequivalent: "2272", + lessorgreater: "2276", + lessoverequal: "2266", + lesssmall: "FE64", + lezh: "026E", + lfblock: "258C", + lhookretroflex: "026D", + lira: "20A4", + liwnarmenian: "056C", + lj: "01C9", + ljecyrillic: "0459", + ll: "F6C0", + lladeva: "0933", + llagujarati: "0AB3", + llinebelow: "1E3B", + llladeva: "0934", + llvocalicbengali: "09E1", + llvocalicdeva: "0961", + llvocalicvowelsignbengali: "09E3", + llvocalicvowelsigndeva: "0963", + lmiddletilde: "026B", + lmonospace: "FF4C", + lmsquare: "33D0", + lochulathai: "0E2C", + logicaland: "2227", + logicalnot: "00AC", + logicalnotreversed: "2310", + logicalor: "2228", + lolingthai: "0E25", + longs: "017F", + lowlinecenterline: "FE4E", + lowlinecmb: "0332", + lowlinedashed: "FE4D", + lozenge: "25CA", + lparen: "24A7", + lslash: "0142", + lsquare: "2113", + lsuperior: "F6EE", + ltshade: "2591", + luthai: "0E26", + lvocalicbengali: "098C", + lvocalicdeva: "090C", + lvocalicvowelsignbengali: "09E2", + lvocalicvowelsigndeva: "0962", + lxsquare: "33D3", + m: "006D", + mabengali: "09AE", + macron: "00AF", + macronbelowcmb: "0331", + macroncmb: "0304", + macronlowmod: "02CD", + macronmonospace: "FFE3", + macute: "1E3F", + madeva: "092E", + magujarati: "0AAE", + magurmukhi: "0A2E", + mahapakhhebrew: "05A4", + mahapakhlefthebrew: "05A4", + mahiragana: "307E", + maichattawalowleftthai: "F895", + maichattawalowrightthai: "F894", + maichattawathai: "0E4B", + maichattawaupperleftthai: "F893", + maieklowleftthai: "F88C", + maieklowrightthai: "F88B", + maiekthai: "0E48", + maiekupperleftthai: "F88A", + maihanakatleftthai: "F884", + maihanakatthai: "0E31", + maitaikhuleftthai: "F889", + maitaikhuthai: "0E47", + maitholowleftthai: "F88F", + maitholowrightthai: "F88E", + maithothai: "0E49", + maithoupperleftthai: "F88D", + maitrilowleftthai: "F892", + maitrilowrightthai: "F891", + maitrithai: "0E4A", + maitriupperleftthai: "F890", + maiyamokthai: "0E46", + makatakana: "30DE", + makatakanahalfwidth: "FF8F", + male: "2642", + mansyonsquare: "3347", + maqafhebrew: "05BE", + mars: "2642", + masoracirclehebrew: "05AF", + masquare: "3383", + mbopomofo: "3107", + mbsquare: "33D4", + mcircle: "24DC", + mcubedsquare: "33A5", + mdotaccent: "1E41", + mdotbelow: "1E43", + meemarabic: "0645", + meemfinalarabic: "FEE2", + meeminitialarabic: "FEE3", + meemmedialarabic: "FEE4", + meemmeeminitialarabic: "FCD1", + meemmeemisolatedarabic: "FC48", + meetorusquare: "334D", + mehiragana: "3081", + meizierasquare: "337E", + mekatakana: "30E1", + mekatakanahalfwidth: "FF92", + mem: "05DE", + memdagesh: "FB3E", + memdageshhebrew: "FB3E", + memhebrew: "05DE", + menarmenian: "0574", + merkhahebrew: "05A5", + merkhakefulahebrew: "05A6", + merkhakefulalefthebrew: "05A6", + merkhalefthebrew: "05A5", + mhook: "0271", + mhzsquare: "3392", + middledotkatakanahalfwidth: "FF65", + middot: "00B7", + mieumacirclekorean: "3272", + mieumaparenkorean: "3212", + mieumcirclekorean: "3264", + mieumkorean: "3141", + mieumpansioskorean: "3170", + mieumparenkorean: "3204", + mieumpieupkorean: "316E", + mieumsioskorean: "316F", + mihiragana: "307F", + mikatakana: "30DF", + mikatakanahalfwidth: "FF90", + minus: "2212", + minusbelowcmb: "0320", + minuscircle: "2296", + minusmod: "02D7", + minusplus: "2213", + minute: "2032", + miribaarusquare: "334A", + mirisquare: "3349", + mlonglegturned: "0270", + mlsquare: "3396", + mmcubedsquare: "33A3", + mmonospace: "FF4D", + mmsquaredsquare: "339F", + mohiragana: "3082", + mohmsquare: "33C1", + mokatakana: "30E2", + mokatakanahalfwidth: "FF93", + molsquare: "33D6", + momathai: "0E21", + moverssquare: "33A7", + moverssquaredsquare: "33A8", + mparen: "24A8", + mpasquare: "33AB", + mssquare: "33B3", + msuperior: "F6EF", + mturned: "026F", + mu: "00B5", + mu1: "00B5", + muasquare: "3382", + muchgreater: "226B", + muchless: "226A", + mufsquare: "338C", + mugreek: "03BC", + mugsquare: "338D", + muhiragana: "3080", + mukatakana: "30E0", + mukatakanahalfwidth: "FF91", + mulsquare: "3395", + multiply: "00D7", + mumsquare: "339B", + munahhebrew: "05A3", + munahlefthebrew: "05A3", + musicalnote: "266A", + musicalnotedbl: "266B", + musicflatsign: "266D", + musicsharpsign: "266F", + mussquare: "33B2", + muvsquare: "33B6", + muwsquare: "33BC", + mvmegasquare: "33B9", + mvsquare: "33B7", + mwmegasquare: "33BF", + mwsquare: "33BD", + n: "006E", + nabengali: "09A8", + nabla: "2207", + nacute: "0144", + nadeva: "0928", + nagujarati: "0AA8", + nagurmukhi: "0A28", + nahiragana: "306A", + nakatakana: "30CA", + nakatakanahalfwidth: "FF85", + napostrophe: "0149", + nasquare: "3381", + nbopomofo: "310B", + nbspace: "00A0", + ncaron: "0148", + ncedilla: "0146", + ncircle: "24DD", + ncircumflexbelow: "1E4B", + ncommaaccent: "0146", + ndotaccent: "1E45", + ndotbelow: "1E47", + nehiragana: "306D", + nekatakana: "30CD", + nekatakanahalfwidth: "FF88", + newsheqelsign: "20AA", + nfsquare: "338B", + ngabengali: "0999", + ngadeva: "0919", + ngagujarati: "0A99", + ngagurmukhi: "0A19", + ngonguthai: "0E07", + nhiragana: "3093", + nhookleft: "0272", + nhookretroflex: "0273", + nieunacirclekorean: "326F", + nieunaparenkorean: "320F", + nieuncieuckorean: "3135", + nieuncirclekorean: "3261", + nieunhieuhkorean: "3136", + nieunkorean: "3134", + nieunpansioskorean: "3168", + nieunparenkorean: "3201", + nieunsioskorean: "3167", + nieuntikeutkorean: "3166", + nihiragana: "306B", + nikatakana: "30CB", + nikatakanahalfwidth: "FF86", + nikhahitleftthai: "F899", + nikhahitthai: "0E4D", + nine: "0039", + ninearabic: "0669", + ninebengali: "09EF", + ninecircle: "2468", + ninecircleinversesansserif: "2792", + ninedeva: "096F", + ninegujarati: "0AEF", + ninegurmukhi: "0A6F", + ninehackarabic: "0669", + ninehangzhou: "3029", + nineideographicparen: "3228", + nineinferior: "2089", + ninemonospace: "FF19", + nineoldstyle: "F739", + nineparen: "247C", + nineperiod: "2490", + ninepersian: "06F9", + nineroman: "2178", + ninesuperior: "2079", + nineteencircle: "2472", + nineteenparen: "2486", + nineteenperiod: "249A", + ninethai: "0E59", + nj: "01CC", + njecyrillic: "045A", + nkatakana: "30F3", + nkatakanahalfwidth: "FF9D", + nlegrightlong: "019E", + nlinebelow: "1E49", + nmonospace: "FF4E", + nmsquare: "339A", + nnabengali: "09A3", + nnadeva: "0923", + nnagujarati: "0AA3", + nnagurmukhi: "0A23", + nnnadeva: "0929", + nohiragana: "306E", + nokatakana: "30CE", + nokatakanahalfwidth: "FF89", + nonbreakingspace: "00A0", + nonenthai: "0E13", + nonuthai: "0E19", + noonarabic: "0646", + noonfinalarabic: "FEE6", + noonghunnaarabic: "06BA", + noonghunnafinalarabic: "FB9F", + noonhehinitialarabic: "FEE7 FEEC", + nooninitialarabic: "FEE7", + noonjeeminitialarabic: "FCD2", + noonjeemisolatedarabic: "FC4B", + noonmedialarabic: "FEE8", + noonmeeminitialarabic: "FCD5", + noonmeemisolatedarabic: "FC4E", + noonnoonfinalarabic: "FC8D", + notcontains: "220C", + notelement: "2209", + notelementof: "2209", + notequal: "2260", + notgreater: "226F", + notgreaternorequal: "2271", + notgreaternorless: "2279", + notidentical: "2262", + notless: "226E", + notlessnorequal: "2270", + notparallel: "2226", + notprecedes: "2280", + notsubset: "2284", + notsucceeds: "2281", + notsuperset: "2285", + nowarmenian: "0576", + nparen: "24A9", + nssquare: "33B1", + nsuperior: "207F", + ntilde: "00F1", + nu: "03BD", + nuhiragana: "306C", + nukatakana: "30CC", + nukatakanahalfwidth: "FF87", + nuktabengali: "09BC", + nuktadeva: "093C", + nuktagujarati: "0ABC", + nuktagurmukhi: "0A3C", + numbersign: "0023", + numbersignmonospace: "FF03", + numbersignsmall: "FE5F", + numeralsigngreek: "0374", + numeralsignlowergreek: "0375", + numero: "2116", + nun: "05E0", + nundagesh: "FB40", + nundageshhebrew: "FB40", + nunhebrew: "05E0", + nvsquare: "33B5", + nwsquare: "33BB", + nyabengali: "099E", + nyadeva: "091E", + nyagujarati: "0A9E", + nyagurmukhi: "0A1E", + o: "006F", + oacute: "00F3", + oangthai: "0E2D", + obarred: "0275", + obarredcyrillic: "04E9", + obarreddieresiscyrillic: "04EB", + obengali: "0993", + obopomofo: "311B", + obreve: "014F", + ocandradeva: "0911", + ocandragujarati: "0A91", + ocandravowelsigndeva: "0949", + ocandravowelsigngujarati: "0AC9", + ocaron: "01D2", + ocircle: "24DE", + ocircumflex: "00F4", + ocircumflexacute: "1ED1", + ocircumflexdotbelow: "1ED9", + ocircumflexgrave: "1ED3", + ocircumflexhookabove: "1ED5", + ocircumflextilde: "1ED7", + ocyrillic: "043E", + odblacute: "0151", + odblgrave: "020D", + odeva: "0913", + odieresis: "00F6", + odieresiscyrillic: "04E7", + odotbelow: "1ECD", + oe: "0153", + oekorean: "315A", + ogonek: "02DB", + ogonekcmb: "0328", + ograve: "00F2", + ogujarati: "0A93", + oharmenian: "0585", + ohiragana: "304A", + ohookabove: "1ECF", + ohorn: "01A1", + ohornacute: "1EDB", + ohorndotbelow: "1EE3", + ohorngrave: "1EDD", + ohornhookabove: "1EDF", + ohorntilde: "1EE1", + ohungarumlaut: "0151", + oi: "01A3", + oinvertedbreve: "020F", + okatakana: "30AA", + okatakanahalfwidth: "FF75", + okorean: "3157", + olehebrew: "05AB", + omacron: "014D", + omacronacute: "1E53", + omacrongrave: "1E51", + omdeva: "0950", + omega: "03C9", + omega1: "03D6", + omegacyrillic: "0461", + omegalatinclosed: "0277", + omegaroundcyrillic: "047B", + omegatitlocyrillic: "047D", + omegatonos: "03CE", + omgujarati: "0AD0", + omicron: "03BF", + omicrontonos: "03CC", + omonospace: "FF4F", + one: "0031", + onearabic: "0661", + onebengali: "09E7", + onecircle: "2460", + onecircleinversesansserif: "278A", + onedeva: "0967", + onedotenleader: "2024", + oneeighth: "215B", + onefitted: "F6DC", + onegujarati: "0AE7", + onegurmukhi: "0A67", + onehackarabic: "0661", + onehalf: "00BD", + onehangzhou: "3021", + oneideographicparen: "3220", + oneinferior: "2081", + onemonospace: "FF11", + onenumeratorbengali: "09F4", + oneoldstyle: "F731", + oneparen: "2474", + oneperiod: "2488", + onepersian: "06F1", + onequarter: "00BC", + oneroman: "2170", + onesuperior: "00B9", + onethai: "0E51", + onethird: "2153", + oogonek: "01EB", + oogonekmacron: "01ED", + oogurmukhi: "0A13", + oomatragurmukhi: "0A4B", + oopen: "0254", + oparen: "24AA", + openbullet: "25E6", + option: "2325", + ordfeminine: "00AA", + ordmasculine: "00BA", + orthogonal: "221F", + oshortdeva: "0912", + oshortvowelsigndeva: "094A", + oslash: "00F8", + oslashacute: "01FF", + osmallhiragana: "3049", + osmallkatakana: "30A9", + osmallkatakanahalfwidth: "FF6B", + ostrokeacute: "01FF", + osuperior: "F6F0", + otcyrillic: "047F", + otilde: "00F5", + otildeacute: "1E4D", + otildedieresis: "1E4F", + oubopomofo: "3121", + overline: "203E", + overlinecenterline: "FE4A", + overlinecmb: "0305", + overlinedashed: "FE49", + overlinedblwavy: "FE4C", + overlinewavy: "FE4B", + overscore: "00AF", + ovowelsignbengali: "09CB", + ovowelsigndeva: "094B", + ovowelsigngujarati: "0ACB", + p: "0070", + paampssquare: "3380", + paasentosquare: "332B", + pabengali: "09AA", + pacute: "1E55", + padeva: "092A", + pagedown: "21DF", + pageup: "21DE", + pagujarati: "0AAA", + pagurmukhi: "0A2A", + pahiragana: "3071", + paiyannoithai: "0E2F", + pakatakana: "30D1", + palatalizationcyrilliccmb: "0484", + palochkacyrillic: "04C0", + pansioskorean: "317F", + paragraph: "00B6", + parallel: "2225", + parenleft: "0028", + parenleftaltonearabic: "FD3E", + parenleftbt: "F8ED", + parenleftex: "F8EC", + parenleftinferior: "208D", + parenleftmonospace: "FF08", + parenleftsmall: "FE59", + parenleftsuperior: "207D", + parenlefttp: "F8EB", + parenleftvertical: "FE35", + parenright: "0029", + parenrightaltonearabic: "FD3F", + parenrightbt: "F8F8", + parenrightex: "F8F7", + parenrightinferior: "208E", + parenrightmonospace: "FF09", + parenrightsmall: "FE5A", + parenrightsuperior: "207E", + parenrighttp: "F8F6", + parenrightvertical: "FE36", + partialdiff: "2202", + paseqhebrew: "05C0", + pashtahebrew: "0599", + pasquare: "33A9", + patah: "05B7", + patah11: "05B7", + patah1d: "05B7", + patah2a: "05B7", + patahhebrew: "05B7", + patahnarrowhebrew: "05B7", + patahquarterhebrew: "05B7", + patahwidehebrew: "05B7", + pazerhebrew: "05A1", + pbopomofo: "3106", + pcircle: "24DF", + pdotaccent: "1E57", + pe: "05E4", + pecyrillic: "043F", + pedagesh: "FB44", + pedageshhebrew: "FB44", + peezisquare: "333B", + pefinaldageshhebrew: "FB43", + peharabic: "067E", + peharmenian: "057A", + pehebrew: "05E4", + pehfinalarabic: "FB57", + pehinitialarabic: "FB58", + pehiragana: "307A", + pehmedialarabic: "FB59", + pekatakana: "30DA", + pemiddlehookcyrillic: "04A7", + perafehebrew: "FB4E", + percent: "0025", + percentarabic: "066A", + percentmonospace: "FF05", + percentsmall: "FE6A", + period: "002E", + periodarmenian: "0589", + periodcentered: "00B7", + periodhalfwidth: "FF61", + periodinferior: "F6E7", + periodmonospace: "FF0E", + periodsmall: "FE52", + periodsuperior: "F6E8", + perispomenigreekcmb: "0342", + perpendicular: "22A5", + perthousand: "2030", + peseta: "20A7", + pfsquare: "338A", + phabengali: "09AB", + phadeva: "092B", + phagujarati: "0AAB", + phagurmukhi: "0A2B", + phi: "03C6", + phi1: "03D5", + phieuphacirclekorean: "327A", + phieuphaparenkorean: "321A", + phieuphcirclekorean: "326C", + phieuphkorean: "314D", + phieuphparenkorean: "320C", + philatin: "0278", + phinthuthai: "0E3A", + phisymbolgreek: "03D5", + phook: "01A5", + phophanthai: "0E1E", + phophungthai: "0E1C", + phosamphaothai: "0E20", + pi: "03C0", + pieupacirclekorean: "3273", + pieupaparenkorean: "3213", + pieupcieuckorean: "3176", + pieupcirclekorean: "3265", + pieupkiyeokkorean: "3172", + pieupkorean: "3142", + pieupparenkorean: "3205", + pieupsioskiyeokkorean: "3174", + pieupsioskorean: "3144", + pieupsiostikeutkorean: "3175", + pieupthieuthkorean: "3177", + pieuptikeutkorean: "3173", + pihiragana: "3074", + pikatakana: "30D4", + pisymbolgreek: "03D6", + piwrarmenian: "0583", + plus: "002B", + plusbelowcmb: "031F", + pluscircle: "2295", + plusminus: "00B1", + plusmod: "02D6", + plusmonospace: "FF0B", + plussmall: "FE62", + plussuperior: "207A", + pmonospace: "FF50", + pmsquare: "33D8", + pohiragana: "307D", + pointingindexdownwhite: "261F", + pointingindexleftwhite: "261C", + pointingindexrightwhite: "261E", + pointingindexupwhite: "261D", + pokatakana: "30DD", + poplathai: "0E1B", + postalmark: "3012", + postalmarkface: "3020", + pparen: "24AB", + precedes: "227A", + prescription: "211E", + primemod: "02B9", + primereversed: "2035", + product: "220F", + projective: "2305", + prolongedkana: "30FC", + propellor: "2318", + propersubset: "2282", + propersuperset: "2283", + proportion: "2237", + proportional: "221D", + psi: "03C8", + psicyrillic: "0471", + psilipneumatacyrilliccmb: "0486", + pssquare: "33B0", + puhiragana: "3077", + pukatakana: "30D7", + pvsquare: "33B4", + pwsquare: "33BA", + q: "0071", + qadeva: "0958", + qadmahebrew: "05A8", + qafarabic: "0642", + qaffinalarabic: "FED6", + qafinitialarabic: "FED7", + qafmedialarabic: "FED8", + qamats: "05B8", + qamats10: "05B8", + qamats1a: "05B8", + qamats1c: "05B8", + qamats27: "05B8", + qamats29: "05B8", + qamats33: "05B8", + qamatsde: "05B8", + qamatshebrew: "05B8", + qamatsnarrowhebrew: "05B8", + qamatsqatanhebrew: "05B8", + qamatsqatannarrowhebrew: "05B8", + qamatsqatanquarterhebrew: "05B8", + qamatsqatanwidehebrew: "05B8", + qamatsquarterhebrew: "05B8", + qamatswidehebrew: "05B8", + qarneyparahebrew: "059F", + qbopomofo: "3111", + qcircle: "24E0", + qhook: "02A0", + qmonospace: "FF51", + qof: "05E7", + qofdagesh: "FB47", + qofdageshhebrew: "FB47", + qofhatafpatah: "05E7 05B2", + qofhatafpatahhebrew: "05E7 05B2", + qofhatafsegol: "05E7 05B1", + qofhatafsegolhebrew: "05E7 05B1", + qofhebrew: "05E7", + qofhiriq: "05E7 05B4", + qofhiriqhebrew: "05E7 05B4", + qofholam: "05E7 05B9", + qofholamhebrew: "05E7 05B9", + qofpatah: "05E7 05B7", + qofpatahhebrew: "05E7 05B7", + qofqamats: "05E7 05B8", + qofqamatshebrew: "05E7 05B8", + qofqubuts: "05E7 05BB", + qofqubutshebrew: "05E7 05BB", + qofsegol: "05E7 05B6", + qofsegolhebrew: "05E7 05B6", + qofsheva: "05E7 05B0", + qofshevahebrew: "05E7 05B0", + qoftsere: "05E7 05B5", + qoftserehebrew: "05E7 05B5", + qparen: "24AC", + quarternote: "2669", + qubuts: "05BB", + qubuts18: "05BB", + qubuts25: "05BB", + qubuts31: "05BB", + qubutshebrew: "05BB", + qubutsnarrowhebrew: "05BB", + qubutsquarterhebrew: "05BB", + qubutswidehebrew: "05BB", + question: "003F", + questionarabic: "061F", + questionarmenian: "055E", + questiondown: "00BF", + questiondownsmall: "F7BF", + questiongreek: "037E", + questionmonospace: "FF1F", + questionsmall: "F73F", + quotedbl: "0022", + quotedblbase: "201E", + quotedblleft: "201C", + quotedblmonospace: "FF02", + quotedblprime: "301E", + quotedblprimereversed: "301D", + quotedblright: "201D", + quoteleft: "2018", + quoteleftreversed: "201B", + quotereversed: "201B", + quoteright: "2019", + quoterightn: "0149", + quotesinglbase: "201A", + quotesingle: "0027", + quotesinglemonospace: "FF07", + r: "0072", + raarmenian: "057C", + rabengali: "09B0", + racute: "0155", + radeva: "0930", + radical: "221A", + radicalex: "F8E5", + radoverssquare: "33AE", + radoverssquaredsquare: "33AF", + radsquare: "33AD", + rafe: "05BF", + rafehebrew: "05BF", + ragujarati: "0AB0", + ragurmukhi: "0A30", + rahiragana: "3089", + rakatakana: "30E9", + rakatakanahalfwidth: "FF97", + ralowerdiagonalbengali: "09F1", + ramiddlediagonalbengali: "09F0", + ramshorn: "0264", + ratio: "2236", + rbopomofo: "3116", + rcaron: "0159", + rcedilla: "0157", + rcircle: "24E1", + rcommaaccent: "0157", + rdblgrave: "0211", + rdotaccent: "1E59", + rdotbelow: "1E5B", + rdotbelowmacron: "1E5D", + referencemark: "203B", + reflexsubset: "2286", + reflexsuperset: "2287", + registered: "00AE", + registersans: "F8E8", + registerserif: "F6DA", + reharabic: "0631", + reharmenian: "0580", + rehfinalarabic: "FEAE", + rehiragana: "308C", + rehyehaleflamarabic: "0631 FEF3 FE8E 0644", + rekatakana: "30EC", + rekatakanahalfwidth: "FF9A", + resh: "05E8", + reshdageshhebrew: "FB48", + reshhatafpatah: "05E8 05B2", + reshhatafpatahhebrew: "05E8 05B2", + reshhatafsegol: "05E8 05B1", + reshhatafsegolhebrew: "05E8 05B1", + reshhebrew: "05E8", + reshhiriq: "05E8 05B4", + reshhiriqhebrew: "05E8 05B4", + reshholam: "05E8 05B9", + reshholamhebrew: "05E8 05B9", + reshpatah: "05E8 05B7", + reshpatahhebrew: "05E8 05B7", + reshqamats: "05E8 05B8", + reshqamatshebrew: "05E8 05B8", + reshqubuts: "05E8 05BB", + reshqubutshebrew: "05E8 05BB", + reshsegol: "05E8 05B6", + reshsegolhebrew: "05E8 05B6", + reshsheva: "05E8 05B0", + reshshevahebrew: "05E8 05B0", + reshtsere: "05E8 05B5", + reshtserehebrew: "05E8 05B5", + reversedtilde: "223D", + reviahebrew: "0597", + reviamugrashhebrew: "0597", + revlogicalnot: "2310", + rfishhook: "027E", + rfishhookreversed: "027F", + rhabengali: "09DD", + rhadeva: "095D", + rho: "03C1", + rhook: "027D", + rhookturned: "027B", + rhookturnedsuperior: "02B5", + rhosymbolgreek: "03F1", + rhotichookmod: "02DE", + rieulacirclekorean: "3271", + rieulaparenkorean: "3211", + rieulcirclekorean: "3263", + rieulhieuhkorean: "3140", + rieulkiyeokkorean: "313A", + rieulkiyeoksioskorean: "3169", + rieulkorean: "3139", + rieulmieumkorean: "313B", + rieulpansioskorean: "316C", + rieulparenkorean: "3203", + rieulphieuphkorean: "313F", + rieulpieupkorean: "313C", + rieulpieupsioskorean: "316B", + rieulsioskorean: "313D", + rieulthieuthkorean: "313E", + rieultikeutkorean: "316A", + rieulyeorinhieuhkorean: "316D", + rightangle: "221F", + righttackbelowcmb: "0319", + righttriangle: "22BF", + rihiragana: "308A", + rikatakana: "30EA", + rikatakanahalfwidth: "FF98", + ring: "02DA", + ringbelowcmb: "0325", + ringcmb: "030A", + ringhalfleft: "02BF", + ringhalfleftarmenian: "0559", + ringhalfleftbelowcmb: "031C", + ringhalfleftcentered: "02D3", + ringhalfright: "02BE", + ringhalfrightbelowcmb: "0339", + ringhalfrightcentered: "02D2", + rinvertedbreve: "0213", + rittorusquare: "3351", + rlinebelow: "1E5F", + rlongleg: "027C", + rlonglegturned: "027A", + rmonospace: "FF52", + rohiragana: "308D", + rokatakana: "30ED", + rokatakanahalfwidth: "FF9B", + roruathai: "0E23", + rparen: "24AD", + rrabengali: "09DC", + rradeva: "0931", + rragurmukhi: "0A5C", + rreharabic: "0691", + rrehfinalarabic: "FB8D", + rrvocalicbengali: "09E0", + rrvocalicdeva: "0960", + rrvocalicgujarati: "0AE0", + rrvocalicvowelsignbengali: "09C4", + rrvocalicvowelsigndeva: "0944", + rrvocalicvowelsigngujarati: "0AC4", + rsuperior: "F6F1", + rtblock: "2590", + rturned: "0279", + rturnedsuperior: "02B4", + ruhiragana: "308B", + rukatakana: "30EB", + rukatakanahalfwidth: "FF99", + rupeemarkbengali: "09F2", + rupeesignbengali: "09F3", + rupiah: "F6DD", + ruthai: "0E24", + rvocalicbengali: "098B", + rvocalicdeva: "090B", + rvocalicgujarati: "0A8B", + rvocalicvowelsignbengali: "09C3", + rvocalicvowelsigndeva: "0943", + rvocalicvowelsigngujarati: "0AC3", + s: "0073", + sabengali: "09B8", + sacute: "015B", + sacutedotaccent: "1E65", + sadarabic: "0635", + sadeva: "0938", + sadfinalarabic: "FEBA", + sadinitialarabic: "FEBB", + sadmedialarabic: "FEBC", + sagujarati: "0AB8", + sagurmukhi: "0A38", + sahiragana: "3055", + sakatakana: "30B5", + sakatakanahalfwidth: "FF7B", + sallallahoualayhewasallamarabic: "FDFA", + samekh: "05E1", + samekhdagesh: "FB41", + samekhdageshhebrew: "FB41", + samekhhebrew: "05E1", + saraaathai: "0E32", + saraaethai: "0E41", + saraaimaimalaithai: "0E44", + saraaimaimuanthai: "0E43", + saraamthai: "0E33", + saraathai: "0E30", + saraethai: "0E40", + saraiileftthai: "F886", + saraiithai: "0E35", + saraileftthai: "F885", + saraithai: "0E34", + saraothai: "0E42", + saraueeleftthai: "F888", + saraueethai: "0E37", + saraueleftthai: "F887", + sarauethai: "0E36", + sarauthai: "0E38", + sarauuthai: "0E39", + sbopomofo: "3119", + scaron: "0161", + scarondotaccent: "1E67", + scedilla: "015F", + schwa: "0259", + schwacyrillic: "04D9", + schwadieresiscyrillic: "04DB", + schwahook: "025A", + scircle: "24E2", + scircumflex: "015D", + scommaaccent: "0219", + sdotaccent: "1E61", + sdotbelow: "1E63", + sdotbelowdotaccent: "1E69", + seagullbelowcmb: "033C", + second: "2033", + secondtonechinese: "02CA", + section: "00A7", + seenarabic: "0633", + seenfinalarabic: "FEB2", + seeninitialarabic: "FEB3", + seenmedialarabic: "FEB4", + segol: "05B6", + segol13: "05B6", + segol1f: "05B6", + segol2c: "05B6", + segolhebrew: "05B6", + segolnarrowhebrew: "05B6", + segolquarterhebrew: "05B6", + segoltahebrew: "0592", + segolwidehebrew: "05B6", + seharmenian: "057D", + sehiragana: "305B", + sekatakana: "30BB", + sekatakanahalfwidth: "FF7E", + semicolon: "003B", + semicolonarabic: "061B", + semicolonmonospace: "FF1B", + semicolonsmall: "FE54", + semivoicedmarkkana: "309C", + semivoicedmarkkanahalfwidth: "FF9F", + sentisquare: "3322", + sentosquare: "3323", + seven: "0037", + sevenarabic: "0667", + sevenbengali: "09ED", + sevencircle: "2466", + sevencircleinversesansserif: "2790", + sevendeva: "096D", + seveneighths: "215E", + sevengujarati: "0AED", + sevengurmukhi: "0A6D", + sevenhackarabic: "0667", + sevenhangzhou: "3027", + sevenideographicparen: "3226", + seveninferior: "2087", + sevenmonospace: "FF17", + sevenoldstyle: "F737", + sevenparen: "247A", + sevenperiod: "248E", + sevenpersian: "06F7", + sevenroman: "2176", + sevensuperior: "2077", + seventeencircle: "2470", + seventeenparen: "2484", + seventeenperiod: "2498", + seventhai: "0E57", + sfthyphen: "00AD", + shaarmenian: "0577", + shabengali: "09B6", + shacyrillic: "0448", + shaddaarabic: "0651", + shaddadammaarabic: "FC61", + shaddadammatanarabic: "FC5E", + shaddafathaarabic: "FC60", + shaddafathatanarabic: "0651 064B", + shaddakasraarabic: "FC62", + shaddakasratanarabic: "FC5F", + shade: "2592", + shadedark: "2593", + shadelight: "2591", + shademedium: "2592", + shadeva: "0936", + shagujarati: "0AB6", + shagurmukhi: "0A36", + shalshelethebrew: "0593", + shbopomofo: "3115", + shchacyrillic: "0449", + sheenarabic: "0634", + sheenfinalarabic: "FEB6", + sheeninitialarabic: "FEB7", + sheenmedialarabic: "FEB8", + sheicoptic: "03E3", + sheqel: "20AA", + sheqelhebrew: "20AA", + sheva: "05B0", + sheva115: "05B0", + sheva15: "05B0", + sheva22: "05B0", + sheva2e: "05B0", + shevahebrew: "05B0", + shevanarrowhebrew: "05B0", + shevaquarterhebrew: "05B0", + shevawidehebrew: "05B0", + shhacyrillic: "04BB", + shimacoptic: "03ED", + shin: "05E9", + shindagesh: "FB49", + shindageshhebrew: "FB49", + shindageshshindot: "FB2C", + shindageshshindothebrew: "FB2C", + shindageshsindot: "FB2D", + shindageshsindothebrew: "FB2D", + shindothebrew: "05C1", + shinhebrew: "05E9", + shinshindot: "FB2A", + shinshindothebrew: "FB2A", + shinsindot: "FB2B", + shinsindothebrew: "FB2B", + shook: "0282", + sigma: "03C3", + sigma1: "03C2", + sigmafinal: "03C2", + sigmalunatesymbolgreek: "03F2", + sihiragana: "3057", + sikatakana: "30B7", + sikatakanahalfwidth: "FF7C", + siluqhebrew: "05BD", + siluqlefthebrew: "05BD", + similar: "223C", + sindothebrew: "05C2", + siosacirclekorean: "3274", + siosaparenkorean: "3214", + sioscieuckorean: "317E", + sioscirclekorean: "3266", + sioskiyeokkorean: "317A", + sioskorean: "3145", + siosnieunkorean: "317B", + siosparenkorean: "3206", + siospieupkorean: "317D", + siostikeutkorean: "317C", + six: "0036", + sixarabic: "0666", + sixbengali: "09EC", + sixcircle: "2465", + sixcircleinversesansserif: "278F", + sixdeva: "096C", + sixgujarati: "0AEC", + sixgurmukhi: "0A6C", + sixhackarabic: "0666", + sixhangzhou: "3026", + sixideographicparen: "3225", + sixinferior: "2086", + sixmonospace: "FF16", + sixoldstyle: "F736", + sixparen: "2479", + sixperiod: "248D", + sixpersian: "06F6", + sixroman: "2175", + sixsuperior: "2076", + sixteencircle: "246F", + sixteencurrencydenominatorbengali: "09F9", + sixteenparen: "2483", + sixteenperiod: "2497", + sixthai: "0E56", + slash: "002F", + slashmonospace: "FF0F", + slong: "017F", + slongdotaccent: "1E9B", + smileface: "263A", + smonospace: "FF53", + sofpasuqhebrew: "05C3", + softhyphen: "00AD", + softsigncyrillic: "044C", + sohiragana: "305D", + sokatakana: "30BD", + sokatakanahalfwidth: "FF7F", + soliduslongoverlaycmb: "0338", + solidusshortoverlaycmb: "0337", + sorusithai: "0E29", + sosalathai: "0E28", + sosothai: "0E0B", + sosuathai: "0E2A", + space: "0020", + spacehackarabic: "0020", + spade: "2660", + spadesuitblack: "2660", + spadesuitwhite: "2664", + sparen: "24AE", + squarebelowcmb: "033B", + squarecc: "33C4", + squarecm: "339D", + squarediagonalcrosshatchfill: "25A9", + squarehorizontalfill: "25A4", + squarekg: "338F", + squarekm: "339E", + squarekmcapital: "33CE", + squareln: "33D1", + squarelog: "33D2", + squaremg: "338E", + squaremil: "33D5", + squaremm: "339C", + squaremsquared: "33A1", + squareorthogonalcrosshatchfill: "25A6", + squareupperlefttolowerrightfill: "25A7", + squareupperrighttolowerleftfill: "25A8", + squareverticalfill: "25A5", + squarewhitewithsmallblack: "25A3", + srsquare: "33DB", + ssabengali: "09B7", + ssadeva: "0937", + ssagujarati: "0AB7", + ssangcieuckorean: "3149", + ssanghieuhkorean: "3185", + ssangieungkorean: "3180", + ssangkiyeokkorean: "3132", + ssangnieunkorean: "3165", + ssangpieupkorean: "3143", + ssangsioskorean: "3146", + ssangtikeutkorean: "3138", + ssuperior: "F6F2", + sterling: "00A3", + sterlingmonospace: "FFE1", + strokelongoverlaycmb: "0336", + strokeshortoverlaycmb: "0335", + subset: "2282", + subsetnotequal: "228A", + subsetorequal: "2286", + succeeds: "227B", + suchthat: "220B", + suhiragana: "3059", + sukatakana: "30B9", + sukatakanahalfwidth: "FF7D", + sukunarabic: "0652", + summation: "2211", + sun: "263C", + superset: "2283", + supersetnotequal: "228B", + supersetorequal: "2287", + svsquare: "33DC", + syouwaerasquare: "337C", + t: "0074", + tabengali: "09A4", + tackdown: "22A4", + tackleft: "22A3", + tadeva: "0924", + tagujarati: "0AA4", + tagurmukhi: "0A24", + taharabic: "0637", + tahfinalarabic: "FEC2", + tahinitialarabic: "FEC3", + tahiragana: "305F", + tahmedialarabic: "FEC4", + taisyouerasquare: "337D", + takatakana: "30BF", + takatakanahalfwidth: "FF80", + tatweelarabic: "0640", + tau: "03C4", + tav: "05EA", + tavdages: "FB4A", + tavdagesh: "FB4A", + tavdageshhebrew: "FB4A", + tavhebrew: "05EA", + tbar: "0167", + tbopomofo: "310A", + tcaron: "0165", + tccurl: "02A8", + tcedilla: "0163", + tcheharabic: "0686", + tchehfinalarabic: "FB7B", + tchehinitialarabic: "FB7C", + tchehmedialarabic: "FB7D", + tchehmeeminitialarabic: "FB7C FEE4", + tcircle: "24E3", + tcircumflexbelow: "1E71", + tcommaaccent: "0163", + tdieresis: "1E97", + tdotaccent: "1E6B", + tdotbelow: "1E6D", + tecyrillic: "0442", + tedescendercyrillic: "04AD", + teharabic: "062A", + tehfinalarabic: "FE96", + tehhahinitialarabic: "FCA2", + tehhahisolatedarabic: "FC0C", + tehinitialarabic: "FE97", + tehiragana: "3066", + tehjeeminitialarabic: "FCA1", + tehjeemisolatedarabic: "FC0B", + tehmarbutaarabic: "0629", + tehmarbutafinalarabic: "FE94", + tehmedialarabic: "FE98", + tehmeeminitialarabic: "FCA4", + tehmeemisolatedarabic: "FC0E", + tehnoonfinalarabic: "FC73", + tekatakana: "30C6", + tekatakanahalfwidth: "FF83", + telephone: "2121", + telephoneblack: "260E", + telishagedolahebrew: "05A0", + telishaqetanahebrew: "05A9", + tencircle: "2469", + tenideographicparen: "3229", + tenparen: "247D", + tenperiod: "2491", + tenroman: "2179", + tesh: "02A7", + tet: "05D8", + tetdagesh: "FB38", + tetdageshhebrew: "FB38", + tethebrew: "05D8", + tetsecyrillic: "04B5", + tevirhebrew: "059B", + tevirlefthebrew: "059B", + thabengali: "09A5", + thadeva: "0925", + thagujarati: "0AA5", + thagurmukhi: "0A25", + thalarabic: "0630", + thalfinalarabic: "FEAC", + thanthakhatlowleftthai: "F898", + thanthakhatlowrightthai: "F897", + thanthakhatthai: "0E4C", + thanthakhatupperleftthai: "F896", + theharabic: "062B", + thehfinalarabic: "FE9A", + thehinitialarabic: "FE9B", + thehmedialarabic: "FE9C", + thereexists: "2203", + therefore: "2234", + theta: "03B8", + theta1: "03D1", + thetasymbolgreek: "03D1", + thieuthacirclekorean: "3279", + thieuthaparenkorean: "3219", + thieuthcirclekorean: "326B", + thieuthkorean: "314C", + thieuthparenkorean: "320B", + thirteencircle: "246C", + thirteenparen: "2480", + thirteenperiod: "2494", + thonangmonthothai: "0E11", + thook: "01AD", + thophuthaothai: "0E12", + thorn: "00FE", + thothahanthai: "0E17", + thothanthai: "0E10", + thothongthai: "0E18", + thothungthai: "0E16", + thousandcyrillic: "0482", + thousandsseparatorarabic: "066C", + thousandsseparatorpersian: "066C", + three: "0033", + threearabic: "0663", + threebengali: "09E9", + threecircle: "2462", + threecircleinversesansserif: "278C", + threedeva: "0969", + threeeighths: "215C", + threegujarati: "0AE9", + threegurmukhi: "0A69", + threehackarabic: "0663", + threehangzhou: "3023", + threeideographicparen: "3222", + threeinferior: "2083", + threemonospace: "FF13", + threenumeratorbengali: "09F6", + threeoldstyle: "F733", + threeparen: "2476", + threeperiod: "248A", + threepersian: "06F3", + threequarters: "00BE", + threequartersemdash: "F6DE", + threeroman: "2172", + threesuperior: "00B3", + threethai: "0E53", + thzsquare: "3394", + tihiragana: "3061", + tikatakana: "30C1", + tikatakanahalfwidth: "FF81", + tikeutacirclekorean: "3270", + tikeutaparenkorean: "3210", + tikeutcirclekorean: "3262", + tikeutkorean: "3137", + tikeutparenkorean: "3202", + tilde: "02DC", + tildebelowcmb: "0330", + tildecmb: "0303", + tildecomb: "0303", + tildedoublecmb: "0360", + tildeoperator: "223C", + tildeoverlaycmb: "0334", + tildeverticalcmb: "033E", + timescircle: "2297", + tipehahebrew: "0596", + tipehalefthebrew: "0596", + tippigurmukhi: "0A70", + titlocyrilliccmb: "0483", + tiwnarmenian: "057F", + tlinebelow: "1E6F", + tmonospace: "FF54", + toarmenian: "0569", + tohiragana: "3068", + tokatakana: "30C8", + tokatakanahalfwidth: "FF84", + tonebarextrahighmod: "02E5", + tonebarextralowmod: "02E9", + tonebarhighmod: "02E6", + tonebarlowmod: "02E8", + tonebarmidmod: "02E7", + tonefive: "01BD", + tonesix: "0185", + tonetwo: "01A8", + tonos: "0384", + tonsquare: "3327", + topatakthai: "0E0F", + tortoiseshellbracketleft: "3014", + tortoiseshellbracketleftsmall: "FE5D", + tortoiseshellbracketleftvertical: "FE39", + tortoiseshellbracketright: "3015", + tortoiseshellbracketrightsmall: "FE5E", + tortoiseshellbracketrightvertical: "FE3A", + totaothai: "0E15", + tpalatalhook: "01AB", + tparen: "24AF", + trademark: "2122", + trademarksans: "F8EA", + trademarkserif: "F6DB", + tretroflexhook: "0288", + triagdn: "25BC", + triaglf: "25C4", + triagrt: "25BA", + triagup: "25B2", + ts: "02A6", + tsadi: "05E6", + tsadidagesh: "FB46", + tsadidageshhebrew: "FB46", + tsadihebrew: "05E6", + tsecyrillic: "0446", + tsere: "05B5", + tsere12: "05B5", + tsere1e: "05B5", + tsere2b: "05B5", + tserehebrew: "05B5", + tserenarrowhebrew: "05B5", + tserequarterhebrew: "05B5", + tserewidehebrew: "05B5", + tshecyrillic: "045B", + tsuperior: "F6F3", + ttabengali: "099F", + ttadeva: "091F", + ttagujarati: "0A9F", + ttagurmukhi: "0A1F", + tteharabic: "0679", + ttehfinalarabic: "FB67", + ttehinitialarabic: "FB68", + ttehmedialarabic: "FB69", + tthabengali: "09A0", + tthadeva: "0920", + tthagujarati: "0AA0", + tthagurmukhi: "0A20", + tturned: "0287", + tuhiragana: "3064", + tukatakana: "30C4", + tukatakanahalfwidth: "FF82", + tusmallhiragana: "3063", + tusmallkatakana: "30C3", + tusmallkatakanahalfwidth: "FF6F", + twelvecircle: "246B", + twelveparen: "247F", + twelveperiod: "2493", + twelveroman: "217B", + twentycircle: "2473", + twentyhangzhou: "5344", + twentyparen: "2487", + twentyperiod: "249B", + two: "0032", + twoarabic: "0662", + twobengali: "09E8", + twocircle: "2461", + twocircleinversesansserif: "278B", + twodeva: "0968", + twodotenleader: "2025", + twodotleader: "2025", + twodotleadervertical: "FE30", + twogujarati: "0AE8", + twogurmukhi: "0A68", + twohackarabic: "0662", + twohangzhou: "3022", + twoideographicparen: "3221", + twoinferior: "2082", + twomonospace: "FF12", + twonumeratorbengali: "09F5", + twooldstyle: "F732", + twoparen: "2475", + twoperiod: "2489", + twopersian: "06F2", + tworoman: "2171", + twostroke: "01BB", + twosuperior: "00B2", + twothai: "0E52", + twothirds: "2154", + u: "0075", + uacute: "00FA", + ubar: "0289", + ubengali: "0989", + ubopomofo: "3128", + ubreve: "016D", + ucaron: "01D4", + ucircle: "24E4", + ucircumflex: "00FB", + ucircumflexbelow: "1E77", + ucyrillic: "0443", + udattadeva: "0951", + udblacute: "0171", + udblgrave: "0215", + udeva: "0909", + udieresis: "00FC", + udieresisacute: "01D8", + udieresisbelow: "1E73", + udieresiscaron: "01DA", + udieresiscyrillic: "04F1", + udieresisgrave: "01DC", + udieresismacron: "01D6", + udotbelow: "1EE5", + ugrave: "00F9", + ugujarati: "0A89", + ugurmukhi: "0A09", + uhiragana: "3046", + uhookabove: "1EE7", + uhorn: "01B0", + uhornacute: "1EE9", + uhorndotbelow: "1EF1", + uhorngrave: "1EEB", + uhornhookabove: "1EED", + uhorntilde: "1EEF", + uhungarumlaut: "0171", + uhungarumlautcyrillic: "04F3", + uinvertedbreve: "0217", + ukatakana: "30A6", + ukatakanahalfwidth: "FF73", + ukcyrillic: "0479", + ukorean: "315C", + umacron: "016B", + umacroncyrillic: "04EF", + umacrondieresis: "1E7B", + umatragurmukhi: "0A41", + umonospace: "FF55", + underscore: "005F", + underscoredbl: "2017", + underscoremonospace: "FF3F", + underscorevertical: "FE33", + underscorewavy: "FE4F", + union: "222A", + universal: "2200", + uogonek: "0173", + uparen: "24B0", + upblock: "2580", + upperdothebrew: "05C4", + upsilon: "03C5", + upsilondieresis: "03CB", + upsilondieresistonos: "03B0", + upsilonlatin: "028A", + upsilontonos: "03CD", + uptackbelowcmb: "031D", + uptackmod: "02D4", + uragurmukhi: "0A73", + uring: "016F", + ushortcyrillic: "045E", + usmallhiragana: "3045", + usmallkatakana: "30A5", + usmallkatakanahalfwidth: "FF69", + ustraightcyrillic: "04AF", + ustraightstrokecyrillic: "04B1", + utilde: "0169", + utildeacute: "1E79", + utildebelow: "1E75", + uubengali: "098A", + uudeva: "090A", + uugujarati: "0A8A", + uugurmukhi: "0A0A", + uumatragurmukhi: "0A42", + uuvowelsignbengali: "09C2", + uuvowelsigndeva: "0942", + uuvowelsigngujarati: "0AC2", + uvowelsignbengali: "09C1", + uvowelsigndeva: "0941", + uvowelsigngujarati: "0AC1", + v: "0076", + vadeva: "0935", + vagujarati: "0AB5", + vagurmukhi: "0A35", + vakatakana: "30F7", + vav: "05D5", + vavdagesh: "FB35", + vavdagesh65: "FB35", + vavdageshhebrew: "FB35", + vavhebrew: "05D5", + vavholam: "FB4B", + vavholamhebrew: "FB4B", + vavvavhebrew: "05F0", + vavyodhebrew: "05F1", + vcircle: "24E5", + vdotbelow: "1E7F", + vecyrillic: "0432", + veharabic: "06A4", + vehfinalarabic: "FB6B", + vehinitialarabic: "FB6C", + vehmedialarabic: "FB6D", + vekatakana: "30F9", + venus: "2640", + verticalbar: "007C", + verticallineabovecmb: "030D", + verticallinebelowcmb: "0329", + verticallinelowmod: "02CC", + verticallinemod: "02C8", + vewarmenian: "057E", + vhook: "028B", + vikatakana: "30F8", + viramabengali: "09CD", + viramadeva: "094D", + viramagujarati: "0ACD", + visargabengali: "0983", + visargadeva: "0903", + visargagujarati: "0A83", + vmonospace: "FF56", + voarmenian: "0578", + voicediterationhiragana: "309E", + voicediterationkatakana: "30FE", + voicedmarkkana: "309B", + voicedmarkkanahalfwidth: "FF9E", + vokatakana: "30FA", + vparen: "24B1", + vtilde: "1E7D", + vturned: "028C", + vuhiragana: "3094", + vukatakana: "30F4", + w: "0077", + wacute: "1E83", + waekorean: "3159", + wahiragana: "308F", + wakatakana: "30EF", + wakatakanahalfwidth: "FF9C", + wakorean: "3158", + wasmallhiragana: "308E", + wasmallkatakana: "30EE", + wattosquare: "3357", + wavedash: "301C", + wavyunderscorevertical: "FE34", + wawarabic: "0648", + wawfinalarabic: "FEEE", + wawhamzaabovearabic: "0624", + wawhamzaabovefinalarabic: "FE86", + wbsquare: "33DD", + wcircle: "24E6", + wcircumflex: "0175", + wdieresis: "1E85", + wdotaccent: "1E87", + wdotbelow: "1E89", + wehiragana: "3091", + weierstrass: "2118", + wekatakana: "30F1", + wekorean: "315E", + weokorean: "315D", + wgrave: "1E81", + whitebullet: "25E6", + whitecircle: "25CB", + whitecircleinverse: "25D9", + whitecornerbracketleft: "300E", + whitecornerbracketleftvertical: "FE43", + whitecornerbracketright: "300F", + whitecornerbracketrightvertical: "FE44", + whitediamond: "25C7", + whitediamondcontainingblacksmalldiamond: "25C8", + whitedownpointingsmalltriangle: "25BF", + whitedownpointingtriangle: "25BD", + whiteleftpointingsmalltriangle: "25C3", + whiteleftpointingtriangle: "25C1", + whitelenticularbracketleft: "3016", + whitelenticularbracketright: "3017", + whiterightpointingsmalltriangle: "25B9", + whiterightpointingtriangle: "25B7", + whitesmallsquare: "25AB", + whitesmilingface: "263A", + whitesquare: "25A1", + whitestar: "2606", + whitetelephone: "260F", + whitetortoiseshellbracketleft: "3018", + whitetortoiseshellbracketright: "3019", + whiteuppointingsmalltriangle: "25B5", + whiteuppointingtriangle: "25B3", + wihiragana: "3090", + wikatakana: "30F0", + wikorean: "315F", + wmonospace: "FF57", + wohiragana: "3092", + wokatakana: "30F2", + wokatakanahalfwidth: "FF66", + won: "20A9", + wonmonospace: "FFE6", + wowaenthai: "0E27", + wparen: "24B2", + wring: "1E98", + wsuperior: "02B7", + wturned: "028D", + wynn: "01BF", + x: "0078", + xabovecmb: "033D", + xbopomofo: "3112", + xcircle: "24E7", + xdieresis: "1E8D", + xdotaccent: "1E8B", + xeharmenian: "056D", + xi: "03BE", + xmonospace: "FF58", + xparen: "24B3", + xsuperior: "02E3", + y: "0079", + yaadosquare: "334E", + yabengali: "09AF", + yacute: "00FD", + yadeva: "092F", + yaekorean: "3152", + yagujarati: "0AAF", + yagurmukhi: "0A2F", + yahiragana: "3084", + yakatakana: "30E4", + yakatakanahalfwidth: "FF94", + yakorean: "3151", + yamakkanthai: "0E4E", + yasmallhiragana: "3083", + yasmallkatakana: "30E3", + yasmallkatakanahalfwidth: "FF6C", + yatcyrillic: "0463", + ycircle: "24E8", + ycircumflex: "0177", + ydieresis: "00FF", + ydotaccent: "1E8F", + ydotbelow: "1EF5", + yeharabic: "064A", + yehbarreearabic: "06D2", + yehbarreefinalarabic: "FBAF", + yehfinalarabic: "FEF2", + yehhamzaabovearabic: "0626", + yehhamzaabovefinalarabic: "FE8A", + yehhamzaaboveinitialarabic: "FE8B", + yehhamzaabovemedialarabic: "FE8C", + yehinitialarabic: "FEF3", + yehmedialarabic: "FEF4", + yehmeeminitialarabic: "FCDD", + yehmeemisolatedarabic: "FC58", + yehnoonfinalarabic: "FC94", + yehthreedotsbelowarabic: "06D1", + yekorean: "3156", + yen: "00A5", + yenmonospace: "FFE5", + yeokorean: "3155", + yeorinhieuhkorean: "3186", + yerahbenyomohebrew: "05AA", + yerahbenyomolefthebrew: "05AA", + yericyrillic: "044B", + yerudieresiscyrillic: "04F9", + yesieungkorean: "3181", + yesieungpansioskorean: "3183", + yesieungsioskorean: "3182", + yetivhebrew: "059A", + ygrave: "1EF3", + yhook: "01B4", + yhookabove: "1EF7", + yiarmenian: "0575", + yicyrillic: "0457", + yikorean: "3162", + yinyang: "262F", + yiwnarmenian: "0582", + ymonospace: "FF59", + yod: "05D9", + yoddagesh: "FB39", + yoddageshhebrew: "FB39", + yodhebrew: "05D9", + yodyodhebrew: "05F2", + yodyodpatahhebrew: "FB1F", + yohiragana: "3088", + yoikorean: "3189", + yokatakana: "30E8", + yokatakanahalfwidth: "FF96", + yokorean: "315B", + yosmallhiragana: "3087", + yosmallkatakana: "30E7", + yosmallkatakanahalfwidth: "FF6E", + yotgreek: "03F3", + yoyaekorean: "3188", + yoyakorean: "3187", + yoyakthai: "0E22", + yoyingthai: "0E0D", + yparen: "24B4", + ypogegrammeni: "037A", + ypogegrammenigreekcmb: "0345", + yr: "01A6", + yring: "1E99", + ysuperior: "02B8", + ytilde: "1EF9", + yturned: "028E", + yuhiragana: "3086", + yuikorean: "318C", + yukatakana: "30E6", + yukatakanahalfwidth: "FF95", + yukorean: "3160", + yusbigcyrillic: "046B", + yusbigiotifiedcyrillic: "046D", + yuslittlecyrillic: "0467", + yuslittleiotifiedcyrillic: "0469", + yusmallhiragana: "3085", + yusmallkatakana: "30E5", + yusmallkatakanahalfwidth: "FF6D", + yuyekorean: "318B", + yuyeokorean: "318A", + yyabengali: "09DF", + yyadeva: "095F", + z: "007A", + zaarmenian: "0566", + zacute: "017A", + zadeva: "095B", + zagurmukhi: "0A5B", + zaharabic: "0638", + zahfinalarabic: "FEC6", + zahinitialarabic: "FEC7", + zahiragana: "3056", + zahmedialarabic: "FEC8", + zainarabic: "0632", + zainfinalarabic: "FEB0", + zakatakana: "30B6", + zaqefgadolhebrew: "0595", + zaqefqatanhebrew: "0594", + zarqahebrew: "0598", + zayin: "05D6", + zayindagesh: "FB36", + zayindageshhebrew: "FB36", + zayinhebrew: "05D6", + zbopomofo: "3117", + zcaron: "017E", + zcircle: "24E9", + zcircumflex: "1E91", + zcurl: "0291", + zdot: "017C", + zdotaccent: "017C", + zdotbelow: "1E93", + zecyrillic: "0437", + zedescendercyrillic: "0499", + zedieresiscyrillic: "04DF", + zehiragana: "305C", + zekatakana: "30BC", + zero: "0030", + zeroarabic: "0660", + zerobengali: "09E6", + zerodeva: "0966", + zerogujarati: "0AE6", + zerogurmukhi: "0A66", + zerohackarabic: "0660", + zeroinferior: "2080", + zeromonospace: "FF10", + zerooldstyle: "F730", + zeropersian: "06F0", + zerosuperior: "2070", + zerothai: "0E50", + zerowidthjoiner: "FEFF", + zerowidthnonjoiner: "200C", + zerowidthspace: "200B", + zeta: "03B6", + zhbopomofo: "3113", + zhearmenian: "056A", + zhebrevecyrillic: "04C2", + zhecyrillic: "0436", + zhedescendercyrillic: "0497", + zhedieresiscyrillic: "04DD", + zihiragana: "3058", + zikatakana: "30B8", + zinorhebrew: "05AE", + zlinebelow: "1E95", + zmonospace: "FF5A", + zohiragana: "305E", + zokatakana: "30BE", + zparen: "24B5", + zretroflexhook: "0290", + zstroke: "01B6", + zuhiragana: "305A", + zukatakana: "30BA", +}; diff --git a/test.html b/test.html index b70351f2f..be6887578 100644 --- a/test.html +++ b/test.html @@ -7,6 +7,7 @@ + From ce9224538e205e25315d268ee1526c2cb2063ffd Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sun, 12 Jun 2011 01:51:27 +0200 Subject: [PATCH 27/72] Fix a bunch of errors reported by OTS except the error about 3-0-4 MS symbol in cmap --- PDFFont.js | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 0eb29d323..ed8df6e3b 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -926,7 +926,7 @@ Type1Font.prototype = { }; // Encode the glyph and add it to the FUX - var r = [[0x40, 0xEA]]; + var r = [[0x40, 0x0E]]; for (var i = 0; i < glyphs.length; i++) { var data = glyphs[i].slice(); var charstring = []; @@ -1102,6 +1102,10 @@ Type1Font.prototype = { var offset = aOffset; // length + // Per spec tables must be 4-bytes align so add some 0x00 if needed + while (aData.length & 3) + aData.push(0x00); + var length = aData.length; // checksum @@ -1216,7 +1220,7 @@ Type1Font.prototype = { 0x00, 0x00, 0x00, 0x00, // checksumAdjustement 0x5F, 0x0F, 0x3C, 0xF5, // magicNumber 0x00, 0x00, // Flags - 0x00, 0x00, // unitsPerEM + 0x00, 0x40, // unitsPerEM (>= 16 && <=16384) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // created 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // modified 0x00, 0x00, // xMin @@ -1255,7 +1259,9 @@ Type1Font.prototype = { 0x00, 0x00, // -reserved- 0x00, 0x00 // metricDataFormat ]; - hhea = hhea.concat(this.encodeNumber(charstrings.length, 2)); // numberOfHMetrics + hhea = hhea.concat(this.integerToBytes(charstrings.length, 2)); // numberOfHMetrics + log(hhea); + var tableEntry = this.createTableEntry("hhea", virtualOffset, hhea); otf.set(tableEntry, currentOffset); currentOffset += tableEntry.length; From fdacb575c5601a205cdfc0c2d050ab57a536e0ef Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sun, 12 Jun 2011 01:56:21 +0200 Subject: [PATCH 28/72] Default the unit per EM size to 1000 --- PDFFont.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PDFFont.js b/PDFFont.js index ed8df6e3b..f96e3f04d 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1220,7 +1220,7 @@ Type1Font.prototype = { 0x00, 0x00, 0x00, 0x00, // checksumAdjustement 0x5F, 0x0F, 0x3C, 0xF5, // magicNumber 0x00, 0x00, // Flags - 0x00, 0x40, // unitsPerEM (>= 16 && <=16384) + 0x03, 0xE8, // unitsPerEM (>= 16 && <=16384) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // created 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // modified 0x00, 0x00, // xMin From 828367a1b45e47643a763ff996c138f8bf8c729b Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sun, 12 Jun 2011 03:00:45 +0200 Subject: [PATCH 29/72] Make the Type1 to OTF code active, even if the sanitizer prevent fonts to load at the moment --- PDFFont.js | 86 +++++++++++++++++++++++++++++++++++++----------------- pdf.js | 5 ++-- 2 files changed, 63 insertions(+), 28 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index f96e3f04d..1da594e28 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1,3 +1,6 @@ + +var kMaxFontFileSize = 100000; + /** * This dictionary holds decoded fonts data. */ @@ -68,7 +71,13 @@ var TrueTypeFont = function(aFontName, aFontFile) { return; _Fonts[aFontName] = true; - //log("Loading a TrueType font: " + aFontName); + var debug = false; + function dump(aMsg) { + if (debug) + log(aMsg); + } + + dump("Loading a TrueType font: " + aFontName); var fontData = Base64Encoder.encode(aFontFile); Fonts.set(aFontName, fontData); @@ -648,6 +657,12 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { operandStack.push("rmoveto"); break; + case "div": + var num2 = operandStack.pop(); + var num1 = operandStack.pop(); + operandStack.push(num2 / num1); + break; + case "setcurrentpoint": case "dotsection": case "seac": @@ -680,7 +695,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var count = operandStack.pop(); var data = operandStack.pop(); if (index != 3) - log("callothersubr for index: " + index); + dump("callothersubr for index: " + index); operandStack.push(3); operandStack.push("callothersubr"); break; @@ -713,7 +728,7 @@ var Type1Font = function(aFontName, aFontFile) { if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) error("Invalid file header"); - if (!type1hack) { + if (!type1hack || true) { type1hack = true; var start = Date.now(); @@ -723,7 +738,13 @@ var Type1Font = function(aFontName, aFontFile) { this.parser = new Type1Parser(ASCIIStream, binaryStream); var fontName = this.parser.parse(); var font = Fonts.get(fontName); - this.convertToOTF(this.convertToCFF(font), font); + var fontData = this.convertToOTF(this.convertToCFF(font), font); + fontData = Base64Encoder.encode(fontData); + Fonts.set(aFontName, fontData); + + // Add the css rule + var url = "url(data:font/otf;base64," + fontData + ");"; + document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0); var end = Date.now(); log("Time to parse font is:" + (end - start)); @@ -846,6 +867,12 @@ Type1Font.prototype = { }, convertToCFF: function(aFont) { + var debug = false; + function dump(aMsg) { + if (debug) + log(aMsg); + }; + var charstrings = this.getOrderedCharStrings(aFont); var defaultWidth = this.getDefaultWidth(charstrings); @@ -856,7 +883,7 @@ Type1Font.prototype = { var subrs = aFont.get("Private").get("Subrs"); var parser = new Type1Parser(); for (var i = 0; i < charstrings.length; i++) { - var charstring = charstrings[i].charstring; + var charstring = charstrings[i].charstring.slice(); var glyph = charstrings[i].glyph; if (glyphsChecker[glyph]) error("glyphs already exists!"); @@ -867,10 +894,10 @@ Type1Font.prototype = { charstringsCount++; charstringsDataLength += flattened.length; } - //log("There is " + charstringsCount + " glyphs (size: " + charstringsDataLength + ")"); + dump("There is " + charstringsCount + " glyphs (size: " + charstringsDataLength + ")"); // Create a CFF font data - var cff = new Uint8Array(20000); + var cff = new Uint8Array(kMaxFontFileSize); var currentOffset = 0; // Font header (major version, minor version, header size, offset size) @@ -938,7 +965,12 @@ Type1Font.prototype = { error(c); charstring.push(token); } else { - var bytes = this.encodeNumber(c); + try { + var bytes = this.encodeNumber(c); + } catch(e) { + log("Glyph " + i + " has a wrong value: " + c + " in charstring: " + data); + log("the default value is glyph " + charstrings[i].glyph + " and is supposed to be: " + charstrings[i].charstring); + } for (var k = 0; k < bytes.length; k++) charstring.push(bytes[k]); } @@ -1035,20 +1067,17 @@ Type1Font.prototype = { currentOffset += shit.length; - log("==================== debug ===================="); - /* - log("== parse"); - var file = new Uint8Array(cff, 0, currentOffset); - var parser = new Type2Parser(); - parser.parse(new Stream(file)); - */ + dump("==================== debug ===================="); + //var file = new Uint8Array(cff, 0, currentOffset); + //var parser = new Type2Parser(); + //parser.parse(new Stream(file)); var data = []; for (var i = 0; i < currentOffset; i++) data.push(cff[i]); - log("== write to file"); - writeToFile(data, "/tmp/pdf.js.cff"); + //log("== write to file"); + //writeToFile(data, "/tmp/pdf.js.cff"); return data; }, @@ -1118,7 +1147,7 @@ Type1Font.prototype = { }, convertToOTF: function(aData, aFont) { - var otf = new Uint8Array(20000); + var otf = new Uint8Array(kMaxFontFileSize); var currentOffset = 0; var numTables = 9; @@ -1198,11 +1227,17 @@ Type1Font.prototype = { } for (var i = 0; i < charstrings.length; i++) { - var pos = GlyphsUnicode[charstrings[i].glyph]; + var glyph = charstrings[i].glyph; + if (glyph == ".notdef") + continue; + + var pos = GlyphsUnicode[glyph]; + if (!pos) + error(charstrings[i].glyph + " does not have an entry in the glyphs table"); var b1 = parseInt("0x" + pos[0] + pos[1]); var b2 = parseInt("0x" + pos[2] + pos[3]); - var pos = this.bytesToInteger([b1, b2]); - data[pos] = i + 1; + var num = this.bytesToInteger([b1, b2]); + data[num] = i + 1; } cmap = cmap.concat(data); @@ -1260,7 +1295,6 @@ Type1Font.prototype = { 0x00, 0x00 // metricDataFormat ]; hhea = hhea.concat(this.integerToBytes(charstrings.length, 2)); // numberOfHMetrics - log(hhea); var tableEntry = this.createTableEntry("hhea", virtualOffset, hhea); otf.set(tableEntry, currentOffset); @@ -1336,13 +1370,13 @@ Type1Font.prototype = { otf.set(table, currentOffset); currentOffset += table.length; } - log(currentOffset + "::" + virtualOffset + "\n"); - var data = []; + var fontData = []; for (var i = 0; i < currentOffset; i++) - data.push(otf[i]); + fontData.push(otf[i]); - writeToFile(data, "/tmp/pdf.js.otf"); + //writeToFile(data, "/tmp/pdf.js.otf"); + return fontData; } }; diff --git a/pdf.js b/pdf.js index ef8a18861..7bc788e20 100644 --- a/pdf.js +++ b/pdf.js @@ -2276,7 +2276,7 @@ var CanvasGraphics = (function() { if (!font) return; - var fontName = "Nimbus Roman No9 L"; + var fontName = ""; var subtype = font.get("Subtype").name; switch (subtype) { case "Type1": @@ -2285,7 +2285,8 @@ var CanvasGraphics = (function() { // XXX fetchIfRef looks expensive var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile")); - font = new Type1Font(fontDescriptor.get("FontName").name, fontFile); + fontName = fontDescriptor.get("FontName").name; + font = new Type1Font(fontName, fontFile); } break; From 632fcfed18fdfaaec728b2281cb5e270cee8f105 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sun, 12 Jun 2011 03:10:54 +0200 Subject: [PATCH 30/72] Turn on TrueType decoder even if the sanitizer prevent them to load at the moment --- PDFFont.js | 4 ++-- pdf.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 1da594e28..fa4faabdd 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -60,7 +60,7 @@ var Base64Encoder = { var str = []; var count = aData.length; for (var i = 0; i < count; i++) - str.push(aData.getChar ? aData.getChar : String.fromCharCode(aData[i])); + str.push(aData.getChar ? aData.getChar() : String.fromCharCode(aData[i])); return window.btoa(str.join("")); } @@ -71,7 +71,7 @@ var TrueTypeFont = function(aFontName, aFontFile) { return; _Fonts[aFontName] = true; - var debug = false; + var debug = true; function dump(aMsg) { if (debug) log(aMsg); diff --git a/pdf.js b/pdf.js index 7bc788e20..cdc035c28 100644 --- a/pdf.js +++ b/pdf.js @@ -2286,6 +2286,7 @@ var CanvasGraphics = (function() { var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile")); fontName = fontDescriptor.get("FontName").name; + fontName = fontName.replace("+", ""); // no + are allowed in the font name font = new Type1Font(fontName, fontFile); } break; @@ -2295,7 +2296,6 @@ var CanvasGraphics = (function() { break; case "TrueType": - break; var fontDescriptor = font.get("FontDescriptor"); if (fontDescriptor.num) { var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); From 066e4c45e0dd027dbb832e9e7178e2544f0f1bda Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sun, 12 Jun 2011 05:19:43 +0200 Subject: [PATCH 31/72] Remove the Type2 validation data from the tree --- t2data.js | 60 ------------------------------------------------------- test.html | 1 - 2 files changed, 61 deletions(-) delete mode 100644 t2data.js diff --git a/t2data.js b/t2data.js deleted file mode 100644 index 6e5f52344..000000000 --- a/t2data.js +++ /dev/null @@ -1,60 +0,0 @@ -var validationData = { -hyphen: ["-282","171","116","hstem","44","243","vstem","287","287","rmoveto","-243","-116","243","hlineto","endchar"], -period: ["-365","-13","20","hstem","125","156","rmoveto","-107","callsubr","endchar"], -zero: ["-13","28","645","28","hstem","24","158","136","158","vstem","250","688","rmoveto","-65","0","-66","-44","-41","-72","-32","-55","-22","-94","0","-83","rrcurveto","-206","94","-147","133","129","96","149","201","200","-97","151","-129","vhcurveto","68","-466","rmoveto","0","-59","-8","-78","-9","-27","-11","-29","-16","-14","-23","0","rrcurveto","-50","-19","59","148","hvcurveto","229","vlineto","151","19","58","48","49","20","-62","-147","vhcurveto","endchar"], -one: ["0","24","529","26","89","20","hstem","186","148","vstem","317","688","rmoveto","-252","-109","0","-26","rlineto","9","3","8","3","3","2","27","10","25","7","13","0","rrcurveto","25","11","-22","-47","hvcurveto","-385","vlineto","0","-82","-20","-17","-99","-1","rrcurveto","-24","375","24","vlineto","-91","2","-17","15","0","77","rrcurveto","570","vlineto","endchar"], -two: ["0","133","441","114","hstem","300","136","vstem","478","211","rmoveto","-24","hlineto","-29","-72","-12","-6","-102","0","rrcurveto","-151","0","162","154","rlineto","79","75","35","67","0","76","0","107","-77","76","-108","0","-50","0","-48","-20","-35","-36","-39","-39","-20","-34","-28","-75","rrcurveto","28","hlineto","31","62","37","28","54","0","44","0","33","-19","22","-36","12","-21","8","-28","0","-23","0","-44","-19","-55","-32","-49","-51","-78","-35","-42","-146","-156","rrcurveto","-23","416","vlineto","endchar"], -three: ["-14","50","571","81","hstemhm","283","146","-75","114","hintmask","58","523","rmoveto","38","61","33","23","52","0","63","0","39","-40","0","-63","0","-66","-35","-35","-95","-30","rrcurveto","-17","vlineto","83","-29","33","-17","36","-34","rrcurveto","hintmask","31","-29","18","-45","0","-48","0","-72","-38","-46","-60","0","-24","0","-19","11","-33","32","-39","39","-30","16","-30","0","rrcurveto","-38","-27","-24","-34","-54","61","-36","92","167","132","114","146","hvcurveto","0","46","-15","43","-29","35","-21","24","-17","12","-40","18","rrcurveto","hintmask","64","39","19","29","0","59","0","86","-61","51","-102","0","-99","0","-75","-50","-55","-103","rrcurveto","endchar"], -four: ["0","20","124","111","413","20","hstemhm","267","145","-143","143","hintmask","412","255","rmoveto","433","-80","vlineto","-59","-77","rlineto","-128","-167","-66","-92","-60","-95","rrcurveto","-113","248","-144","vlineto","hintmask","145","144","63","111","hlineto","-206","hmoveto","-209","0","209","297","rlineto","endchar"], -five: ["-8","61","496","127","hstem","373","78","vstem","149","549","rmoveto","278","0","43","127","-322","0","-97","-345","rlineto","95","-7","41","-6","44","-14","89","-28","53","-57","0","-66","0","-56","-44","-44","-56","0","-23","0","-27","12","-40","30","-43","32","-30","13","-26","0","rrcurveto","-36","-26","-25","-35","-53","58","-35","89","165","117","100","141","hvcurveto","0","104","-64","80","-109","31","-38","11","-31","4","-82","5","rrcurveto","endchar"], -six: ["-13","27","362","45","247","20","hstem","28","156","139","152","vstem","470","688","rmoveto","-133","-19","-62","-22","-76","-52","-112","-77","-59","-112","0","-133","rrcurveto","-173","92","-113","140","124","91","99","136","121","-73","78","-114","vhcurveto","-32","0","-22","-4","-30","-13","37","147","83","81","146","30","rrcurveto","-237","-286","rmoveto","67","23","-55","-159","-118","-12","-30","-49","hvcurveto","-23","0","-18","10","-10","19","-17","33","-10","73","0","93","0","62","6","57","7","5","8","6","14","4","14","0","rrcurveto","endchar"], -seven: ["0","20","519","137","hstem","477","676","rmoveto","-416","0","-44","-222","25","0","rlineto","19","64","24","21","55","0","rrcurveto","207","0","-200","-539","95","0","rlineto","endchar"], -eight: ["-13","32","640","29","hstemhm","28","121","-110","129","165","139","-133","119","hintmask","178","324","rmoveto","-57","-19","-23","-13","-26","-25","-29","-28","-15","-38","0","-44","rrcurveto","-102","85","-68","127","139","93","80","120","vhcurveto","0","87","-43","62","-107","68","rrcurveto","hintmask","96","34","40","41","0","64","rrcurveto","89","-76","56","-119","-135","-89","-70","-105","vhcurveto","0","-75","40","-55","99","-59","rrcurveto","120","98","rmoveto","-84","43","-46","55","0","56","rrcurveto","47","36","36","47","56","32","-45","-78","vhcurveto","0","-47","-8","-23","-33","-44","rrcurveto","hintmask","-98","-116","rmoveto","109","-73","24","-32","0","-74","rrcurveto","-67","-33","-41","-54","-61","-36","53","89","vhcurveto","0","55","11","31","40","59","rrcurveto","endchar"], -nine: ["-13","20","247","45","362","27","hstem","26","152","139","156","vstem","31","-13","rmoveto","140","20","68","25","78","59","102","77","54","107","0","127","rrcurveto","173","-92","113","-140","-125","-90","-99","-137","-118","73","-80","109","vhcurveto","38","0","26","5","27","14","-43","-152","-81","-79","-144","-29","rrcurveto","278","315","rmoveto","-1","-13","-1","-5","-3","-2","-7","-5","-19","-4","-15","0","rrcurveto","-62","-23","59","157","116","12","30","49","hvcurveto","25","0","17","-11","11","-23","15","-28","10","-70","0","-71","0","-27","-2","-32","-3","-45","rrcurveto","endchar"], -colon: ["-282","-13","169","147","169","hstem","166","156","rmoveto","-107","callsubr","316","vmoveto","-46","-38","-38","-45","-49","36","-37","48","47","38","38","46","47","-38","38","-47","hvcurveto","endchar"], -equal: ["-45","107","88","116","88","hstem","537","399","rmoveto","-504","-88","504","hlineto","-116","vmoveto","-504","-88","504","hlineto","endchar"], -A: ["107","0","25","172","39","223","20","191","20","hstem","689","25","rmoveto","-4","hlineto","-39","0","-16","21","-50","118","rrcurveto","-223","526","-28","0","-222","-548","rlineto","-38","-93","-12","-15","-48","-9","rrcurveto","-25","203","25","vlineto","-59","4","-23","11","0","26","0","13","8","25","21","54","rrcurveto","15","39","225","0","rlineto","34","-78","12","-36","0","-22","0","-21","-13","-10","-35","-2","-5","0","-13","-1","-14","-2","rrcurveto","-25","324","vlineto","-500","236","rmoveto","94","243","101","-243","rlineto","endchar"], -B: ["52","0","25","-25","32","614","30","-25","25","hstemhm","104","160","156","165","-138","172","hintmask","16","676","rmoveto","hintmask","-25","vlineto","69","-4","19","-16","0","-51","rrcurveto","-484","vlineto","0","-51","-15","-12","-73","-8","rrcurveto","hintmask","-25","322","vlineto","169","112","75","113","hvcurveto","0","45","-19","41","-35","31","-35","30","-34","15","-70","15","rrcurveto","hintmask","116","34","43","41","0","76","rrcurveto","103","-93","57","-167","vhcurveto","-61","-333","rmoveto","30","hlineto","hintmask","103","50","-54","-110","-97","-41","-50","-81","-44","-17","17","44","hvcurveto","504","vmoveto","36","15","13","39","vhcurveto","hintmask","70","32","-43","-94","hvcurveto","0","-105","-33","-28","-123","-3","rrcurveto","endchar"], -C: ["107","-19","48","629","33","hstem","49","177","vstem","657","152","rmoveto","-59","-62","-32","-24","-49","-19","-29","-12","-33","-6","-28","0","-66","0","-63","35","-29","52","-29","53","-14","72","0","100","0","207","64","110","120","0","47","0","43","-18","43","-38","rrcurveto","43","-39","23","-33","35","-76","rrcurveto","25","234","-27","hlineto","-14","-35","-11","-11","-19","0","-9","0","-15","4","-22","10","-59","24","-48","11","-48","0","rrcurveto","-199","-149","-154","-204","-206","147","-146","207","hvcurveto","71","0","59","17","55","37","32","22","22","20","45","50","rrcurveto","endchar"], -D: ["107","0","25","-25","35","607","34","-25","25","hstemhm","97","162","257","174","hintmask","97","91","rmoveto","0","-44","-25","-20","-58","-2","rrcurveto","hintmask","-25","316","vlineto","216","144","138","207","206","-140","125","-231","hvcurveto","-305","hlineto","hintmask","-25","vlineto","64","-6","19","-15","0","-46","rrcurveto","hintmask","162","23","rmoveto","23","20","12","39","vhcurveto","72","0","51","-33","34","-69","27","-53","14","-73","0","-84","0","-95","-20","-86","-32","-45","-34","-46","-48","-23","-65","0","rrcurveto","-45","-13","13","45","hvcurveto","endchar"], -E: ["52","0","25","-25","31","611","34","-25","25","hstemhm","104","162","hintmask","593","676","rmoveto","-577","hlineto","hintmask","-25","vlineto","69","-4","19","-16","0","-51","rrcurveto","-484","vlineto","0","-52","-14","-11","-74","-8","rrcurveto","hintmask","-25","585","vlineto","40","208","-28","0","rlineto","-31","-67","-20","-28","-36","-29","-46","-37","-55","-16","-76","0","rrcurveto","-64","-19","13","43","hvcurveto","242","vlineto","109","0","41","-39","12","-116","rrcurveto","26","338","-26","hlineto","-15","-114","-39","-36","-108","1","rrcurveto","232","vlineto","38","13","9","52","vhcurveto","163","0","49","-34","25","-133","rrcurveto","25","hlineto","endchar"], -F: ["-4","0","25","617","34","-25","25","hstemhm","104","162","hintmask","583","676","rmoveto","-567","hlineto","hintmask","-25","vlineto","69","-4","19","-15","0","-52","rrcurveto","-484","vlineto","0","-52","-14","-11","-74","-8","rrcurveto","-25","360","25","vlineto","-92","4","-18","12","0","55","rrcurveto","233","vlineto","102","-2","37","-37","14","-116","rrcurveto","25","338","-25","hlineto","-18","-114","-35","-35","-100","0","rrcurveto","232","vlineto","hintmask","37","13","10","51","vhcurveto","92","0","59","-17","31","-35","22","-25","11","-27","14","-63","rrcurveto","24","hlineto","endchar"], -G: ["163","-19","33","644","33","hstem","37","177","299","156","vstem","755","287","rmoveto","-343","-25","hlineto","86","-5","15","-11","0","-54","rrcurveto","-105","vlineto","-51","-29","-22","-67","vhcurveto","-63","0","-42","19","-33","43","-44","56","-21","86","0","120","0","209","63","111","121","0","46","0","43","-18","44","-38","43","-38","23","-34","35","-76","rrcurveto","25","234","-27","hlineto","-14","-35","-11","-11","-19","0","-9","0","-15","4","-22","10","-59","24","-48","11","-48","0","rrcurveto","-199","-149","-154","-206","-206","146","-144","210","hvcurveto","103","0","109","24","64","38","rrcurveto","127","vlineto","0","72","11","12","75","8","rrcurveto","endchar"], -I: ["-226","0","25","626","25","hstem","113","162","vstem","113","96","rmoveto","0","-50","-18","-14","-75","-7","rrcurveto","-25","350","25","vlineto","-76","5","-19","14","0","52","rrcurveto","484","vlineto","0","52","21","16","74","3","rrcurveto","25","-350","-25","vlineto","72","-5","21","-15","0","-51","rrcurveto","endchar"], -J: ["-96","33","714","25","hstem","3","116","109","162","vstem","390","559","rmoveto","0","72","14","15","75","5","rrcurveto","25","-352","-25","vlineto","81","-3","20","-14","0","-54","rrcurveto","-556","vlineto","-62","-19","-25","-45","-27","-18","13","19","vhcurveto","0","8","3","6","9","12","12","15","3","8","0","15","rrcurveto","40","-34","35","-39","-37","-33","-34","-38","vhcurveto","0","-41","28","-45","39","-23","26","-14","40","-9","40","0","rrcurveto","140","74","76","143","hvcurveto","endchar"], -L: ["52","0","25","-25","31","620","25","hstemhm","105","162","hintmask","638","227","rmoveto","-29","hlineto","-33","-78","-20","-31","-35","-32","-41","-37","-56","-18","-77","0","rrcurveto","-61","-19","13","43","hvcurveto","472","vlineto","0","75","14","13","87","4","rrcurveto","25","-349","-25","vlineto","68","-4","18","-16","0","-51","rrcurveto","-484","vlineto","hintmask","0","-51","-13","-11","-73","-9","rrcurveto","hintmask","-25","578","vlineto","endchar"], -M: ["329","0","25","626","25","hstem","105","42","531","155","vstem","678","609","rmoveto","-509","vlineto","0","-56","-14","-12","-75","-7","rrcurveto","-25","332","25","vlineto","-78","10","-10","11","0","71","rrcurveto","442","vlineto","0","72","15","15","73","5","rrcurveto","25","-252","vlineto","-200","-472","-200","472","-253","0","0","-25","rlineto","73","-6","16","-13","0","-52","rrcurveto","-475","vlineto","0","-60","-13","-12","-78","-8","rrcurveto","-25","234","25","vlineto","-82","6","-19","19","0","74","rrcurveto","0","470","252","-594","27","0","rlineto","endchar"], -N: ["107","-18","20","-2","25","626","25","hstemhm","104","44","431","44","hintmask","230","676","rmoveto","-211","-25","hlineto","20","0","17","-15","48","-57","rrcurveto","-474","vlineto","0","-57","-15","-14","-73","-9","rrcurveto","-25","227","25","vlineto","-77","9","-18","19","0","71","rrcurveto","0","402","rlineto","hintmask","447","-544","28","0","0","589","rlineto","0","57","13","13","65","10","rrcurveto","25","-215","-25","vlineto","74","-7","19","-21","0","-71","rrcurveto","-305","vlineto","endchar"], -O: ["163","-19","33","644","33","hstem","35","177","354","177","vstem","393","691","rmoveto","-209","-149","-148","-208","-207","147","-147","207","207","147","148","207","204","-149","151","-201","hvcurveto","-1","-33","rmoveto","110","64","-121","-208","-205","-62","-110","-115","-115","-62","110","202","216","63","116","117","hvcurveto","endchar"], -P: ["-4","0","25","616","35","-25","25","hstemhm","100","162","166","172","hintmask","262","303","rmoveto","135","1","34","3","44","18","81","31","44","61","0","77","rrcurveto","116","-95","66","-167","vhcurveto","-322","hlineto","hintmask","-25","vlineto","70","-6","14","-16","0","-70","rrcurveto","-442","vlineto","0","-62","-13","-20","-47","-6","-3","0","-10","-2","-11","-2","rrcurveto","-25","334","25","vlineto","-79","10","-9","9","0","73","rrcurveto","hintmask","489","vmoveto","23","17","12","33","83","33","-44","-108","vhcurveto","0","-63","-12","-38","-26","-23","-24","-20","-33","-7","-71","0","rrcurveto","endchar"], -R: ["107","0","25","617","34","-25","25","hstemhm","114","162","183","171","hintmask","715","25","rmoveto","-18","0","-13","6","-10","13","rrcurveto","-201","285","rlineto","59","19","25","13","28","25","29","27","16","40","0","45","rrcurveto","115","-99","63","-183","vhcurveto","-322","hlineto","hintmask","-25","vlineto","74","-5","14","-15","0","-72","rrcurveto","-442","vlineto","0","-73","-10","-10","-78","-9","rrcurveto","-25","338","25","vlineto","-78","10","-10","11","0","71","rrcurveto","196","27","vlineto","207","-313","205","0","rlineto","hintmask","-439","600","rmoveto","0","6","4","15","3","6","6","10","16","5","25","0","92","0","37","-43","0","-105","0","-64","-15","-39","-32","-21","-26","-17","-34","-7","-76","-1","rrcurveto","endchar"], -S: ["-59","-19","33","646","31","-19","20","hstemhm","44","109","241","119","hintmask","484","475","rmoveto","217","-30","vlineto","-7","-26","-8","-8","-17","0","-9","0","-11","3","-22","8","rrcurveto","hintmask","-47","16","-32","6","-38","0","-136","0","-83","-77","0","-126","0","-88","52","-63","119","-57","rrcurveto","67","-32","rlineto","88","-42","24","-26","0","-52","0","-69","-49","-45","-77","0","-58","0","-50","25","-38","49","-27","37","-14","33","-17","70","rrcurveto","-29","-247","29","hlineto","6","25","8","9","16","0","8","0","11","-3","22","-7","50","-17","37","-7","43","0","148","0","100","85","0","126","0","75","-46","75","-64","32","rrcurveto","-147","73","rlineto","-81","40","-22","25","0","48","0","62","42","38","68","0","45","0","42","-19","36","-37","34","-35","16","-29","20","-65","rrcurveto","endchar"], -T: ["52","0","25","631","20","hstem","253","162","vstem","253","117","rmoveto","0","-74","-11","-11","-86","-7","rrcurveto","-25","357","25","vlineto","-87","6","-11","10","0","76","rrcurveto","527","vlineto","123","-4","52","-46","17","-119","rrcurveto","29","0","-2","201","-600","0","-3","-201","29","0","rlineto","17","119","52","46","124","4","rrcurveto","endchar"], -V: ["107","-18","20","654","20","hstem","701","676","rmoveto","-213","-25","hlineto","71","-5","15","-7","0","-32","0","-16","-3","-12","-17","-43","rrcurveto","-127","-329","-138","334","rlineto","-19","45","-4","12","0","15","0","23","15","11","38","2","5","0","13","1","15","1","rrcurveto","25","-336","-25","vlineto","50","-7","9","-8","25","-55","rrcurveto","256","-599","27","0","228","587","rlineto","24","62","14","13","52","7","rrcurveto","endchar"], -W: ["385","-15","20","437","20","194","20","hstem","981","676","rmoveto","-182","-25","hlineto","54","-3","15","-10","0","-31","0","-13","-2","-15","-5","-14","rrcurveto","-112","-343","-108","336","rlineto","-10","32","-4","15","0","9","0","24","15","9","44","3","2","0","5","0","6","1","rrcurveto","25","-312","-25","vlineto","41","-2","20","-9","11","-24","rrcurveto","35","-96","-118","-308","-120","364","rlineto","-5","16","-2","8","0","9","0","29","12","9","52","4","rrcurveto","25","-294","-25","vlineto","42","-6","10","-9","17","-49","rrcurveto","212","-602","28","0","186","477","171","-477","27","0","200","602","rlineto","13","40","23","21","33","3","rrcurveto","endchar"], -a: ["-14","20","435","32","hstemhm","25","146","122","138","-137","137","hintmask","473","64","rmoveto","-10","-10","rlineto","-3","-3","-3","-1","-5","0","rrcurveto","-14","-7","9","16","hvcurveto","261","vlineto","84","-76","53","-122","-113","-76","-51","-75","-41","24","-25","41","40","28","24","34","vhcurveto","0","14","-6","13","-12","16","-9","10","-3","6","0","6","rrcurveto","21","28","16","35","vhcurveto","hintmask","58","26","-27","-61","hvcurveto","-73","vlineto","-119","-36","-49","-20","-37","-25","-43","-30","-21","-34","0","-43","0","-61","47","-45","64","0","57","0","46","20","55","50","11","-51","22","-19","49","0","43","0","31","16","38","41","rrcurveto","hintmask","-195","57","rmoveto","-27","-31","-20","-12","-24","0","-30","0","-21","27","0","40","0","58","42","42","80","21","rrcurveto","endchar"], -b: ["-59","-14","32","401","54","179","24","hstem","72","139","163","147","vstem","211","676","rmoveto","-194","-24","hlineto","46","-9","9","-9","0","-40","rrcurveto","-607","12","vlineto","79","56","rlineto","46","-41","36","-16","50","0","rrcurveto","133","93","104","149","138","-77","96","-111","hvcurveto","-48","0","-37","-17","-37","-39","rrcurveto","-57","vmoveto","17","43","20","16","33","0","rrcurveto","62","31","-67","-131","-138","-30","-65","-64","-42","-27","31","48","hvcurveto","endchar"], -c: ["-171","-14","67","389","31","hstem","25","141","vstem","412","109","rmoveto","-37","-42","-26","-14","-41","0","rrcurveto","-87","-55","87","136","103","32","63","52","hvcurveto","16","0","15","-8","6","-11","5","-9","0","0","0","-42","1","-49","18","-23","37","0","rrcurveto","42","26","24","39","62","-67","48","-88","-136","-100","-106","-144","-138","90","-99","124","hvcurveto","77","0","56","31","58","74","rrcurveto","endchar"], -d: ["-59","-14","56","-22","23","374","56","179","24","hstemhm","25","148","163","139","hintmask","339","-13","rmoveto","46","13","25","5","62","7","rrcurveto","62","8","0","23","rlineto","-46","3","-13","13","0","42","rrcurveto","575","-215","-24","vlineto","67","-5","9","-7","0","-46","rrcurveto","-183","vlineto","-43","46","-30","16","-46","0","rrcurveto","-110","-82","-107","-145","hvcurveto","hintmask","-136","76","-99","105","vhcurveto","53","0","33","16","47","50","rrcurveto","-3","60","rmoveto","0","-6","-10","-17","-12","-13","-20","-23","-21","-11","-22","0","rrcurveto","-53","-25","60","127","129","27","59","58","hvcurveto","33","0","31","-24","14","-38","rrcurveto","endchar"], -e: ["-171","-14","72","187","37","160","31","hstem","402","125","rmoveto","-40","-49","-31","-18","-43","0","-39","0","-29","18","-21","35","-18","32","-8","33","-4","69","rrcurveto","252","hlineto","-6","84","-15","47","-32","38","-33","39","-46","20","-55","0","rrcurveto","-125","-84","-99","-146","-146","82","-96","124","hvcurveto","81","0","49","32","65","93","rrcurveto","-262","171","rmoveto","3","120","18","40","49","0","28","0","19","-16","8","-31","5","-19","2","-28","2","-51","rrcurveto","-15","vlineto","endchar"], -f: ["-282","0","24","393","44","199","31","hstem","71","139","vstem","71","417","rmoveto","-333","vlineto","0","-43","-11","-12","-46","-5","rrcurveto","-24","278","24","vlineto","-70","2","-12","12","0","65","rrcurveto","314","87","44","-87","122","vlineto","55","15","22","35","18","11","-7","-12","vhcurveto","0","-4","-3","-7","-6","-10","-9","-15","-4","-11","0","-10","rrcurveto","-31","26","-24","34","37","25","25","37","59","-59","41","-84","vhcurveto","-67","0","-52","-24","-27","-44","-22","-35","-7","-41","0","-86","rrcurveto","-57","-44","hlineto","endchar"], -g: ["-206","32","122","120","87","27","216","53","-9","31","hstemhm","28","88","-79","137","132","135","-37","79","hintmask","-reserved-","482","398","rmoveto","53","-130","vlineto","hintmask","-reserved-","-44","16","-28","6","-40","0","-119","0","-84","-66","0","-95","0","-34","13","-33","23","-28","22","-24","19","-13","47","-20","-79","-27","-40","-38","0","-52","0","-41","18","-18","64","-23","rrcurveto","hintmask","-11","-63","-9","-33","-25","0","-41","rrcurveto","-58","75","-34","126","166","88","52","99","77","-62","46","-102","vhcurveto","-65","hlineto","-80","-20","7","28","30","28","22","40","hvcurveto","45","-1","rlineto","47","0","25","6","34","20","rrcurveto","hintmask","-reserved-","45","26","23","41","0","53","0","40","-12","30","-28","28","rrcurveto","-77","-450","rmoveto","hintmask","-11","54","26","-17","-33","-46","-55","-26","-99","-88","-46","23","44","hvcurveto","0","21","7","11","28","23","rrcurveto","hintmask","-reserved-","89","494","rmoveto","47","19","-38","-93","-93","-19","-36","-47","-48","-18","36","93","hvcurveto","94","19","37","47","vhcurveto","endchar"], -h: ["-59","0","24","382","67","179","24","hstem","69","139","138","139","vstem","208","676","rmoveto","-192","-24","hlineto","46","-9","7","-8","0","-41","rrcurveto","-510","vlineto","0","-42","-8","-9","-45","-9","rrcurveto","-24","241","24","vlineto","-37","5","-12","15","0","37","rrcurveto","267","vlineto","0","4","7","10","10","9","22","23","24","12","23","0","rrcurveto","35","17","-27","-56","hvcurveto","-242","vlineto","0","-37","-13","-16","-34","-4","rrcurveto","-24","235","24","vlineto","-36","3","-13","15","0","42","rrcurveto","248","vlineto","86","-53","55","-82","vhcurveto","-53","0","-37","-20","-52","-58","rrcurveto","endchar"], -i: ["-337","0","24","413","24","75","155","hstemhm","60","155","-146","139","hintmask","208","461","rmoveto","-192","-24","hlineto","44","-9","9","-9","0","-41","rrcurveto","-294","vlineto","0","-41","-7","-8","-46","-11","rrcurveto","-24","239","24","vlineto","-35","5","-12","14","0","38","rrcurveto","hintmask","-70","610","rmoveto","-43","-35","-35","-42","-45","33","-33","44","44","34","33","44","hvcurveto","43","-34","35","-43","vhcurveto","endchar"], -j: ["-282","-203","31","609","24","75","155","hstemhm","108","155","-142","139","hintmask","260","461","rmoveto","-202","-24","hlineto","51","-4","12","-12","0","-43","rrcurveto","-474","vlineto","-53","-15","-23","-33","-19","-13","7","10","vhcurveto","0","5","3","7","6","10","10","16","4","11","0","10","rrcurveto","30","-27","24","-32","-37","-25","-25","-36","-59","58","-41","82","vhcurveto","69","0","53","27","28","49","19","32","8","38","0","59","rrcurveto","hintmask","-75","689","rmoveto","-43","-34","-35","-42","-45","32","-33","45","43","35","34","43","hvcurveto","43","-35","35","-43","vhcurveto","endchar"], -k: ["-59","0","24","628","24","hstem","70","139","vstem","513","461","rmoveto","-214","-23","hlineto","11","-2","10","-1","3","-1","24","-3","11","-7","0","-13","0","-9","-10","-18","-11","-11","rrcurveto","-128","-128","0","431","-187","0","0","-24","rlineto","34","-3","14","-17","0","-38","rrcurveto","-510","vlineto","0","-39","-15","-18","-33","-3","rrcurveto","-24","239","24","vlineto","-47","7","-5","6","0","47","rrcurveto","0","114","23","24","95","-134","rlineto","17","-25","7","-12","0","-8","0","-12","-14","-6","-28","-1","rrcurveto","-24","234","24","vlineto","-11","0","-5","3","-9","12","rrcurveto","-194","268","rlineto","100","105","26","18","63","8","rrcurveto","endchar"], -l: ["-337","0","24","628","24","hstem","67","139","vstem","206","676","rmoveto","-190","-24","hlineto","35","-3","16","-18","0","-37","rrcurveto","-510","vlineto","0","-37","-17","-20","-34","-3","rrcurveto","-24","239","24","vlineto","-33","1","-16","19","0","40","rrcurveto","endchar"], -m: ["218","0","24","382","67","-36","24","hstemhm","71","139","138","139","138","139","hintmask","207","461","rmoveto","-191","-24","hlineto","44","-6","11","-12","0","-41","rrcurveto","-294","vlineto","0","-41","-11","-11","-44","-8","rrcurveto","-24","240","24","vlineto","-35","5","-11","14","0","38","rrcurveto","267","vlineto","0","6","16","19","13","10","rrcurveto","hintmask","21","16","17","7","17","0","rrcurveto","39","15","-23","-60","hvcurveto","-242","vlineto","0","-41","-11","-13","-37","-3","rrcurveto","-24","234","24","vlineto","-35","4","-12","15","0","38","rrcurveto","267","vlineto","0","5","16","19","13","10","22","17","17","7","17","0","rrcurveto","38","15","-24","-59","hvcurveto","-242","vlineto","0","-42","-11","-12","-38","-3","rrcurveto","-24","238","24","vlineto","-39","2","-11","12","0","43","rrcurveto","251","vlineto","86","-53","55","-82","vhcurveto","-57","0","-38","-23","-52","-64","-30","63","-35","24","-63","0","-63","0","-46","-27","-38","-60","rrcurveto","endchar"], -n: ["-59","0","24","382","67","-36","24","hstemhm","74","139","138","139","hintmask","212","461","rmoveto","-191","-24","hlineto","44","-7","9","-10","0","-42","rrcurveto","-294","vlineto","0","-42","-8","-9","-45","-9","rrcurveto","-24","241","24","vlineto","-37","5","-12","15","0","37","rrcurveto","267","vlineto","0","4","7","10","10","9","rrcurveto","hintmask","22","23","24","12","23","0","rrcurveto","35","17","-27","-56","hvcurveto","-242","vlineto","0","-37","-13","-16","-34","-4","rrcurveto","-24","235","24","vlineto","-39","3","-10","12","0","42","rrcurveto","251","vlineto","86","-53","55","-82","vhcurveto","-60","0","-46","-28","-37","-59","rrcurveto","endchar"], -o: ["-14","31","425","31","hstem","25","147","157","147","vstem","251","473","rmoveto","-127","-99","-105","-136","-143","95","-103","131","129","96","104","139","140","-96","104","-129","hvcurveto","1","-31","rmoveto","58","19","-55","-164","-154","-20","-52","-58","-59","-20","51","146","176","18","52","62","hvcurveto","endchar"], -p: ["-59","-205","21","-21","24","-24","205","-13","55","374","57","-36","24","hstemhm","75","139","162","148","hintmask","212","461","rmoveto","-191","-24","hlineto","44","-7","10","-10","0","-42","rrcurveto","-501","vlineto","0","-41","-8","-9","-48","-8","rrcurveto","hintmask","-24","273","vlineto","hintmask","21","vlineto","-61","3","-17","20","0","67","rrcurveto","141","vlineto","48","-47","26","-13","44","0","rrcurveto","112","80","104","148","139","-75","95","-111","hvcurveto","-59","0","-36","-23","-31","-58","rrcurveto","hintmask","2","-46","rmoveto","0","7","9","16","12","13","20","22","23","12","22","0","rrcurveto","52","24","-60","-131","-124","-28","-59","-57","hvcurveto","-33","0","-29","23","-15","38","rrcurveto","endchar"], -r: ["-171","0","24","413","24","-8","20","hstemhm","83","139","hintmask","218","461","rmoveto","-189","-24","hlineto","43","-6","11","-13","0","-40","rrcurveto","-294","vlineto","0","-41","-10","-11","-44","-8","rrcurveto","-24","266","24","vlineto","-61","4","-12","13","0","62","rrcurveto","189","vlineto","52","28","43","33","vhcurveto","8","0","9","-7","11","-16","19","-27","15","-9","26","0","rrcurveto","37","26","28","39","hvcurveto","hintmask","45","-34","33","-47","vhcurveto","-50","0","-38","-27","-47","-67","rrcurveto","endchar"], -s: ["-226","-14","34","420","33","hstem","27","100","138","96","vstem","340","326","rmoveto","145","-22","vlineto","-6","-15","-6","-5","-12","0","-6","0","-10","2","-16","5","-33","11","-23","4","-22","0","-91","0","-66","-62","0","-84","0","-66","41","-46","101","-43","68","-30","28","-25","0","-32","rrcurveto","-39","-30","-26","-45","vhcurveto","-70","0","-46","45","-21","87","rrcurveto","-28","-165","25","hlineto","11","21","6","7","9","0","5","0","8","-2","10","-4","27","-12","53","-11","28","0","91","0","63","62","0","90","0","71","-39","43","-99","42","-68","29","-28","25","0","34","rrcurveto","33","28","25","38","vhcurveto","26","0","27","-11","22","-21","21","-20","11","-18","15","-44","rrcurveto","endchar"], -t: ["-282","-12","71","358","44","hstem","72","139","vstem","305","461","rmoveto","-94","169","-25","hlineto","-61","-86","-40","-45","-65","-55","rrcurveto","-27","52","-324","vlineto","-65","43","-40","69","vhcurveto","67","0","40","30","41","82","rrcurveto","-25","11","rlineto","-20","-38","-16","-14","-21","0","rrcurveto","-28","-11","17","40","hvcurveto","301","94","vlineto","endchar"], -u: ["-59","-14","65","-31","23","394","24","hstemhm","65","139","138","139","hintmask","343","-13","rmoveto","43","15","24","4","65","7","rrcurveto","62","7","0","23","rlineto","-44","2","-12","13","0","43","rrcurveto","360","-201","-24","vlineto","50","-4","12","-12","0","-43","rrcurveto","-283","vlineto","hintmask","-33","-33","-21","-11","-28","0","rrcurveto","-41","-15","20","51","hvcurveto","339","-188","-24","vlineto","41","-8","8","-9","0","-42","rrcurveto","-252","vlineto","-88","50","-52","83","vhcurveto","52","0","35","16","58","50","rrcurveto","endchar"], -v: ["-14","20","435","20","hstem","485","461","rmoveto","-151","-24","hlineto","43","-2","12","-7","0","-24","0","-11","-4","-16","-7","-18","rrcurveto","-72","-182","-79","203","rlineto","-7","19","-2","6","0","6","0","15","10","7","25","2","2","0","8","1","8","1","rrcurveto","24","-250","-24","vlineto","23","-3","6","-3","6","-9","2","1","47","-100","16","-41","rrcurveto","120","-296","26","0","160","396","rlineto","19","44","8","8","31","3","rrcurveto","endchar"], -w: ["107","-14","20","435","20","hstem","707","461","rmoveto","-135","-24","hlineto","37","-4","11","-8","0","-23","0","-12","-13","-37","-31","-79","-15","-39","-8","-21","-12","-34","-10","41","-3","12","-21","74","-20","69","-7","28","0","9","0","16","10","5","38","3","rrcurveto","24","-234","-24","vlineto","39","-4","1","-1","19","-66","rrcurveto","6","-19","-68","-171","-24","64","rlineto","-9","21","-7","19","-5","14","-28","70","-11","32","0","13","0","16","10","8","28","4","rrcurveto","24","-222","-24","vlineto","26","-5","4","-6","27","-66","rrcurveto","148","-374","24","0","125","310","102","-310","23","0","155","401","rlineto","16","37","8","8","26","5","rrcurveto","endchar"], -x: ["0","20","421","20","hstem","484","24","rmoveto","-16","5","-7","4","-7","11","rrcurveto","-148","228","101","126","rlineto","19","23","20","11","31","5","rrcurveto","24","-168","-24","vlineto","9","-1","8","-1","3","0","23","-1","8","-6","0","-15","0","-15","-10","-17","-28","-32","-6","-6","-15","-19","-15","-21","-6","7","-4","6","-3","4","-32","42","-26","43","0","13","rrcurveto","0","12","14","6","33","1","rrcurveto","24","-250","-24","vlineto","26","-4","6","-5","20","-30","rrcurveto","128","-197","rlineto","-29","-37","-27","-33","-9","-14","-56","-74","-20","-17","-37","-2","rrcurveto","-24","169","24","vlineto","-36","2","-14","7","0","15","0","15","26","42","38","46","2","3","7","8","7","9","rrcurveto","42","-63","rlineto","23","-33","10","-19","0","-12","0","-12","-14","-6","-31","-2","rrcurveto","-24","241","vlineto","endchar"], -y: ["-205","57","589","20","hstem","16","119","vstem","480","461","rmoveto","-151","-24","hlineto","43","-2","12","-7","0","-24","0","-11","-2","-9","-9","-25","rrcurveto","-68","-192","-72","185","rlineto","-20","51","0","0","0","8","0","14","12","9","25","2","rrcurveto","16","1","0","24","-250","0","0","-24","rlineto","23","-3","6","-3","6","-9","3","0","45","-98","17","-42","rrcurveto","120","-295","-18","-53","rlineto","-17","-50","-26","-32","-23","0","-9","0","-8","8","0","9","0","1","0","2","1","3","1","5","1","5","0","4","rrcurveto","29","-24","20","-34","-37","-27","-26","-38","-46","40","-33","57","vhcurveto","34","0","29","12","21","21","21","23","20","40","35","94","rrcurveto","149","397","rlineto","17","42","10","10","31","3","rrcurveto","endchar"], -z: ["-171","0","32","409","20","hstem","420","160","rmoveto","-28","hlineto","-9","-32","-8","-18","-16","-21","-32","-44","-33","-13","-80","0","rrcurveto","-29","0","231","403","0","26","-371","0","-7","-142","26","0","rlineto","25","95","26","16","140","-1","rrcurveto","-234","-404","0","-25","383","0","rlineto","endchar"] -}; diff --git a/test.html b/test.html index be6887578..5d0b15403 100644 --- a/test.html +++ b/test.html @@ -6,7 +6,6 @@ - From 1f7aaf7b504208f414d21472057167d54041fb38 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sun, 12 Jun 2011 14:23:39 +0200 Subject: [PATCH 32/72] Add some code to make (the first one only...) font pass the sanitizer check --- PDFFont.js | 224 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 170 insertions(+), 54 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index fa4faabdd..d9074ff1e 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1,3 +1,5 @@ +/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- / +/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ var kMaxFontFileSize = 100000; @@ -56,13 +58,18 @@ var Stack = function() { }; var Base64Encoder = { - encode: function(aData) { + encode: function(aFontName, aData) { var str = []; var count = aData.length; for (var i = 0; i < count; i++) str.push(aData.getChar ? aData.getChar() : String.fromCharCode(aData[i])); - return window.btoa(str.join("")); + // Add the css rule to the document + var fontData = window.btoa(str.join("")); + var url = "url(data:font/otf;base64," + fontData + ");"; + document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0); + + return fontData; } }; @@ -78,12 +85,8 @@ var TrueTypeFont = function(aFontName, aFontFile) { } dump("Loading a TrueType font: " + aFontName); - var fontData = Base64Encoder.encode(aFontFile); + var fontData = Base64Encoder.encode(aFontName, aFontFile); Fonts.set(aFontName, fontData); - - // Add the css rule - var url = "url(data:font/ttf;base64," + fontData + ");"; - document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0); }; var Type1Parser = function(aAsciiStream, aBinaryStream) { @@ -739,13 +742,9 @@ var Type1Font = function(aFontName, aFontFile) { var fontName = this.parser.parse(); var font = Fonts.get(fontName); var fontData = this.convertToOTF(this.convertToCFF(font), font); - fontData = Base64Encoder.encode(fontData); + fontData = Base64Encoder.encode(aFontName, fontData); Fonts.set(aFontName, fontData); - // Add the css rule - var url = "url(data:font/otf;base64," + fontData + ");"; - document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0); - var end = Date.now(); log("Time to parse font is:" + (end - start)); } @@ -1082,19 +1081,9 @@ Type1Font.prototype = { return data; }, - - createOpenTypeHeader: function(aNumTables) { - // sfnt version (4 bytes) - var version = [0x4F, 0x54, 0x54, 0X4F]; - - // numTables (2 bytes) - var numTables = aNumTables; - - // searchRange (2bytes) - // XXX oh man this is dirty, there's probably something obvious to do to - // quickly get the maximum power of 2 value... + getMaxPower2: function(aNumber) { var maxPower = 0; - var value = numTables; + var value = aNumber; while (value >= 2) { value /= 2; maxPower++; @@ -1103,10 +1092,22 @@ Type1Font.prototype = { value = 2; for (var i = 1; i < maxPower; i++) value *= 2; - var searchRange = value * 16; + + return value; + }, + + createOpenTypeHeader: function(aNumTables) { + // sfnt version (4 bytes) + var version = [0x4F, 0x54, 0x54, 0X4F]; + + // numTables (2 bytes) + var numTables = aNumTables; + + // searchRange (2 bytes) + var searchRange = this.getMaxPower2(numTables) * 16; // entrySelector (2 bytes) - var entrySelector = Math.log(value) / Math.log(2); + var entrySelector = Math.log(this.getMaxPower2(numTables)) / Math.log(2); // rangeShift (2 bytes) var rangeShift = numTables * 16 - searchRange; @@ -1210,36 +1211,150 @@ Type1Font.prototype = { /** CMAP */ var charstrings = this.getOrderedCharStrings(aFont); - var cmap = [ - 0x00, 0x00, // version - 0x00, 0x01, // numTables - 0x00, 0x01, // platformID - 0x00, 0x00, // encodingID - 0x00, 0x00, 0x00, 0x0C, //offset - 0x00, 0x00, - 0x01, 0x06, - 0x00, 0x00 - ]; + if (false) { + var cmap = [ + 0x00, 0x00, // version + 0x00, 0x01, // numTables + 0x00, 0x01, // platformID + 0x00, 0x00, // encodingID + 0x00, 0x00, 0x00, 0x0C, //offset + 0x00, 0x00, + 0x01, 0x06, + 0x00, 0x00 + ]; - var data = []; - for (var i = 0; i < 262; i++) { - data.push(0x00); + var data = []; + for (var i = 0; i < 262; i++) { + data.push(0x00); + } + + for (var i = 0; i < charstrings.length; i++) { + var glyph = charstrings[i].glyph; + if (glyph == ".notdef") + continue; + + var pos = GlyphsUnicode[glyph]; + if (!pos) + error(charstrings[i].glyph + " does not have an entry in the glyphs table"); + var b1 = parseInt("0x" + pos[0] + pos[1]); + var b2 = parseInt("0x" + pos[2] + pos[3]); + var num = this.bytesToInteger([b1, b2]); + data[num] = i + 1; + } + cmap = cmap.concat(data); } + else { + var data = new Array(1000); + for (var i = 0; i < charstrings.length; i++) { + var glyph = charstrings[i].glyph; + if (glyph == ".notdef") + continue; - for (var i = 0; i < charstrings.length; i++) { - var glyph = charstrings[i].glyph; - if (glyph == ".notdef") - continue; + var pos = GlyphsUnicode[glyph]; + if (!pos) + error(charstrings[i].glyph + " does not have an entry in the glyphs table"); + var b1 = parseInt("0x" + pos[0] + pos[1]); + var b2 = parseInt("0x" + pos[2] + pos[3]); + var num = this.bytesToInteger([b1, b2]); + data[num] = i + 1; + } - var pos = GlyphsUnicode[glyph]; - if (!pos) - error(charstrings[i].glyph + " does not have an entry in the glyphs table"); - var b1 = parseInt("0x" + pos[0] + pos[1]); - var b2 = parseInt("0x" + pos[2] + pos[3]); - var num = this.bytesToInteger([b1, b2]); - data[num] = i + 1; + var ranges = []; + var range = []; + for (var i = 0; i < data.length; i++) { + var char = data[i]; + if (char) { + range.push(i); + } else if (range.length) { + //log("create a new range of " + range.length + " chars width min: " + range[0] + " to max: " + range[range.length - 1]); + ranges.push(range.slice()); + range = []; + } + } + + + var cmap = []; + var segCount = ranges.length + 1; + + var segCount2 = segCount * 2; + var searchRange = this.getMaxPower2(segCount) * 2; + var searchEntry = Math.log(segCount) / Math.log(2); + var rangeShift = 2 * segCount - searchRange; + cmap = cmap.concat(this.integerToBytes(segCount2, 2)); + cmap = cmap.concat(this.integerToBytes(searchRange, 2)); + cmap = cmap.concat(this.integerToBytes(searchEntry, 2)); + cmap = cmap.concat(this.integerToBytes(rangeShift, 2)); + + // End characters code with an additional 0xFFFF to finish the array + var endCodes = []; + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + cmap = cmap.concat(this.integerToBytes(range[range.length - 1], 2)); + }; + cmap = cmap.concat([0xFF, 0xFF]); + + // reserved pad + cmap = cmap.concat([0x00, 0x00]); + + // Start characters code with an additional 0xFFFF to finish the array + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + cmap = cmap.concat(this.integerToBytes(range[0], 2)); + }; + cmap = cmap.concat([0xFF, 0xFF]); + + + // Fill idDelta + var idDelta = []; + var delta = 0; + var p = 1; + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + var start = range[0]; + var end = range[range.length - 1]; + var diff = end - start; + var delta = -(start - p); + + var value = this.integerToBytes(-delta, 2); + value[0] ^= 0xFF; + value[1] ^= 0xFF; + value[1] += 1; + + cmap = cmap.concat([value[0], value[1]]); + delta -= range.length; + p += range.length; + }; + cmap = cmap.concat([0x00, 0x01]); + + + // Fill id Offsets with 0x00 + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + cmap = cmap.concat([0x00, 0x00]); + }; + cmap = cmap.concat([0x00, 0x00]); + + + var cmapHeader = [ + 0x00, 0x00, // version + 0x00, 0x01, // numTables + 0x00, 0x03, // platformID + 0x00, 0x01, // encodingID + 0x00, 0x00, 0x00, 0x0C, // start of the table record + 0x00, 0x04 // format + ]; + cmapHeader = cmapHeader.concat(this.integerToBytes(cmap.length + 6, 2)); // length + cmapHeader = cmapHeader.concat(0x00, 0x00); // language + + // Fill up data! + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + for (var j = 0; j < range.length; j++) { + cmap = cmap.concat(range[j]); + } + }; + cmap = cmapHeader.concat(cmap); } - cmap = cmap.concat(data); var tableEntry = this.createTableEntry("cmap", virtualOffset, cmap); otf.set(tableEntry, currentOffset); @@ -1321,7 +1436,7 @@ Type1Font.prototype = { var maxp = [ 0x00, 0x00, 0x50, 0x00, // Version number - ].concat(this.integerToBytes(charstrings.length, 2)); // Num of glyphs + ].concat(this.integerToBytes(charstrings.length + 1, 2)); // Num of glyphs (+1 to pass the sanitizer...) var tableEntry = this.createTableEntry("maxp", virtualOffset, maxp); otf.set(tableEntry, currentOffset); @@ -1344,12 +1459,13 @@ Type1Font.prototype = { /** POST */ + // XXX get those info from the Font dict! var post = [ 0x00, 0x03, 0x00, 0x00, // Version number 0x00, 0x00, 0x01, 0x00, // italicAngle 0x00, 0x00, // underlinePosition 0x00, 0x00, // underlineThickness - 0x00, 0x00, 0x00, 0x01, // isFixedPitch + 0x00, 0x00, 0x00, 0x00, // isFixedPitch 0x00, 0x00, 0x00, 0x00, // minMemType42 0x00, 0x00, 0x00, 0x00, // maxMemType42 0x00, 0x00, 0x00, 0x00, // minMemType1 @@ -1375,7 +1491,7 @@ Type1Font.prototype = { for (var i = 0; i < currentOffset; i++) fontData.push(otf[i]); - //writeToFile(data, "/tmp/pdf.js.otf"); + writeToFile(fontData, "/tmp/pdf.js.otf"); return fontData; } }; From 6e7e8ee64ca2921551dda35b145aa47f47a20f18 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sun, 12 Jun 2011 21:53:39 +0200 Subject: [PATCH 33/72] OTF fonts works with the sanitizer (woot) --- PDFFont.js | 85 ++++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 51 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index d9074ff1e..468b86ba1 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -721,7 +721,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { }; -var type1hack = false; +var fontCount = 0; var Type1Font = function(aFontName, aFontFile) { if (_Fonts[aFontName]) return; @@ -731,8 +731,8 @@ var Type1Font = function(aFontName, aFontFile) { if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) error("Invalid file header"); - if (!type1hack || true) { - type1hack = true; + if (!fontCount || true) { + fontCount++; var start = Date.now(); var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); @@ -851,17 +851,27 @@ Type1Font.prototype = { var dict = aFont.get("CharStrings") var charstrings = []; for (var glyph in dict.map) { + var unicode = GlyphsUnicode[glyph]; + if (!unicode) { + if (glyph != ".notdef") + warn(glyph + " does not have an entry in the glyphs unicode dictionary"); + continue; + } + + var b1 = parseInt("0x" + unicode[0] + unicode[1]); + var b2 = parseInt("0x" + unicode[2] + unicode[3]); + unicode = this.bytesToInteger([b1, b2]); + charstrings.push({ glyph: glyph, + unicode: unicode, charstring: dict.map[glyph].slice() }); } charstrings.sort(function(a, b) { - return CFFStrings.indexOf(a.glyph) > CFFStrings.indexOf(b.glyph); + return a.unicode > b.unicode; }); - charstrings.shift(); - return charstrings; }, @@ -1071,14 +1081,14 @@ Type1Font.prototype = { //var parser = new Type2Parser(); //parser.parse(new Stream(file)); - var data = []; + var fontData = []; for (var i = 0; i < currentOffset; i++) - data.push(cff[i]); + fontData.push(cff[i]); //log("== write to file"); - //writeToFile(data, "/tmp/pdf.js.cff"); + //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".cff"); - return data; + return fontData; }, getMaxPower2: function(aNumber) { @@ -1228,36 +1238,14 @@ Type1Font.prototype = { data.push(0x00); } - for (var i = 0; i < charstrings.length; i++) { - var glyph = charstrings[i].glyph; - if (glyph == ".notdef") - continue; - - var pos = GlyphsUnicode[glyph]; - if (!pos) - error(charstrings[i].glyph + " does not have an entry in the glyphs table"); - var b1 = parseInt("0x" + pos[0] + pos[1]); - var b2 = parseInt("0x" + pos[2] + pos[3]); - var num = this.bytesToInteger([b1, b2]); - data[num] = i + 1; - } + for (var i = 0; i < charstrings.length; i++) + data[charstrings[i].unicode] = i + 1; cmap = cmap.concat(data); } else { var data = new Array(1000); - for (var i = 0; i < charstrings.length; i++) { - var glyph = charstrings[i].glyph; - if (glyph == ".notdef") - continue; - - var pos = GlyphsUnicode[glyph]; - if (!pos) - error(charstrings[i].glyph + " does not have an entry in the glyphs table"); - var b1 = parseInt("0x" + pos[0] + pos[1]); - var b2 = parseInt("0x" + pos[2] + pos[3]); - var num = this.bytesToInteger([b1, b2]); - data[num] = i + 1; - } + for (var i = 0; i < charstrings.length; i++) + data[charstrings[i].unicode] = i + 1; var ranges = []; var range = []; @@ -1266,7 +1254,10 @@ Type1Font.prototype = { if (char) { range.push(i); } else if (range.length) { - //log("create a new range of " + range.length + " chars width min: " + range[0] + " to max: " + range[range.length - 1]); + if (0) { + log("create a new range of " + range.length + " chars width min: " + range[0] + " to max: " + range[range.length - 1]); + log("range content is: " + range); + } ranges.push(range.slice()); range = []; } @@ -1286,7 +1277,6 @@ Type1Font.prototype = { cmap = cmap.concat(this.integerToBytes(rangeShift, 2)); // End characters code with an additional 0xFFFF to finish the array - var endCodes = []; for (var i = 0; i < ranges.length; i++) { var range = ranges[i]; cmap = cmap.concat(this.integerToBytes(range[range.length - 1], 2)); @@ -1303,25 +1293,20 @@ Type1Font.prototype = { }; cmap = cmap.concat([0xFF, 0xFF]); - // Fill idDelta - var idDelta = []; var delta = 0; - var p = 1; + var p = 0; for (var i = 0; i < ranges.length; i++) { var range = ranges[i]; var start = range[0]; - var end = range[range.length - 1]; - var diff = end - start; - var delta = -(start - p); + var delta = ((start - 1) - p) % 65536; - var value = this.integerToBytes(-delta, 2); + var value = this.integerToBytes(delta, 2); value[0] ^= 0xFF; value[1] ^= 0xFF; value[1] += 1; - cmap = cmap.concat([value[0], value[1]]); - delta -= range.length; + p += range.length; }; cmap = cmap.concat([0x00, 0x01]); @@ -1334,7 +1319,6 @@ Type1Font.prototype = { }; cmap = cmap.concat([0x00, 0x00]); - var cmapHeader = [ 0x00, 0x00, // version 0x00, 0x01, // numTables @@ -1349,9 +1333,8 @@ Type1Font.prototype = { // Fill up data! for (var i = 0; i < ranges.length; i++) { var range = ranges[i]; - for (var j = 0; j < range.length; j++) { + for (var j = 0; j < range.length; j++) cmap = cmap.concat(range[j]); - } }; cmap = cmapHeader.concat(cmap); } @@ -1491,7 +1474,7 @@ Type1Font.prototype = { for (var i = 0; i < currentOffset; i++) fontData.push(otf[i]); - writeToFile(fontData, "/tmp/pdf.js.otf"); + //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); return fontData; } }; From 84c2e99bef768bfb7a47376ab08275dc3213aba3 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Sun, 12 Jun 2011 23:41:57 +0200 Subject: [PATCH 34/72] Add some missing glyphs --- PDFFont.js | 10 ++++++++-- glyphlist.js | 8 +++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 468b86ba1..84fdf94b1 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -929,7 +929,7 @@ Type1Font.prototype = { var familyName = fontInfo.get("FamilyName"); var weight = fontInfo.get("Weight"); var strings = [version, notice, fullName, - familyName, weight]; + familyName, weight, "asteriskmath"]; var stringsIndex = this.createCFFIndexHeader(strings); var stringsDataLength = stringsIndex.length; @@ -940,6 +940,8 @@ Type1Font.prototype = { var charset = [0x00]; for (var i = 0; i < glyphs.length; i++) { var index = CFFStrings.indexOf(charstrings[i].glyph); + if (index == -1) + index = CFFStrings.length + strings.indexOf(glyph); var bytes = this.integerToBytes(index, 2); charset.push(bytes[0]); charset.push(bytes[1]); @@ -1103,6 +1105,10 @@ Type1Font.prototype = { for (var i = 1; i < maxPower; i++) value *= 2; + if (fontCount == 5) { + log ("mp2: " + aNumber + "::" + value); + } + return value; }, @@ -1474,7 +1480,7 @@ Type1Font.prototype = { for (var i = 0; i < currentOffset; i++) fontData.push(otf[i]); - //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); + writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); return fontData; } }; diff --git a/glyphlist.js b/glyphlist.js index de07af4a5..e147f6c8a 100644 --- a/glyphlist.js +++ b/glyphlist.js @@ -4279,5 +4279,11 @@ var GlyphsUnicode = { zretroflexhook: "0290", zstroke: "01B6", zuhiragana: "305A", - zukatakana: "30BA", + zukatakana: "30BA" }; + +// Add missing glyphs from the original Adobe's list +GlyphsUnicode["angbracketleft"] = "3008"; +GlyphsUnicode["angbracketright"] = "3009"; +GlyphsUnicode["circlecopyrt"] = "00A9"; + From 0a135091deb96caf165b51ed5bb050712f3708f9 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 01:38:05 +0200 Subject: [PATCH 35/72] Add a root 'Font' class as the outside world API --- PDFFont.js | 140 +++++++++++++++++++++++++++++++---------------------- pdf.js | 39 +++------------ 2 files changed, 91 insertions(+), 88 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 84fdf94b1..15e5f2c32 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1,19 +1,86 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- / /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ +/** + * Hold a map of decoded fonts and of the standard fourteen Type1 fonts and + * their acronyms. + * TODO Add the standard fourteen Type1 fonts list by default + * http://cgit.freedesktop.org/poppler/poppler/tree/poppler/GfxFont.cc#n65 + */ +var Fonts = {}; + + +/** + * 'Font' is the class the outside world should use, it encapsulate all the font + * decoding logics whatever type it is (assuming the font type is supported). + * + * For example to read a Type1 font and to attach it to the document: + * var type1Font = new Font("MyFontName", binaryData, "Type1"); + * type1Font.bind(); + * + * As an improvment the last parameter can be replaced by an automatic guess + * of the font type based on the first byte of the file. + */ +var Font = function(aFontName, aFontFile, aFontType) { + this.name = aFontName; + + // If the font has already been decoded simply return + if (Fonts[aFontName]) { + this.font = Fonts[aFontName]; + return; + } + + switch (aFontType) { + case "Type1": + this.mimetype = "font/otf"; + this.font = new Type1(aFontName, aFontFile); + break; + case "TrueType": + this.mimetype = "font/ttf"; + this.font = new TrueType(aFontName, aFontFile); + break; + default: + error("Font " + aFontType + " is not supported"); + break; + } + + Fonts[aFontName] = this.font; + this.bind(); +}; + +Font.prototype = { + name: null, + font: null, + mimetype: null, + + bind: function() { + var data = this.font.data; + + // Compute the binary data to base 64 + var str = []; + var count = data.length; + for (var i = 0; i < count; i++) + str.push(data.getChar ? data.getChar() + : String.fromCharCode(data[i])); + + var dataBase64 = window.btoa(str.join("")); + + // Add the @font-face rule to the document + var url = "url(data:" + this.mimetype + ";base64," + dataBase64 + ");"; + var rule = "@font-face { font-family:'" + this.name + "';src:" + url + "}"; + var styleSheet = document.styleSheets[0]; + styleSheet.insertRule(rule, styleSheet.length); + } +}; + +/** Implementation dirty logic starts here */ + var kMaxFontFileSize = 100000; /** * This dictionary holds decoded fonts data. */ -var Fonts = new Dict(); - -/** - * This simple object keep a trace of the fonts that have already been decoded - * by storing a map between the name given by the PDF and the name gather from - * the font (aka the PostScript code of the font itself for Type1 font). - */ -var _Fonts = {}; +var PSFonts = new Dict(); var Stack = function() { @@ -57,27 +124,7 @@ var Stack = function() { }; }; -var Base64Encoder = { - encode: function(aFontName, aData) { - var str = []; - var count = aData.length; - for (var i = 0; i < count; i++) - str.push(aData.getChar ? aData.getChar() : String.fromCharCode(aData[i])); - - // Add the css rule to the document - var fontData = window.btoa(str.join("")); - var url = "url(data:font/otf;base64," + fontData + ");"; - document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0); - - return fontData; - } -}; - -var TrueTypeFont = function(aFontName, aFontFile) { - if (_Fonts[aFontName]) - return; - _Fonts[aFontName] = true; - +var TrueType = function(aFontName, aFontFile) { var debug = true; function dump(aMsg) { if (debug) @@ -85,8 +132,7 @@ var TrueTypeFont = function(aFontName, aFontFile) { } dump("Loading a TrueType font: " + aFontName); - var fontData = Base64Encoder.encode(aFontName, aFontFile); - Fonts.set(aFontName, fontData); + this.data = aFontFile; }; var Type1Parser = function(aAsciiStream, aBinaryStream) { @@ -485,14 +531,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { case "def": var value = operandStack.pop(); var key = operandStack.pop(); - - // XXX we don't want to do that here but for some reasons the names - // are different between what is declared and the FontName directive - if (key == "FontName" && Fonts.get(value)) { - // The font has already be decoded, stop! - return true; - } - dump("def: " + key + " = " + value); dictionaryStack.peek().set(key, value); break; @@ -504,7 +542,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { // The key will be the identifier to recognize this font fontName = key; - Fonts.set(key, font); + PSFonts.set(key, font); operandStack.push(font); break; @@ -722,11 +760,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var fontCount = 0; -var Type1Font = function(aFontName, aFontFile) { - if (_Fonts[aFontName]) - return; - _Fonts[aFontName] = true; - +var Type1 = function(aFontName, aFontFile) { // All Type1 font program should begin with the comment %! if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) error("Invalid file header"); @@ -740,17 +774,14 @@ var Type1Font = function(aFontName, aFontFile) { this.parser = new Type1Parser(ASCIIStream, binaryStream); var fontName = this.parser.parse(); - var font = Fonts.get(fontName); - var fontData = this.convertToOTF(this.convertToCFF(font), font); - fontData = Base64Encoder.encode(aFontName, fontData); - Fonts.set(aFontName, fontData); - + var font = PSFonts.get(fontName); + this.data = this.convertToOTF(this.convertToCFF(font), font); var end = Date.now(); log("Time to parse font is:" + (end - start)); } }; -Type1Font.prototype = { +Type1.prototype = { getDefaultWidth: function(aCharstrings) { var defaultWidth = 0; var defaultUsedCount = 0; @@ -1104,11 +1135,6 @@ Type1Font.prototype = { value = 2; for (var i = 1; i < maxPower; i++) value *= 2; - - if (fontCount == 5) { - log ("mp2: " + aNumber + "::" + value); - } - return value; }, @@ -1480,7 +1506,7 @@ Type1Font.prototype = { for (var i = 0; i < currentOffset; i++) fontData.push(otf[i]); - writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); + //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); return fontData; } }; diff --git a/pdf.js b/pdf.js index cdc035c28..06928ca09 100644 --- a/pdf.js +++ b/pdf.js @@ -2278,37 +2278,14 @@ var CanvasGraphics = (function() { var fontName = ""; var subtype = font.get("Subtype").name; - switch (subtype) { - case "Type1": - var fontDescriptor = font.get("FontDescriptor"); - if (fontDescriptor.num) { - // XXX fetchIfRef looks expensive - var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); - var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile")); - fontName = fontDescriptor.get("FontName").name; - fontName = fontName.replace("+", ""); // no + are allowed in the font name - font = new Type1Font(fontName, fontFile); - } - break; - - case "Type3": - TODO("support Type3 font"); - break; - - case "TrueType": - var fontDescriptor = font.get("FontDescriptor"); - if (fontDescriptor.num) { - var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); - var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile2")); - fontName = fontDescriptor.get("FontName").name; - fontName = fontName.replace("+", ""); // no + are allowed in the font name - font = new TrueTypeFont(fontName, fontFile); - } - break; - - default: - error("Unsupported font type: " + subtype); - break; + var fontDescriptor = font.get("FontDescriptor"); + if (fontDescriptor.num) { + var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); + var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile")); + if (!fontFile) + fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile2")); + fontName = fontDescriptor.get("FontName").name.replace("+", " "); + new Font(fontName, fontFile, subtype); } this.current.fontSize = size; From a8ce1d24e95e73ae4862ef7ae81d69240839adc2 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 02:30:16 +0200 Subject: [PATCH 36/72] Beginning of the separatation of the Type1/CFF/OTF code --- PDFFont.js | 882 ++++++++++++++++++++++++++--------------------------- 1 file changed, 440 insertions(+), 442 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 15e5f2c32..8cddb2360 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1,6 +1,12 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- / /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ +/** + * Maximum file size of the font. + */ +var kMaxFontFileSize = 40000; + + /** * Hold a map of decoded fonts and of the standard fourteen Type1 fonts and * their acronyms. @@ -32,20 +38,32 @@ var Font = function(aFontName, aFontFile, aFontType) { switch (aFontType) { case "Type1": + // All Type1 font program should begin with the comment %! + if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) + error("Invalid file header"); + + var cff = new CFF(aFontName, aFontFile); this.mimetype = "font/otf"; - this.font = new Type1(aFontName, aFontFile); + + // Wrap the CFF data inside an OTF font file + this.font = this.cover(cff); break; + case "TrueType": this.mimetype = "font/ttf"; - this.font = new TrueType(aFontName, aFontFile); + var ttf = new TrueType(aFontName, aFontFile); + this.font = ttf.data; break; + default: - error("Font " + aFontType + " is not supported"); + warn("Font " + aFontType + " is not supported"); break; } - Fonts[aFontName] = this.font; + // Attach the font to the document this.bind(); + + Fonts[aFontName] = this.font; }; Font.prototype = { @@ -53,8 +71,8 @@ Font.prototype = { font: null, mimetype: null, - bind: function() { - var data = this.font.data; + bind: function font_bind() { + var data = this.font; // Compute the binary data to base 64 var str = []; @@ -70,12 +88,413 @@ Font.prototype = { var rule = "@font-face { font-family:'" + this.name + "';src:" + url + "}"; var styleSheet = document.styleSheets[0]; styleSheet.insertRule(rule, styleSheet.length); + }, + + _createOpenTypeHeader: function font_createOpenTypeHeader(aNumTables) { + // sfnt version (4 bytes) + var version = [0x4F, 0x54, 0x54, 0X4F]; + + // numTables (2 bytes) + var numTables = aNumTables; + + // searchRange (2 bytes) + var tablesMaxPower2 = FontsUtils.getMaxPower2(numTables); + var searchRange = tablesMaxPower2 * 16; + + // entrySelector (2 bytes) + var entrySelector = Math.log(tablesMaxPower2) / Math.log(2); + + // rangeShift (2 bytes) + var rangeShift = numTables * 16 - searchRange; + + return [].concat(version, + FontsUtils.integerToBytes(numTables, 2), + FontsUtils.integerToBytes(searchRange, 2), + FontsUtils.integerToBytes(entrySelector, 2), + FontsUtils.integerToBytes(rangeShift, 2)); + }, + + _createTableEntry: function font_createTableEntry(aTag, aOffset, aData) { + // tag + var tag = [ + aTag.charCodeAt(0), + aTag.charCodeAt(1), + aTag.charCodeAt(2), + aTag.charCodeAt(3) + ]; + + // offset + var offset = aOffset; + + // Per spec tables must be 4-bytes align so add some 0x00 if needed + while (aData.length & 3) + aData.push(0x00); + + // length + var length = aData.length; + + // checksum + var checksum = FontsUtils.bytesToInteger(tag) + offset + length; + + return [].concat(tag, + FontsUtils.integerToBytes(checksum, 4), + FontsUtils.integerToBytes(offset, 4), + FontsUtils.integerToBytes(length, 4)); + }, + + _createCMAPTable: function font_createCMAPTable(aGlyphs) { + var data = new Array(1000); + for (var i = 0; i < aGlyphs.length; i++) + data[aGlyphs[i].unicode] = i + 1; + + var ranges = []; + var range = []; + for (var i = 0; i < data.length; i++) { + var char = data[i]; + if (char) { + range.push(i); + } else if (range.length) { + if (0) { + log("create a new range of " + range.length + " chars width min: " + range[0] + " to max: " + range[range.length - 1]); + log("range content is: " + range); + } + ranges.push(range.slice()); + range = []; + } + } + + + var cmap = []; + var segCount = ranges.length + 1; + + var segCount2 = segCount * 2; + var searchRange = FontsUtils.getMaxPower2(segCount) * 2; + var searchEntry = Math.log(segCount) / Math.log(2); + var rangeShift = 2 * segCount - searchRange; + cmap = cmap.concat(FontsUtils.integerToBytes(segCount2, 2)); + cmap = cmap.concat(FontsUtils.integerToBytes(searchRange, 2)); + cmap = cmap.concat(FontsUtils.integerToBytes(searchEntry, 2)); + cmap = cmap.concat(FontsUtils.integerToBytes(rangeShift, 2)); + + // End characters code with an additional 0xFFFF to finish the array + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + cmap = cmap.concat(FontsUtils.integerToBytes(range[range.length - 1], 2)); + }; + cmap = cmap.concat([0xFF, 0xFF]); + + // reserved pad + cmap = cmap.concat([0x00, 0x00]); + + // Start characters code with an additional 0xFFFF to finish the array + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + cmap = cmap.concat(FontsUtils.integerToBytes(range[0], 2)); + }; + cmap = cmap.concat([0xFF, 0xFF]); + + // Fill idDelta + var delta = 0; + var p = 0; + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + var start = range[0]; + var delta = ((start - 1) - p) % 65536; + + var value = FontsUtils.integerToBytes(delta, 2); + value[0] ^= 0xFF; + value[1] ^= 0xFF; + value[1] += 1; + cmap = cmap.concat([value[0], value[1]]); + + p += range.length; + }; + cmap = cmap.concat([0x00, 0x01]); + + + // Fill id Offsets with 0x00 + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + cmap = cmap.concat([0x00, 0x00]); + }; + cmap = cmap.concat([0x00, 0x00]); + + var cmapHeader = [ + 0x00, 0x00, // version + 0x00, 0x01, // numTables + 0x00, 0x03, // platformID + 0x00, 0x01, // encodingID + 0x00, 0x00, 0x00, 0x0C, // start of the table record + 0x00, 0x04 // format + ]; + cmapHeader = cmapHeader.concat(FontsUtils.integerToBytes(cmap.length + 6, 2)); // length + cmapHeader = cmapHeader.concat(0x00, 0x00); // language + + // Fill up data! + for (var i = 0; i < ranges.length; i++) { + var range = ranges[i]; + for (var j = 0; j < range.length; j++) + cmap = cmap.concat(range[j]); + }; + cmap = cmapHeader.concat(cmap); + return cmap; + }, + + cover: function font_cover(aFont) { + var otf = new Uint8Array(kMaxFontFileSize); + var aFontData = aFont.data; + var currentOffset = 0; + + var numTables = 9; + //var tables = [OS2, cmap, head, hhea, hmtx, maxp, name, post]; + var header = this._createOpenTypeHeader(numTables); + otf.set(header, currentOffset); + currentOffset += header.length; + + var baseOffset = numTables * (4 * 4) + currentOffset; + var virtualOffset = baseOffset; + var tableEntry = this._createTableEntry("CFF ", virtualOffset, aFontData); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += aFontData.length; + + var OS2 = [ + 0x00, 0x03, // version + 0x02, 0x24, // xAvgCharWidth + 0x01, 0xF4, // usWeightClass + 0x00, 0x05, // usWidthClass + 0x00, 0x00, // fstype + 0x02, 0x8A, // ySubscriptXSize + 0x02, 0xBB, // ySubscriptYSize + 0x00, 0x00, // ySubscriptXOffset + 0x00, 0x8C, // ySubscriptYOffset + 0x02, 0x8A, // ySuperScriptXSize + 0x02, 0xBB, // ySuperScriptYSize + 0x00, 0x00, // ySuperScriptXOffset + 0x01, 0xDF, // ySuperScriptYOffset + 0x00, 0x31, // yStrikeOutSize + 0x01, 0x02, // yStrikeOutPosition + 0x00, 0x00, // sFamilyClass + 0x02, 0x00, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Panose + 0x00, 0x00, 0x00, 0x01, // ulUnicodeRange1 (Bits 0-31) + 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange2 (Bits 32-63) + 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange3 (Bits 64-95) + 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange4 (Bits 96-127) + 0x47, 0x49, 0x60, 0x20, // achVendID + 0x00, 0x20, // fsSelection + 0x00, 0x2D, // usFirstCharIndex + 0x00, 0x7A, // usLastCharIndex + 0x00, 0x03, // sTypoAscender + 0x00, 0x20, // sTypeDescender + 0x00, 0x38, // sTypoLineGap + 0x00, 0x5A, // usWinAscent + 0x02, 0xB4, // usWinDescent + 0x00, 0xCE, 0x00, 0x00, // ulCodePageRange1 (Bits 0-31) + 0x00, 0x01, 0x00, 0x00, // ulCodePageRange2 (Bits 32-63) + 0x00, 0x00, // sxHeight + 0x00, 0x00, // sCapHeight + 0x00, 0x01, // usDefaultChar + 0x00, 0xCD, // usBreakChar + 0x00, 0x02 // usMaxContext + ]; + + var tableEntry = this._createTableEntry("OS/2", virtualOffset, OS2); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += OS2.length; + + /** CMAP */ + var charstrings = aFont.getOrderedCharStrings(aFont.font); + + var cmap = this._createCMAPTable(charstrings); + var tableEntry = this._createTableEntry("cmap", virtualOffset, cmap); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += cmap.length; + + + /** HEAD */ + + var head = [ + 0x00, 0x01, 0x00, 0x00, // Version number + 0x00, 0x00, 0x50, 0x00, // fontRevision + 0x00, 0x00, 0x00, 0x00, // checksumAdjustement + 0x5F, 0x0F, 0x3C, 0xF5, // magicNumber + 0x00, 0x00, // Flags + 0x03, 0xE8, // unitsPerEM (>= 16 && <=16384) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // created + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // modified + 0x00, 0x00, // xMin + 0x00, 0x00, // yMin + 0x00, 0x00, // xMax + 0x00, 0x00, // yMax + 0x00, 0x00, // macStyle + 0x00, 0x00, // lowestRecPPEM + 0x00, 0x00, // fontDirectionHint + 0x00, 0x00, // indexToLocFormat + 0x00, 0x00 // glyphDataFormat + ]; + var tableEntry = this._createTableEntry("head", virtualOffset, head); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += head.length; + + + /** HHEA */ + + var hhea = [ + 0x00, 0x01, 0x00, 0x00, // Version number + 0x00, 0x00, // Typographic Ascent + 0x00, 0x00, // Typographic Descent + 0x00, 0x00, // Line Gap + 0xFF, 0xFF, // advanceWidthMax + 0x00, 0x00, // minLeftSidebearing + 0x00, 0x00, // minRightSidebearing + 0x00, 0x00, // xMaxExtent + 0x00, 0x00, // caretSlopeRise + 0x00, 0x00, // caretSlopeRun + 0x00, 0x00, // caretOffset + 0x00, 0x00, // -reserved- + 0x00, 0x00, // -reserved- + 0x00, 0x00, // -reserved- + 0x00, 0x00, // -reserved- + 0x00, 0x00 // metricDataFormat + ]; + hhea = hhea.concat(FontsUtils.integerToBytes(charstrings.length, 2)); // numberOfHMetrics + + var tableEntry = this._createTableEntry("hhea", virtualOffset, hhea); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += hhea.length; + + /** HMTX */ + + var hmtx = [0x01, 0xF4, 0x00, 0x00]; + for (var i = 0; i < charstrings.length; i++) { + var charstring = charstrings[i].charstring; + var width = FontsUtils.integerToBytes(charstring[1], 2); + var lsb = FontsUtils.integerToBytes(charstring[0], 2); + hmtx = hmtx.concat(width, lsb); + } + + var tableEntry = this._createTableEntry("hmtx", virtualOffset, hmtx); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += hmtx.length; + + + /** MAXP */ + + var maxp = [ + 0x00, 0x00, 0x50, 0x00, // Version number + ].concat(FontsUtils.integerToBytes(charstrings.length + 1, 2)); // Num of glyphs (+1 to pass the sanitizer...) + + var tableEntry = this._createTableEntry("maxp", virtualOffset, maxp); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += maxp.length; + + + /** NAME */ + + var name = [ + 0x00, 0x00, // format + 0x00, 0x00, // Number of names Record + 0x00, 0x00 // Storage + ]; + var tableEntry = this._createTableEntry("name", virtualOffset, name); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += name.length; + + + /** POST */ + + // XXX get those info from the Font dict! + var post = [ + 0x00, 0x03, 0x00, 0x00, // Version number + 0x00, 0x00, 0x01, 0x00, // italicAngle + 0x00, 0x00, // underlinePosition + 0x00, 0x00, // underlineThickness + 0x00, 0x00, 0x00, 0x00, // isFixedPitch + 0x00, 0x00, 0x00, 0x00, // minMemType42 + 0x00, 0x00, 0x00, 0x00, // maxMemType42 + 0x00, 0x00, 0x00, 0x00, // minMemType1 + 0x00, 0x00, 0x00, 0x00 // maxMemType1 + ]; + var tableEntry = this._createTableEntry("post", virtualOffset, post); + otf.set(tableEntry, currentOffset); + currentOffset += tableEntry.length; + virtualOffset += post.length; + + // Set the CFF data + otf.set(aFontData, currentOffset); + currentOffset += aFontData.length; + + var tables = [OS2, cmap, head, hhea, hmtx, maxp, name, post]; + for (var i = 0; i < tables.length; i++) { + var table = tables[i]; + otf.set(table, currentOffset); + currentOffset += table.length; + } + + var fontData = []; + for (var i = 0; i < currentOffset; i++) + fontData.push(otf[i]); + + //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); + return fontData; } }; + +var FontsUtils = { + integerToBytes: function fu_integerToBytes(aValue, aBytesCount) { + var bytes = []; + for (var i = 0; i < aBytesCount; i++) + bytes[i] = 0x00; + + do { + bytes[--aBytesCount] = (aValue & 0xFF); + aValue = aValue >> 8; + } while (aBytesCount && aValue > 0); + + return bytes; + }, + + bytesToInteger: function(aBytesArray) { + var value = 0; + for (var i = 0; i < aBytesArray.length; i++) + value = (value << 8) + aBytesArray[i]; + return value; + }, + + getMaxPower2: function fu_getMaxPower2(aNumber) { + var maxPower = 0; + var value = aNumber; + while (value >= 2) { + value /= 2; + maxPower++; + } + + value = 2; + for (var i = 1; i < maxPower; i++) + value *= 2; + return value; + } +}; + + /** Implementation dirty logic starts here */ -var kMaxFontFileSize = 100000; +/** + * At the moment TrueType is just a stub that does mostly nothing but in a + * (near?) future this class will rewrite the font to ensure it is well formed + * and valid in the point of view of the sanitizer. + */ +var TrueType = function(aFontName, aFontFile) { + this.data = aFontFile; +}; /** * This dictionary holds decoded fonts data. @@ -124,17 +543,6 @@ var Stack = function() { }; }; -var TrueType = function(aFontName, aFontFile) { - var debug = true; - function dump(aMsg) { - if (debug) - log(aMsg); - } - - dump("Loading a TrueType font: " + aFontName); - this.data = aFontFile; -}; - var Type1Parser = function(aAsciiStream, aBinaryStream) { var lexer = aAsciiStream ? new Lexer(aAsciiStream) : null; @@ -760,11 +1168,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var fontCount = 0; -var Type1 = function(aFontName, aFontFile) { - // All Type1 font program should begin with the comment %! - if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) - error("Invalid file header"); - +var CFF = function(aFontName, aFontFile) { if (!fontCount || true) { fontCount++; var start = Date.now(); @@ -774,14 +1178,14 @@ var Type1 = function(aFontName, aFontFile) { this.parser = new Type1Parser(ASCIIStream, binaryStream); var fontName = this.parser.parse(); - var font = PSFonts.get(fontName); - this.data = this.convertToOTF(this.convertToCFF(font), font); + this.font = PSFonts.get(fontName); + this.data = this.convertToCFF(this.font); var end = Date.now(); log("Time to parse font is:" + (end - start)); } }; -Type1.prototype = { +CFF.prototype = { getDefaultWidth: function(aCharstrings) { var defaultWidth = 0; var defaultUsedCount = 0; @@ -809,7 +1213,7 @@ Type1.prototype = { if (count ==0) return [0x00, 0x00, 0x00]; - var bytes = this.integerToBytes(count, 2); + var bytes = FontsUtils.integerToBytes(count, 2); for (var i = 0; i < bytes.length; i++) data.push(bytes[i]); @@ -821,7 +1225,7 @@ Type1.prototype = { // Add another offset after this one because we need a new offset var relativeOffset = 1; for (var i = 0; i < count + 1; i++) { - var bytes = this.integerToBytes(relativeOffset, 4); + var bytes = FontsUtils.integerToBytes(relativeOffset, 4); for (var j = 0; j < bytes.length; j++) data.push(bytes[j]); @@ -836,42 +1240,22 @@ Type1.prototype = { return data; }, - integerToBytes: function(aValue, aBytesCount) { - var bytes = []; - for (var i = 0; i < aBytesCount; i++) - bytes[i] = 0x00; - - do { - bytes[--aBytesCount] = (aValue & 0xFF); - aValue = aValue >> 8; - } while (aBytesCount && aValue > 0); - - return bytes; - }, - - bytesToInteger: function(aBytesArray) { - var value = 0; - for (var i = 0; i < aBytesArray.length; i++) - value = (value << 8) + aBytesArray[i]; - return value; - }, - encodeNumber: function(aValue) { var x = 0; // XXX we don't really care about Type2 optimization here... if (aValue >= -32768 && aValue <= 32767) { return [ 28, - this.integerToBytes(aValue >> 8, 1), - this.integerToBytes(aValue, 1) + FontsUtils.integerToBytes(aValue >> 8, 1), + FontsUtils.integerToBytes(aValue, 1) ]; } else if (aValue >= (-2147483647-1) && aValue <= 2147483647) { return [ 0xFF, - this.integerToBytes(aValue >> 24, 1), - this.integerToBytes(aValue >> 16, 1), - this.integerToBytes(aValue >> 8, 1), - this.integerToBytes(aValue, 1) + FontsUtils.integerToBytes(aValue >> 24, 1), + FontsUtils.integerToBytes(aValue >> 16, 1), + FontsUtils.integerToBytes(aValue >> 8, 1), + FontsUtils.integerToBytes(aValue, 1) ]; } else { error("Value: " + aValue + " is not allowed"); @@ -891,7 +1275,7 @@ Type1.prototype = { var b1 = parseInt("0x" + unicode[0] + unicode[1]); var b2 = parseInt("0x" + unicode[2] + unicode[3]); - unicode = this.bytesToInteger([b1, b2]); + unicode = FontsUtils.bytesToInteger([b1, b2]); charstrings.push({ glyph: glyph, @@ -973,7 +1357,7 @@ Type1.prototype = { var index = CFFStrings.indexOf(charstrings[i].glyph); if (index == -1) index = CFFStrings.length + strings.indexOf(glyph); - var bytes = this.integerToBytes(index, 2); + var bytes = FontsUtils.integerToBytes(index, 2); charset.push(bytes[0]); charset.push(bytes[1]); } @@ -1121,392 +1505,6 @@ Type1.prototype = { //log("== write to file"); //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".cff"); - return fontData; - }, - - getMaxPower2: function(aNumber) { - var maxPower = 0; - var value = aNumber; - while (value >= 2) { - value /= 2; - maxPower++; - } - - value = 2; - for (var i = 1; i < maxPower; i++) - value *= 2; - return value; - }, - - createOpenTypeHeader: function(aNumTables) { - // sfnt version (4 bytes) - var version = [0x4F, 0x54, 0x54, 0X4F]; - - // numTables (2 bytes) - var numTables = aNumTables; - - // searchRange (2 bytes) - var searchRange = this.getMaxPower2(numTables) * 16; - - // entrySelector (2 bytes) - var entrySelector = Math.log(this.getMaxPower2(numTables)) / Math.log(2); - - // rangeShift (2 bytes) - var rangeShift = numTables * 16 - searchRange; - - return [].concat(version, - this.integerToBytes(numTables, 2), - this.integerToBytes(searchRange, 2), - this.integerToBytes(entrySelector, 2), - this.integerToBytes(rangeShift, 2)); - }, - - createTableEntry: function(aTag, aOffset, aData) { - // tag - var tag = [ - aTag.charCodeAt(0), - aTag.charCodeAt(1), - aTag.charCodeAt(2), - aTag.charCodeAt(3) - ]; - - // offset - var offset = aOffset; - - // length - // Per spec tables must be 4-bytes align so add some 0x00 if needed - while (aData.length & 3) - aData.push(0x00); - - var length = aData.length; - - // checksum - var checksum = this.bytesToInteger(tag) + offset + length; - - return [].concat(tag, - this.integerToBytes(checksum, 4), - this.integerToBytes(offset, 4), - this.integerToBytes(length, 4)); - }, - - convertToOTF: function(aData, aFont) { - var otf = new Uint8Array(kMaxFontFileSize); - var currentOffset = 0; - - var numTables = 9; - var header = this.createOpenTypeHeader(numTables); - otf.set(header, currentOffset); - currentOffset += header.length; - - var baseOffset = numTables * (4 * 4) + currentOffset; - var virtualOffset = baseOffset; - var tableEntry = this.createTableEntry("CFF ", virtualOffset, aData); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += aData.length; - - var OS2 = [ - 0x00, 0x03, // version - 0x02, 0x24, // xAvgCharWidth - 0x01, 0xF4, // usWeightClass - 0x00, 0x05, // usWidthClass - 0x00, 0x00, // fstype - 0x02, 0x8A, // ySubscriptXSize - 0x02, 0xBB, // ySubscriptYSize - 0x00, 0x00, // ySubscriptXOffset - 0x00, 0x8C, // ySubscriptYOffset - 0x02, 0x8A, // ySuperScriptXSize - 0x02, 0xBB, // ySuperScriptYSize - 0x00, 0x00, // ySuperScriptXOffset - 0x01, 0xDF, // ySuperScriptYOffset - 0x00, 0x31, // yStrikeOutSize - 0x01, 0x02, // yStrikeOutPosition - 0x00, 0x00, // sFamilyClass - 0x02, 0x00, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Panose - 0x00, 0x00, 0x00, 0x01, // ulUnicodeRange1 (Bits 0-31) - 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange2 (Bits 32-63) - 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange3 (Bits 64-95) - 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange4 (Bits 96-127) - 0x47, 0x49, 0x60, 0x20, // achVendID - 0x00, 0x20, // fsSelection - 0x00, 0x2D, // usFirstCharIndex - 0x00, 0x7A, // usLastCharIndex - 0x00, 0x03, // sTypoAscender - 0x00, 0x20, // sTypeDescender - 0x00, 0x38, // sTypoLineGap - 0x00, 0x5A, // usWinAscent - 0x02, 0xB4, // usWinDescent - 0x00, 0xCE, 0x00, 0x00, // ulCodePageRange1 (Bits 0-31) - 0x00, 0x01, 0x00, 0x00, // ulCodePageRange2 (Bits 32-63) - 0x00, 0x00, // sxHeight - 0x00, 0x00, // sCapHeight - 0x00, 0x01, // usDefaultChar - 0x00, 0xCD, // usBreakChar - 0x00, 0x02 // usMaxContext - ]; - - var tableEntry = this.createTableEntry("OS/2", virtualOffset, OS2); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += OS2.length; - - /** CMAP */ - var charstrings = this.getOrderedCharStrings(aFont); - - if (false) { - var cmap = [ - 0x00, 0x00, // version - 0x00, 0x01, // numTables - 0x00, 0x01, // platformID - 0x00, 0x00, // encodingID - 0x00, 0x00, 0x00, 0x0C, //offset - 0x00, 0x00, - 0x01, 0x06, - 0x00, 0x00 - ]; - - var data = []; - for (var i = 0; i < 262; i++) { - data.push(0x00); - } - - for (var i = 0; i < charstrings.length; i++) - data[charstrings[i].unicode] = i + 1; - cmap = cmap.concat(data); - } - else { - var data = new Array(1000); - for (var i = 0; i < charstrings.length; i++) - data[charstrings[i].unicode] = i + 1; - - var ranges = []; - var range = []; - for (var i = 0; i < data.length; i++) { - var char = data[i]; - if (char) { - range.push(i); - } else if (range.length) { - if (0) { - log("create a new range of " + range.length + " chars width min: " + range[0] + " to max: " + range[range.length - 1]); - log("range content is: " + range); - } - ranges.push(range.slice()); - range = []; - } - } - - - var cmap = []; - var segCount = ranges.length + 1; - - var segCount2 = segCount * 2; - var searchRange = this.getMaxPower2(segCount) * 2; - var searchEntry = Math.log(segCount) / Math.log(2); - var rangeShift = 2 * segCount - searchRange; - cmap = cmap.concat(this.integerToBytes(segCount2, 2)); - cmap = cmap.concat(this.integerToBytes(searchRange, 2)); - cmap = cmap.concat(this.integerToBytes(searchEntry, 2)); - cmap = cmap.concat(this.integerToBytes(rangeShift, 2)); - - // End characters code with an additional 0xFFFF to finish the array - for (var i = 0; i < ranges.length; i++) { - var range = ranges[i]; - cmap = cmap.concat(this.integerToBytes(range[range.length - 1], 2)); - }; - cmap = cmap.concat([0xFF, 0xFF]); - - // reserved pad - cmap = cmap.concat([0x00, 0x00]); - - // Start characters code with an additional 0xFFFF to finish the array - for (var i = 0; i < ranges.length; i++) { - var range = ranges[i]; - cmap = cmap.concat(this.integerToBytes(range[0], 2)); - }; - cmap = cmap.concat([0xFF, 0xFF]); - - // Fill idDelta - var delta = 0; - var p = 0; - for (var i = 0; i < ranges.length; i++) { - var range = ranges[i]; - var start = range[0]; - var delta = ((start - 1) - p) % 65536; - - var value = this.integerToBytes(delta, 2); - value[0] ^= 0xFF; - value[1] ^= 0xFF; - value[1] += 1; - cmap = cmap.concat([value[0], value[1]]); - - p += range.length; - }; - cmap = cmap.concat([0x00, 0x01]); - - - // Fill id Offsets with 0x00 - for (var i = 0; i < ranges.length; i++) { - var range = ranges[i]; - cmap = cmap.concat([0x00, 0x00]); - }; - cmap = cmap.concat([0x00, 0x00]); - - var cmapHeader = [ - 0x00, 0x00, // version - 0x00, 0x01, // numTables - 0x00, 0x03, // platformID - 0x00, 0x01, // encodingID - 0x00, 0x00, 0x00, 0x0C, // start of the table record - 0x00, 0x04 // format - ]; - cmapHeader = cmapHeader.concat(this.integerToBytes(cmap.length + 6, 2)); // length - cmapHeader = cmapHeader.concat(0x00, 0x00); // language - - // Fill up data! - for (var i = 0; i < ranges.length; i++) { - var range = ranges[i]; - for (var j = 0; j < range.length; j++) - cmap = cmap.concat(range[j]); - }; - cmap = cmapHeader.concat(cmap); - } - - var tableEntry = this.createTableEntry("cmap", virtualOffset, cmap); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += cmap.length; - - - /** HEAD */ - - var head = [ - 0x00, 0x01, 0x00, 0x00, // Version number - 0x00, 0x00, 0x50, 0x00, // fontRevision - 0x00, 0x00, 0x00, 0x00, // checksumAdjustement - 0x5F, 0x0F, 0x3C, 0xF5, // magicNumber - 0x00, 0x00, // Flags - 0x03, 0xE8, // unitsPerEM (>= 16 && <=16384) - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // created - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // modified - 0x00, 0x00, // xMin - 0x00, 0x00, // yMin - 0x00, 0x00, // xMax - 0x00, 0x00, // yMax - 0x00, 0x00, // macStyle - 0x00, 0x00, // lowestRecPPEM - 0x00, 0x00, // fontDirectionHint - 0x00, 0x00, // indexToLocFormat - 0x00, 0x00 // glyphDataFormat - ]; - var tableEntry = this.createTableEntry("head", virtualOffset, head); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += head.length; - - - /** HHEA */ - - var hhea = [ - 0x00, 0x01, 0x00, 0x00, // Version number - 0x00, 0x00, // Typographic Ascent - 0x00, 0x00, // Typographic Descent - 0x00, 0x00, // Line Gap - 0xFF, 0xFF, // advanceWidthMax - 0x00, 0x00, // minLeftSidebearing - 0x00, 0x00, // minRightSidebearing - 0x00, 0x00, // xMaxExtent - 0x00, 0x00, // caretSlopeRise - 0x00, 0x00, // caretSlopeRun - 0x00, 0x00, // caretOffset - 0x00, 0x00, // -reserved- - 0x00, 0x00, // -reserved- - 0x00, 0x00, // -reserved- - 0x00, 0x00, // -reserved- - 0x00, 0x00 // metricDataFormat - ]; - hhea = hhea.concat(this.integerToBytes(charstrings.length, 2)); // numberOfHMetrics - - var tableEntry = this.createTableEntry("hhea", virtualOffset, hhea); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += hhea.length; - - /** HMTX */ - - var hmtx = [0x01, 0xF4, 0x00, 0x00]; - for (var i = 0; i < charstrings.length; i++) { - var charstring = charstrings[i].charstring; - var width = this.integerToBytes(charstring[1], 2); - var lsb = this.integerToBytes(charstring[0], 2); - hmtx = hmtx.concat(width, lsb); - } - - var tableEntry = this.createTableEntry("hmtx", virtualOffset, hmtx); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += hmtx.length; - - - /** MAXP */ - - var maxp = [ - 0x00, 0x00, 0x50, 0x00, // Version number - ].concat(this.integerToBytes(charstrings.length + 1, 2)); // Num of glyphs (+1 to pass the sanitizer...) - - var tableEntry = this.createTableEntry("maxp", virtualOffset, maxp); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += maxp.length; - - - /** NAME */ - - var name = [ - 0x00, 0x00, // format - 0x00, 0x00, // Number of names Record - 0x00, 0x00 // Storage - ]; - var tableEntry = this.createTableEntry("name", virtualOffset, name); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += name.length; - - - /** POST */ - - // XXX get those info from the Font dict! - var post = [ - 0x00, 0x03, 0x00, 0x00, // Version number - 0x00, 0x00, 0x01, 0x00, // italicAngle - 0x00, 0x00, // underlinePosition - 0x00, 0x00, // underlineThickness - 0x00, 0x00, 0x00, 0x00, // isFixedPitch - 0x00, 0x00, 0x00, 0x00, // minMemType42 - 0x00, 0x00, 0x00, 0x00, // maxMemType42 - 0x00, 0x00, 0x00, 0x00, // minMemType1 - 0x00, 0x00, 0x00, 0x00 // maxMemType1 - ]; - var tableEntry = this.createTableEntry("post", virtualOffset, post); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += post.length; - - // Set the CFF data - otf.set(aData, currentOffset); - currentOffset += aData.length; - - var tables = [OS2, cmap, head, hhea, hmtx, maxp, name, post]; - for (var i = 0; i < tables.length; i++) { - var table = tables[i]; - otf.set(table, currentOffset); - currentOffset += table.length; - } - - var fontData = []; - for (var i = 0; i < currentOffset; i++) - fontData.push(otf[i]); - - //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); return fontData; } }; From 65b8158c3454a7ff0720b2d8f1827316943ff585 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 03:35:56 +0200 Subject: [PATCH 37/72] CMAP creation code is now more readable --- PDFFont.js | 115 ++++++++++++++++++++++++----------------------------- 1 file changed, 52 insertions(+), 63 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 8cddb2360..479b05add 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -147,6 +147,7 @@ Font.prototype = { for (var i = 0; i < aGlyphs.length; i++) data[aGlyphs[i].unicode] = i + 1; + // Separate the glyphs into continuous range of codes, aka segment. var ranges = []; var range = []; for (var i = 0; i < data.length; i++) { @@ -164,80 +165,68 @@ Font.prototype = { } - var cmap = []; - var segCount = ranges.length + 1; + // The size in bytes of the header is equal to the size of the + // different fields * length of a short + (size of the 4 parallels arrays + // describing segments * length of a short). + var headerSize = (12 * 2 + (ranges.length * 4 * 2)); + var segCount = ranges.length + 1; var segCount2 = segCount * 2; var searchRange = FontsUtils.getMaxPower2(segCount) * 2; var searchEntry = Math.log(segCount) / Math.log(2); var rangeShift = 2 * segCount - searchRange; - cmap = cmap.concat(FontsUtils.integerToBytes(segCount2, 2)); - cmap = cmap.concat(FontsUtils.integerToBytes(searchRange, 2)); - cmap = cmap.concat(FontsUtils.integerToBytes(searchEntry, 2)); - cmap = cmap.concat(FontsUtils.integerToBytes(rangeShift, 2)); + var cmap = [].concat( + [ + 0x00, 0x00, // version + 0x00, 0x01, // numTables + 0x00, 0x03, // platformID + 0x00, 0x01, // encodingID + 0x00, 0x00, 0x00, 0x0C, // start of the table record + 0x00, 0x04 // format + ], + FontsUtils.integerToBytes(headerSize, 2), // length + [0x00, 0x00], // language + FontsUtils.integerToBytes(segCount2, 2), + FontsUtils.integerToBytes(searchRange, 2), + FontsUtils.integerToBytes(searchEntry, 2), + FontsUtils.integerToBytes(rangeShift, 2) + ); - // End characters code with an additional 0xFFFF to finish the array - for (var i = 0; i < ranges.length; i++) { + // Fill up the 4 parallel arrays describing the segments. + var startCount = []; + var endCount = []; + var idDeltas = []; + var idRangeOffsets = []; + var glyphsIdsArray = []; + var bias = 0; + for (var i = 0; i < segCount - 1; i++) { var range = ranges[i]; - cmap = cmap.concat(FontsUtils.integerToBytes(range[range.length - 1], 2)); - }; - cmap = cmap.concat([0xFF, 0xFF]); + var start = FontsUtils.integerToBytes(range[0], 2); + var end = FontsUtils.integerToBytes(range[range.length - 1], 2); - // reserved pad - cmap = cmap.concat([0x00, 0x00]); + var delta = FontsUtils.integerToBytes(((range[0] - 1) - bias) % 65536, 2); + bias += range.length; - // Start characters code with an additional 0xFFFF to finish the array - for (var i = 0; i < ranges.length; i++) { - var range = ranges[i]; - cmap = cmap.concat(FontsUtils.integerToBytes(range[0], 2)); - }; - cmap = cmap.concat([0xFF, 0xFF]); + // deltas are signed shorts + delta[0] ^= 0xFF; + delta[1] ^= 0xFF; + delta[1] += 1; - // Fill idDelta - var delta = 0; - var p = 0; - for (var i = 0; i < ranges.length; i++) { - var range = ranges[i]; - var start = range[0]; - var delta = ((start - 1) - p) % 65536; + startCount.push(start[0], start[1]); + endCount.push(end[0], end[1]); + idDeltas.push(delta[0], delta[1]); + idRangeOffsets.push(0x00, 0x00); - var value = FontsUtils.integerToBytes(delta, 2); - value[0] ^= 0xFF; - value[1] ^= 0xFF; - value[1] += 1; - cmap = cmap.concat([value[0], value[1]]); - - p += range.length; - }; - cmap = cmap.concat([0x00, 0x01]); - - - // Fill id Offsets with 0x00 - for (var i = 0; i < ranges.length; i++) { - var range = ranges[i]; - cmap = cmap.concat([0x00, 0x00]); - }; - cmap = cmap.concat([0x00, 0x00]); - - var cmapHeader = [ - 0x00, 0x00, // version - 0x00, 0x01, // numTables - 0x00, 0x03, // platformID - 0x00, 0x01, // encodingID - 0x00, 0x00, 0x00, 0x0C, // start of the table record - 0x00, 0x04 // format - ]; - cmapHeader = cmapHeader.concat(FontsUtils.integerToBytes(cmap.length + 6, 2)); // length - cmapHeader = cmapHeader.concat(0x00, 0x00); // language - - // Fill up data! - for (var i = 0; i < ranges.length; i++) { - var range = ranges[i]; for (var j = 0; j < range.length; j++) - cmap = cmap.concat(range[j]); - }; - cmap = cmapHeader.concat(cmap); - return cmap; + glyphsIdsArray.push(range[j]); + } + startCount.push(0xFF, 0xFF); + endCount.push(0xFF, 0xFF); + idDeltas.push(0x00, 0x01); + idRangeOffsets.push(0x00, 0x00); + + return cmap.concat(endCount, [0x00, 0x00], startCount, + idDeltas, idRangeOffsets, glyphsIdsArray); }, cover: function font_cover(aFont) { @@ -442,7 +431,7 @@ Font.prototype = { for (var i = 0; i < currentOffset; i++) fontData.push(otf[i]); - //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); + writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); return fontData; } }; From cebdda3f350d5898016e34fa7f786da59c3696fc Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 03:45:24 +0200 Subject: [PATCH 38/72] Clean up a bit the CMAP ranges creation loop --- PDFFont.js | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 479b05add..ff1d4cfc7 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -6,6 +6,11 @@ */ var kMaxFontFileSize = 40000; +/** + * Maximum number of glyphs per font. +*/ +var kMaxGlyphsCount = 1024; + /** * Hold a map of decoded fonts and of the standard fourteen Type1 fonts and @@ -143,28 +148,22 @@ Font.prototype = { }, _createCMAPTable: function font_createCMAPTable(aGlyphs) { - var data = new Array(1000); + var characters = new Array(kMaxGlyphsCount); for (var i = 0; i < aGlyphs.length; i++) - data[aGlyphs[i].unicode] = i + 1; + characters[aGlyphs[i].unicode] = i + 1; // Separate the glyphs into continuous range of codes, aka segment. var ranges = []; var range = []; - for (var i = 0; i < data.length; i++) { - var char = data[i]; - if (char) { + for (var i = 0; i < characters.length; i++) { + if (characters[i]) { range.push(i); } else if (range.length) { - if (0) { - log("create a new range of " + range.length + " chars width min: " + range[0] + " to max: " + range[range.length - 1]); - log("range content is: " + range); - } ranges.push(range.slice()); range = []; } } - // The size in bytes of the header is equal to the size of the // different fields * length of a short + (size of the 4 parallels arrays // describing segments * length of a short). From c6f5385d15a83a152b7f7f38fde235f538aa85ce Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 04:37:53 +0200 Subject: [PATCH 39/72] Clean up a bit the code to generate an OTF --- PDFFont.js | 209 +++++++++++++++++++++++++---------------------------- 1 file changed, 98 insertions(+), 111 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index ff1d4cfc7..195bca861 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -95,7 +95,7 @@ Font.prototype = { styleSheet.insertRule(rule, styleSheet.length); }, - _createOpenTypeHeader: function font_createOpenTypeHeader(aNumTables) { + _createOpenTypeHeader: function font_createOpenTypeHeader(aFile, aOffsets, aNumTables) { // sfnt version (4 bytes) var version = [0x4F, 0x54, 0x54, 0X4F]; @@ -112,14 +112,17 @@ Font.prototype = { // rangeShift (2 bytes) var rangeShift = numTables * 16 - searchRange; - return [].concat(version, - FontsUtils.integerToBytes(numTables, 2), - FontsUtils.integerToBytes(searchRange, 2), - FontsUtils.integerToBytes(entrySelector, 2), - FontsUtils.integerToBytes(rangeShift, 2)); + var header = [].concat(version, + FontsUtils.integerToBytes(numTables, 2), + FontsUtils.integerToBytes(searchRange, 2), + FontsUtils.integerToBytes(entrySelector, 2), + FontsUtils.integerToBytes(rangeShift, 2)); + aFile.set(header, aOffsets.currentOffset); + aOffsets.currentOffset += header.length; + aOffsets.virtualOffset += header.length; }, - _createTableEntry: function font_createTableEntry(aTag, aOffset, aData) { + _createTableEntry: function font_createTableEntry(aFile, aOffsets, aTag, aData) { // tag var tag = [ aTag.charCodeAt(0), @@ -129,7 +132,7 @@ Font.prototype = { ]; // offset - var offset = aOffset; + var offset = aOffsets.virtualOffset; // Per spec tables must be 4-bytes align so add some 0x00 if needed while (aData.length & 3) @@ -141,10 +144,13 @@ Font.prototype = { // checksum var checksum = FontsUtils.bytesToInteger(tag) + offset + length; - return [].concat(tag, - FontsUtils.integerToBytes(checksum, 4), - FontsUtils.integerToBytes(offset, 4), - FontsUtils.integerToBytes(length, 4)); + var tableEntry = [].concat(tag, + FontsUtils.integerToBytes(checksum, 4), + FontsUtils.integerToBytes(offset, 4), + FontsUtils.integerToBytes(length, 4)); + aFile.set(tableEntry, aOffsets.currentOffset); + aOffsets.currentOffset += tableEntry.length; + aOffsets.virtualOffset += aData.length; }, _createCMAPTable: function font_createCMAPTable(aGlyphs) { @@ -230,23 +236,39 @@ Font.prototype = { cover: function font_cover(aFont) { var otf = new Uint8Array(kMaxFontFileSize); - var aFontData = aFont.data; - var currentOffset = 0; - var numTables = 9; - //var tables = [OS2, cmap, head, hhea, hmtx, maxp, name, post]; - var header = this._createOpenTypeHeader(numTables); - otf.set(header, currentOffset); - currentOffset += header.length; + // Required Tables + var CFF = aFont.data, + OS2 = [], + cmap = [], + head = [], + hhea = [], + hmtx = [], + maxp = [], + name = [], + post = []; + var tables = [CFF, OS2, cmap, head, hhea, hmtx, maxp, name, post]; - var baseOffset = numTables * (4 * 4) + currentOffset; - var virtualOffset = baseOffset; - var tableEntry = this._createTableEntry("CFF ", virtualOffset, aFontData); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += aFontData.length; + // The offsets object holds at the same time a representation of where + // to write the table entry information about a table and another offset + // representing the offset where to draw the actual data of a particular + // table + var offsets = { + currentOffset: 0, + virtualOffset: tables.length * (4 * 4) + }; - var OS2 = [ + // For files with only one font the offset table is the first thing of the + // file + this._createOpenTypeHeader(otf, offsets, tables.length); + + // XXX It is probable that in a future we want to get rid of this glue + // between the CFF and the OTF format in order to be able to embed TrueType + // data. + this._createTableEntry(otf, offsets, "CFF ", CFF); + + /** OS/2 */ + OS2 = [ 0x00, 0x03, // version 0x02, 0x24, // xAvgCharWidth 0x01, 0xF4, // usWeightClass @@ -268,7 +290,7 @@ Font.prototype = { 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange2 (Bits 32-63) 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange3 (Bits 64-95) 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange4 (Bits 96-127) - 0x47, 0x49, 0x60, 0x20, // achVendID + 0x2A, 0x32, 0x31, 0x2A, // achVendID 0x00, 0x20, // fsSelection 0x00, 0x2D, // usFirstCharIndex 0x00, 0x7A, // usLastCharIndex @@ -285,25 +307,17 @@ Font.prototype = { 0x00, 0xCD, // usBreakChar 0x00, 0x02 // usMaxContext ]; + this._createTableEntry(otf, offsets, "OS/2", OS2); - var tableEntry = this._createTableEntry("OS/2", virtualOffset, OS2); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += OS2.length; - - /** CMAP */ + //XXX Getting charstrings here seems wrong since this is another CFF glue var charstrings = aFont.getOrderedCharStrings(aFont.font); - var cmap = this._createCMAPTable(charstrings); - var tableEntry = this._createTableEntry("cmap", virtualOffset, cmap); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += cmap.length; - + /** CMAP */ + cmap = this._createCMAPTable(charstrings); + this._createTableEntry(otf, offsets, "cmap", cmap); /** HEAD */ - - var head = [ + head = [ 0x00, 0x01, 0x00, 0x00, // Version number 0x00, 0x00, 0x50, 0x00, // fontRevision 0x00, 0x00, 0x00, 0x00, // checksumAdjustement @@ -322,84 +336,63 @@ Font.prototype = { 0x00, 0x00, // indexToLocFormat 0x00, 0x00 // glyphDataFormat ]; - var tableEntry = this._createTableEntry("head", virtualOffset, head); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += head.length; - + this._createTableEntry(otf, offsets, "head", head); /** HHEA */ - - var hhea = [ - 0x00, 0x01, 0x00, 0x00, // Version number - 0x00, 0x00, // Typographic Ascent - 0x00, 0x00, // Typographic Descent - 0x00, 0x00, // Line Gap - 0xFF, 0xFF, // advanceWidthMax - 0x00, 0x00, // minLeftSidebearing - 0x00, 0x00, // minRightSidebearing - 0x00, 0x00, // xMaxExtent - 0x00, 0x00, // caretSlopeRise - 0x00, 0x00, // caretSlopeRun - 0x00, 0x00, // caretOffset - 0x00, 0x00, // -reserved- - 0x00, 0x00, // -reserved- - 0x00, 0x00, // -reserved- - 0x00, 0x00, // -reserved- - 0x00, 0x00 // metricDataFormat - ]; - hhea = hhea.concat(FontsUtils.integerToBytes(charstrings.length, 2)); // numberOfHMetrics - - var tableEntry = this._createTableEntry("hhea", virtualOffset, hhea); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += hhea.length; + hhea = [].concat( + [ + 0x00, 0x01, 0x00, 0x00, // Version number + 0x00, 0x00, // Typographic Ascent + 0x00, 0x00, // Typographic Descent + 0x00, 0x00, // Line Gap + 0xFF, 0xFF, // advanceWidthMax + 0x00, 0x00, // minLeftSidebearing + 0x00, 0x00, // minRightSidebearing + 0x00, 0x00, // xMaxExtent + 0x00, 0x00, // caretSlopeRise + 0x00, 0x00, // caretSlopeRun + 0x00, 0x00, // caretOffset + 0x00, 0x00, // -reserved- + 0x00, 0x00, // -reserved- + 0x00, 0x00, // -reserved- + 0x00, 0x00, // -reserved- + 0x00, 0x00 // metricDataFormat + ], + FontsUtils.integerToBytes(charstrings.length, 2) // numberOfHMetrics + ); + this._createTableEntry(otf, offsets, "hhea", hhea); /** HMTX */ - - var hmtx = [0x01, 0xF4, 0x00, 0x00]; + hmtx = [0x01, 0xF4, 0x00, 0x00]; for (var i = 0; i < charstrings.length; i++) { + // XXX this can easily broke var charstring = charstrings[i].charstring; var width = FontsUtils.integerToBytes(charstring[1], 2); var lsb = FontsUtils.integerToBytes(charstring[0], 2); hmtx = hmtx.concat(width, lsb); } - - var tableEntry = this._createTableEntry("hmtx", virtualOffset, hmtx); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += hmtx.length; - + this._createTableEntry(otf, offsets, "hmtx", hmtx); /** MAXP */ - - var maxp = [ - 0x00, 0x00, 0x50, 0x00, // Version number - ].concat(FontsUtils.integerToBytes(charstrings.length + 1, 2)); // Num of glyphs (+1 to pass the sanitizer...) - - var tableEntry = this._createTableEntry("maxp", virtualOffset, maxp); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += maxp.length; - + maxp = [].concat( + [ + 0x00, 0x00, 0x50, 0x00, // Version number + ], + FontsUtils.integerToBytes(charstrings.length + 1, 2) // Num of glyphs (+1 to pass the sanitizer...) + ); + this._createTableEntry(otf, offsets, "maxp", maxp); /** NAME */ - - var name = [ + name = [ 0x00, 0x00, // format 0x00, 0x00, // Number of names Record - 0x00, 0x00 // Storage + 0x00, 0x00 // Storage ]; - var tableEntry = this._createTableEntry("name", virtualOffset, name); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += name.length; - + this._createTableEntry(otf, offsets, "name", name); /** POST */ - // XXX get those info from the Font dict! - var post = [ + post = [ 0x00, 0x03, 0x00, 0x00, // Version number 0x00, 0x00, 0x01, 0x00, // italicAngle 0x00, 0x00, // underlinePosition @@ -410,24 +403,18 @@ Font.prototype = { 0x00, 0x00, 0x00, 0x00, // minMemType1 0x00, 0x00, 0x00, 0x00 // maxMemType1 ]; - var tableEntry = this._createTableEntry("post", virtualOffset, post); - otf.set(tableEntry, currentOffset); - currentOffset += tableEntry.length; - virtualOffset += post.length; + this._createTableEntry(otf, offsets, "post", post); - // Set the CFF data - otf.set(aFontData, currentOffset); - currentOffset += aFontData.length; - - var tables = [OS2, cmap, head, hhea, hmtx, maxp, name, post]; + // Once all the table entry are written, this is time to dump the data! + var tables = [CFF, OS2, cmap, head, hhea, hmtx, maxp, name, post]; for (var i = 0; i < tables.length; i++) { var table = tables[i]; - otf.set(table, currentOffset); - currentOffset += table.length; + otf.set(table, offsets.currentOffset); + offsets.currentOffset += table.length; } var fontData = []; - for (var i = 0; i < currentOffset; i++) + for (var i = 0; i < offsets.currentOffset; i++) fontData.push(otf[i]); writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); From 4039e3e1e222a5ae786225799dba2a1ebad3a34d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 04:46:21 +0200 Subject: [PATCH 40/72] Clean up a bit the code to generate an OTF (again) --- PDFFont.js | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 195bca861..424c6721f 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -238,15 +238,15 @@ Font.prototype = { var otf = new Uint8Array(kMaxFontFileSize); // Required Tables - var CFF = aFont.data, - OS2 = [], - cmap = [], - head = [], - hhea = [], - hmtx = [], - maxp = [], - name = [], - post = []; + var CFF = aFont.data, // PostScript Font Program + OS2 = [], // OS/2 and Windows Specific metrics + cmap = [], // Character to glyphs mapping + head = [], // Font eader + hhea = [], // Horizontal header + hmtx = [], // Horizontal metrics + maxp = [], // Maximum profile + name = [], // Naming tables + post = []; // PostScript informations var tables = [CFF, OS2, cmap, head, hhea, hmtx, maxp, name, post]; // The offsets object holds at the same time a representation of where @@ -323,9 +323,9 @@ Font.prototype = { 0x00, 0x00, 0x00, 0x00, // checksumAdjustement 0x5F, 0x0F, 0x3C, 0xF5, // magicNumber 0x00, 0x00, // Flags - 0x03, 0xE8, // unitsPerEM (>= 16 && <=16384) - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // created - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // modified + 0x03, 0xE8, // unitsPerEM (defaulting to 1000) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // creation date + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // modifification date 0x00, 0x00, // xMin 0x00, 0x00, // yMin 0x00, 0x00, // xMax @@ -334,7 +334,7 @@ Font.prototype = { 0x00, 0x00, // lowestRecPPEM 0x00, 0x00, // fontDirectionHint 0x00, 0x00, // indexToLocFormat - 0x00, 0x00 // glyphDataFormat + 0x00, 0x00 // glyphDataFormat ]; this._createTableEntry(otf, offsets, "head", head); @@ -405,7 +405,7 @@ Font.prototype = { ]; this._createTableEntry(otf, offsets, "post", post); - // Once all the table entry are written, this is time to dump the data! + // Once all the table entries header are written, dump the data! var tables = [CFF, OS2, cmap, head, hhea, hmtx, maxp, name, post]; for (var i = 0; i < tables.length; i++) { var table = tables[i]; From 9830b09f34ebddbe14dafcf1d4e628a43eae68c5 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 05:30:02 +0200 Subject: [PATCH 41/72] Small changes --- PDFFont.js | 14 ++++++++------ PDFFontUtils.js | 32 +++++++++++++++++--------------- cffStandardStrings.js | 1 + test.html | 1 - 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 424c6721f..7abd9aa65 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -41,6 +41,7 @@ var Font = function(aFontName, aFontFile, aFontType) { return; } + var start = Date.now(); switch (aFontType) { case "Type1": // All Type1 font program should begin with the comment %! @@ -64,6 +65,7 @@ var Font = function(aFontName, aFontFile, aFontType) { warn("Font " + aFontType + " is not supported"); break; } + var end = Date.now(); // Attach the font to the document this.bind(); @@ -417,7 +419,7 @@ Font.prototype = { for (var i = 0; i < offsets.currentOffset; i++) fontData.push(otf[i]); - writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); + //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); return fontData; } }; @@ -477,8 +479,8 @@ var TrueType = function(aFontName, aFontFile) { var PSFonts = new Dict(); -var Stack = function() { - var innerStack = []; +var Stack = function(aStackSize) { + var innerStack = new Array(aStackSize || 0); this.push = function(aOperand) { innerStack.push(aOperand); @@ -695,7 +697,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * operator returns one or more results, it does so by pushing them on the * operand stack. */ - var operandStack = new Stack(); + var operandStack = new Stack(40); // Flag indicating if the topmost operand of the operandStack is an array var operandIsArray = 0; @@ -1156,7 +1158,7 @@ var CFF = function(aFontName, aFontFile) { this.font = PSFonts.get(fontName); this.data = this.convertToCFF(this.font); var end = Date.now(); - log("Time to parse font is:" + (end - start)); + //log("Time to parse font is:" + (end - start)); } }; @@ -1392,7 +1394,7 @@ CFF.prototype = { 248, 28, 1, // Notice 248, 29, 2, // FullName 248, 30, 3, // FamilyName - 248, 31, 4, // Weight + 248, 31, 4 // Weight ]; for (var i = 0; i < fontBBox.length; i++) diff --git a/PDFFontUtils.js b/PDFFontUtils.js index 36dc2b421..072dd48f1 100644 --- a/PDFFontUtils.js +++ b/PDFFontUtils.js @@ -4,7 +4,7 @@ * * So the code here is useful for dumping the data content of a .cff file in * order to investigate the similarity between a Type1 CharString and a Type2 - * CharString. + * CharString or to understand the structure of the CFF format. */ @@ -216,6 +216,14 @@ function readFontIndexData(aStream, aIsByte) { var Type2Parser = function(aFilePath) { var font = new Dict(); + var xhr = new XMLHttpRequest(); + xhr.open("GET", aFilePath, false); + xhr.mozResponseType = xhr.responseType = "arraybuffer"; + xhr.expected = (document.URL.indexOf("file:") == 0) ? 0 : 200; + xhr.send(null); + this.data = new Stream(xhr.mozResponseArrayBuffer || xhr.mozResponse || + xhr.responseArrayBuffer || xhr.response); + // Turn on this flag for additional debugging logs var debug = false; @@ -340,22 +348,16 @@ var Type2Parser = function(aFilePath) { } }; - -var xhr = new XMLHttpRequest(); -xhr.open("GET", "titi.cff", false); -xhr.mozResponseType = xhr.responseType = "arraybuffer"; -xhr.expected = (document.URL.indexOf("file:") == 0) ? 0 : 200; -xhr.send(null); -var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse || - xhr.responseArrayBuffer || xhr.response; -var cff = new Type2Parser("titi.cff"); -//cff.parse(new Stream(cffData)); +/* +var cff = new Type2Parser("test.cff"); +cff.parse(); +*/ /** * Write to a file (works only on Firefox in privilege mode"); */ - function writeToFile(aBytes, aFilePath) { +function writeToFile(aBytes, aFilePath) { netscape.security.PrivilegeManager.enablePrivilege("UniversalXPConnect"); var Cc = Components.classes, Ci = Components.interfaces; @@ -366,10 +368,10 @@ var cff = new Type2Parser("titi.cff"); .createInstance(Ci.nsIFileOutputStream); stream.init(file, 0x04 | 0x08 | 0x20, 0600, 0); - var bos = Cc["@mozilla.org/binaryoutputstream;1"] - .createInstance(Ci.nsIBinaryOutputStream); + var bos = Cc["@mozilla.org/binaryoutputstream;1"] + .createInstance(Ci.nsIBinaryOutputStream); bos.setOutputStream(stream); bos.writeByteArray(aBytes, aBytes.length); stream.close(); - }; +}; diff --git a/cffStandardStrings.js b/cffStandardStrings.js index 585c1157e..8977cd8f2 100644 --- a/cffStandardStrings.js +++ b/cffStandardStrings.js @@ -686,3 +686,4 @@ var CFFDictPrivateDataMap = { operand: 0 } }; + diff --git a/test.html b/test.html index 5d0b15403..6b37f222f 100644 --- a/test.html +++ b/test.html @@ -7,7 +7,6 @@ - From e0a6c8ef76f8132ed8858989548d075d18c7490d Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Sat, 21 May 2011 13:40:28 +0800 Subject: [PATCH 42/72] use const instead of var --- pdf.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pdf.js b/pdf.js index 06928ca09..37c93a4ad 100644 --- a/pdf.js +++ b/pdf.js @@ -2130,10 +2130,10 @@ var CanvasGraphics = (function() { }; } - var LINE_CAP_STYLES = [ "butt", "round", "square" ]; - var LINE_JOIN_STYLES = [ "miter", "round", "bevel" ]; - var NORMAL_CLIP = {}; - var EO_CLIP = {}; + const LINE_CAP_STYLES = [ "butt", "round", "square" ]; + const LINE_JOIN_STYLES = [ "miter", "round", "bevel" ]; + const NORMAL_CLIP = {}; + const EO_CLIP = {}; constructor.prototype = { beginDrawing: function(mediaBox) { From 83e100b0133912531166ce524bad1769e36e34ff Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Fri, 3 Jun 2011 01:57:06 +0800 Subject: [PATCH 43/72] starting position of a stream is relative to the file, not the current stream --- pdf.js | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/pdf.js b/pdf.js index 37c93a4ad..8d55ff2ac 100644 --- a/pdf.js +++ b/pdf.js @@ -48,26 +48,24 @@ function shadow(obj, prop, value) { } var Stream = (function() { - function constructor(arrayBuffer, dict) { + function constructor(arrayBuffer, start, length, dict) { this.bytes = new Uint8Array(arrayBuffer); - this.pos = 0; - this.start = 0; + this.start = start || 0; + this.pos = this.start; + this.length = (start + length) || arrayBuffer.byteLength; this.dict = dict; } constructor.prototype = { - get length() { - return this.bytes.length; - }, getByte: function() { var bytes = this.bytes; - if (this.pos >= bytes.length) + if (this.pos >= this.length) return -1; return bytes[this.pos++]; }, lookChar: function() { var bytes = this.bytes; - if (this.pos >= bytes.length) + if (this.pos >= this.length) return; return String.fromCharCode(bytes[this.pos]); }, @@ -89,11 +87,8 @@ var Stream = (function() { moveStart: function() { this.start = this.pos; }, - makeSubStream: function(pos, length, dict) { - var buffer = this.bytes.buffer; - if (length) - return new Stream(new Uint8Array(buffer, pos, length), dict); - return new Stream(new Uint8Array(buffer, pos), dict); + makeSubStream: function(start, length, dict) { + return new Stream(this.bytes.buffer, start, length, dict); } }; From 2c4a0aa26971ef5da62c35023acdaf08ff9cb1bc Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 07:06:51 +0200 Subject: [PATCH 44/72] Undo last patch, it broke font rendering --- pdf.js | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pdf.js b/pdf.js index 8d55ff2ac..37c93a4ad 100644 --- a/pdf.js +++ b/pdf.js @@ -48,24 +48,26 @@ function shadow(obj, prop, value) { } var Stream = (function() { - function constructor(arrayBuffer, start, length, dict) { + function constructor(arrayBuffer, dict) { this.bytes = new Uint8Array(arrayBuffer); - this.start = start || 0; - this.pos = this.start; - this.length = (start + length) || arrayBuffer.byteLength; + this.pos = 0; + this.start = 0; this.dict = dict; } constructor.prototype = { + get length() { + return this.bytes.length; + }, getByte: function() { var bytes = this.bytes; - if (this.pos >= this.length) + if (this.pos >= bytes.length) return -1; return bytes[this.pos++]; }, lookChar: function() { var bytes = this.bytes; - if (this.pos >= this.length) + if (this.pos >= bytes.length) return; return String.fromCharCode(bytes[this.pos]); }, @@ -87,8 +89,11 @@ var Stream = (function() { moveStart: function() { this.start = this.pos; }, - makeSubStream: function(start, length, dict) { - return new Stream(this.bytes.buffer, start, length, dict); + makeSubStream: function(pos, length, dict) { + var buffer = this.bytes.buffer; + if (length) + return new Stream(new Uint8Array(buffer, pos, length), dict); + return new Stream(new Uint8Array(buffer, pos), dict); } }; From 2dc7bda2bc3bce77e5bc818cdb65529988606a05 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 18:59:46 +0200 Subject: [PATCH 45/72] Merge the stream handling changes with the Font code --- PDFFont.js | 36 ++++++++++++++---------------------- pdf.js | 21 ++++++++------------- 2 files changed, 22 insertions(+), 35 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 7abd9aa65..39d27606b 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -20,7 +20,6 @@ var kMaxGlyphsCount = 1024; */ var Fonts = {}; - /** * 'Font' is the class the outside world should use, it encapsulate all the font * decoding logics whatever type it is (assuming the font type is supported). @@ -44,10 +43,6 @@ var Font = function(aFontName, aFontFile, aFontType) { var start = Date.now(); switch (aFontType) { case "Type1": - // All Type1 font program should begin with the comment %! - if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) - error("Invalid file header"); - var cff = new CFF(aFontName, aFontFile); this.mimetype = "font/otf"; @@ -569,7 +564,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var decryptedString = []; var value = ""; - var count = aStream.length; + var count = aStream.length - aStream.start; for (var i = 0; i < count; i++) { value = aStream.getByte(); decryptedString[i] = String.fromCharCode(value ^ (r >> 8)); @@ -949,7 +944,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { // and start interpreting it in order to decode it var file = operandStack.pop(); var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join(""); - dump(eexecString); lexer = new Lexer(new StringStream(eexecString)); break; @@ -989,13 +983,12 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var file = operandStack.pop(); // Add '1' because of the space separator, this is dirty - var stream = lexer.stream.makeSubStream(lexer.stream.pos + 1, size); + var stream = lexer.stream.makeSubStream(lexer.stream.start + lexer.stream.pos + 1, size); lexer.stream.skip(size + 1); var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); var charStream = new StringStream(charString); var decodedCharString = decodeCharString(charStream); - dump("decodedCharString: " + decodedCharString); operandStack.push(decodedCharString); // boolean indicating if the operation is a success or not @@ -1144,22 +1137,21 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { }; -var fontCount = 0; var CFF = function(aFontName, aFontFile) { - if (!fontCount || true) { - fontCount++; - var start = Date.now(); + var start = Date.now(); - var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); - var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict); + var length1 = aFontFile.dict.get("Length1"); + var length2 = aFontFile.dict.get("Length2"); + var pos = aFontFile.pos; + var ASCIIStream = aFontFile.makeSubStream(pos, length1, aFontFile.dict); + var binaryStream = aFontFile.makeSubStream(pos + length1, length2, aFontFile.dict); - this.parser = new Type1Parser(ASCIIStream, binaryStream); - var fontName = this.parser.parse(); - this.font = PSFonts.get(fontName); - this.data = this.convertToCFF(this.font); - var end = Date.now(); - //log("Time to parse font is:" + (end - start)); - } + this.parser = new Type1Parser(ASCIIStream, binaryStream); + var fontName = this.parser.parse(); + this.font = PSFonts.get(fontName); + this.data = this.convertToCFF(this.font); + var end = Date.now(); + //log("Time to parse font is:" + (end - start)); }; CFF.prototype = { diff --git a/pdf.js b/pdf.js index 37c93a4ad..8d55ff2ac 100644 --- a/pdf.js +++ b/pdf.js @@ -48,26 +48,24 @@ function shadow(obj, prop, value) { } var Stream = (function() { - function constructor(arrayBuffer, dict) { + function constructor(arrayBuffer, start, length, dict) { this.bytes = new Uint8Array(arrayBuffer); - this.pos = 0; - this.start = 0; + this.start = start || 0; + this.pos = this.start; + this.length = (start + length) || arrayBuffer.byteLength; this.dict = dict; } constructor.prototype = { - get length() { - return this.bytes.length; - }, getByte: function() { var bytes = this.bytes; - if (this.pos >= bytes.length) + if (this.pos >= this.length) return -1; return bytes[this.pos++]; }, lookChar: function() { var bytes = this.bytes; - if (this.pos >= bytes.length) + if (this.pos >= this.length) return; return String.fromCharCode(bytes[this.pos]); }, @@ -89,11 +87,8 @@ var Stream = (function() { moveStart: function() { this.start = this.pos; }, - makeSubStream: function(pos, length, dict) { - var buffer = this.bytes.buffer; - if (length) - return new Stream(new Uint8Array(buffer, pos, length), dict); - return new Stream(new Uint8Array(buffer, pos), dict); + makeSubStream: function(start, length, dict) { + return new Stream(this.bytes.buffer, start, length, dict); } }; From b7449f3bccdbeb5b062b93c414d85daeccc0febb Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 13 Jun 2011 23:23:13 +0200 Subject: [PATCH 46/72] Do not use makeSubStream in the CFF constructor --- PDFFont.js | 6 +++--- pdf.js | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 9318fd3a1..34fb28fc4 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1142,9 +1142,9 @@ var CFF = function(aFontName, aFontFile) { var length1 = aFontFile.dict.get("Length1"); var length2 = aFontFile.dict.get("Length2"); - var pos = aFontFile.pos; - var ASCIIStream = aFontFile.makeSubStream(pos, length1, aFontFile.dict); - var binaryStream = aFontFile.makeSubStream(pos + length1, length2, aFontFile.dict); + + var ASCIIStream = new Stream(aFontFile.getBytes(length1)); + var binaryStream = new Stream(aFontFile.getBytes(length2)); this.parser = new Type1Parser(ASCIIStream, binaryStream); var fontName = this.parser.parse(); diff --git a/pdf.js b/pdf.js index a9c9bf020..8b06f5582 100644 --- a/pdf.js +++ b/pdf.js @@ -1848,7 +1848,6 @@ var CanvasGraphics = (function() { } this.current.fontSize = size; - TODO("using hard-coded font for testing"); this.ctx.font = this.current.fontSize +'px "' + fontName + '"'; }, moveText: function (x, y) { From 375b38392082f014e7a099888cb60dfc3001bbf2 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 14 Jun 2011 04:35:46 +0200 Subject: [PATCH 47/72] Make the fonts decoding code works with asynchronous data url --- PDFFont.js | 104 +++++++++++++++++++++++++++++++++++++++++++++++++---- pdf.js | 15 +++++++- test.js | 61 ++++++++++++++++++++++++------- 3 files changed, 161 insertions(+), 19 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 34fb28fc4..4e81187b9 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -11,6 +11,16 @@ var kMaxFontFileSize = 40000; */ var kMaxGlyphsCount = 1024; +/** + * Maximum time to wait for a font to be loaded by @font-face + */ +var kMaxWaitForFontFace = 2000; + + /* + * Useful for debugging when you want to certains operations depending on how + * many fonts are loaded. + */ +var fontCount = 0; /** * Hold a map of decoded fonts and of the standard fourteen Type1 fonts and @@ -36,9 +46,10 @@ var Font = function(aFontName, aFontFile, aFontType) { // If the font has already been decoded simply return if (Fonts[aFontName]) { - this.font = Fonts[aFontName]; + this.font = Fonts[aFontName].data; return; } + fontCount++; var start = Date.now(); switch (aFontType) { @@ -62,10 +73,13 @@ var Font = function(aFontName, aFontFile, aFontType) { } var end = Date.now(); + Fonts[aFontName] = { + data: this.font, + loading: true + } + // Attach the font to the document this.bind(); - - Fonts[aFontName] = this.font; }; Font.prototype = { @@ -84,10 +98,90 @@ Font.prototype = { : String.fromCharCode(data[i])); var dataBase64 = window.btoa(str.join("")); + var fontName = this.name; + + /** Hack begin */ + + // Actually there is not event when a font has finished downloading so + // the following tons of code are a dirty hack to 'guess' when a font is + // ready + var debug = false; + + var canvas = document.createElement("canvas"); + var style = "position:absolute; left: " + + (debug ? (100 * fontCount) : "-200") + "px; top: -200px;"; + canvas.setAttribute("style", style); + canvas.setAttribute("width", 100); + canvas.setAttribute("heigth", 100); + document.body.appendChild(canvas); + + // Get the first character of the font + var page = pdfDocument.getPage(pageNum); + var xref = page.xref; + var resources = xref.fetchIfRef(page.resources); + var fontResource = resources.get("Font"); + var charset = ""; + for (var id in fontResource.map) { + var res = xref.fetch(fontResource.get(id)); + var descriptor = xref.fetch(res.get("FontDescriptor")); + var name = descriptor.get("FontName").toString(); + var font = Fonts[name.replace("+", "_")]; + if (font && font.loading && name == fontName.replace("_", "+")) { + charset = descriptor.get("CharSet").split("/"); + break; + } + } + + // Warn if the charset is not found, this is likely a bug! + var testCharset = charset; + if (!charset) { + warn("No charset found for: " + fontName); + } else { + // if the charset is too small make it repeat a few times + var count = 30; + while (count-- && testCharset.length <= 30) + testCharset = testCharset.concat(charset.slice()); + } + + // Get the font size canvas think it will be + var ctx = canvas.getContext("2d"); + var testString = ""; + for (var i = 0; i < testCharset.length; i++) { + var unicode = new Number("0x" + GlyphsUnicode[testCharset[i]]); + if (!unicode) + error("Unicode for " + testCharset[i] + " is has not been found in the glyphs list"); + testString += String.fromCharCode(unicode); + } + ctx.font = "20px " + fontName + ", Symbol"; + var textWidth = ctx.mozMeasureText(testString); + + if (debug) + ctx.fillText(testString, 20, 20); + + var start = Date.now(); + var interval = window.setInterval(function(self) { + ctx.font = "20px " + fontName + ", Symbol"; + + // For some reasons the font has not loaded, so mark it loaded for the + // page to proceed but cry + if ((Date.now() - start) >= kMaxWaitForFontFace) { + window.clearInterval(interval); + Fonts[fontName].loading = false; + warn("Is " + fontName + " for charset: " + charset + " loaded?"); + } else if (textWidth != ctx.mozMeasureText(testString)) { + window.clearInterval(interval); + Fonts[fontName].loading = false; + } + + if (debug) + ctx.fillText(testString, 20, 60); + }, 150, this); + + /** Hack end */ // Add the @font-face rule to the document var url = "url(data:" + this.mimetype + ";base64," + dataBase64 + ");"; - var rule = "@font-face { font-family:'" + this.name + "';src:" + url + "}"; + var rule = "@font-face { font-family:'" + fontName + "';src:" + url + "}"; var styleSheet = document.styleSheets[0]; styleSheet.insertRule(rule, styleSheet.length); }, @@ -473,7 +567,6 @@ var TrueType = function(aFontName, aFontFile) { */ var PSFonts = new Dict(); - var Stack = function(aStackSize) { var innerStack = new Array(aStackSize || 0); @@ -1136,7 +1229,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { } }; - var CFF = function(aFontName, aFontFile) { var start = Date.now(); diff --git a/pdf.js b/pdf.js index 8b06f5582..697e6c3ad 100644 --- a/pdf.js +++ b/pdf.js @@ -1399,6 +1399,19 @@ var Page = (function() { ? obj : null)); }, + get fonts() { + var xref = this.xref; + var fonts = []; + + var resources = xref.fetchIfRef(this.resources); + var fontResource = resources.get("Font"); + for (var id in fontResource.map) { + var res = xref.fetch(fontResource.get(id)); + var descriptor = xref.fetch(res.get("FontDescriptor")); + fonts.push(descriptor.get("FontName").toString()); + } + return shadow(this, "fonts", fonts); + }, display: function(gfx) { var xref = this.xref; var contents = xref.fetchIfRef(this.contents); @@ -1843,7 +1856,7 @@ var CanvasGraphics = (function() { var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile")); if (!fontFile) fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile2")); - fontName = fontDescriptor.get("FontName").name.replace("+", " "); + fontName = fontDescriptor.get("FontName").name.replace("+", "_"); new Font(fontName, fontFile, subtype); } diff --git a/test.js b/test.js index ec784ea62..bd6c812e8 100644 --- a/test.js +++ b/test.js @@ -21,7 +21,6 @@ function queryParams() { return params; } - function open(url) { document.title = url; req = new XMLHttpRequest(); @@ -54,21 +53,59 @@ function displayPage(num) { var page = pdfDocument.getPage(pageNum = num); - var t1 = Date.now(); + function display() { + var t1 = Date.now(); + var ctx = canvas.getContext("2d"); + ctx.save(); + ctx.fillStyle = "rgb(255, 255, 255)"; + ctx.fillRect(0, 0, canvas.width, canvas.height); + ctx.restore(); - var ctx = canvas.getContext("2d"); - ctx.save(); - ctx.fillStyle = "rgb(255, 255, 255)"; - ctx.fillRect(0, 0, canvas.width, canvas.height); - ctx.restore(); + var gfx = new CanvasGraphics(ctx); + page.display(gfx); - var gfx = new CanvasGraphics(ctx); - page.display(gfx); + var t2 = Date.now(); + var infoDisplay = document.getElementById("info"); + infoDisplay.innerHTML = "Time to render: "+ (t1 - t0) + "/" + (t2 - t1) + " ms"; + } - var t2 = Date.now(); + // Loading a font via data uri is asynchronous, so wait for all font + // of the page to be fully loaded before loading the page + var fontsReady = true; + var fonts = page.fonts; + for (var i = 0; i < fonts.length; i++) { + var fontName = fonts[i].replace("+", "_"); + var font = Fonts[fontName]; + if (!font) { + // load the new font + var xref = page.xref; + var resources = xref.fetchIfRef(page.resources); + var fontResource = resources.get("Font"); + for (var id in fontResource.map) { + var res = xref.fetch(fontResource.get(id)); + var descriptor = xref.fetch(res.get("FontDescriptor")); + var name = descriptor.get("FontName").toString(); + if (name == fontName.replace("_", "+")) { + var subtype = res.get("Subtype").name; + var fontFile = page.xref.fetchIfRef(descriptor.get("FontFile")); + if (!fontFile) + fontFile = page.xref.fetchIfRef(descriptor.get("FontFile2")); + new Font(fontName, fontFile, subtype); + fontsReady = false; + break; + } + } + } else if (font.loading) { + fontsReady = false; + break; + } + } - var infoDisplay = document.getElementById("info"); - infoDisplay.innerHTML = "Time to render: "+ (t1 - t0) + "/" + (t2 - t1) + " ms"; + // If everything is ready do not delayed the page loading any more + if (fontsReady) + display(); + else + setTimeout(displayPage, 150, num); } function nextPage() { From f802ad05b2d9f1cd8dd81a9167eb9b87f48dba2d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 14 Jun 2011 04:52:21 +0200 Subject: [PATCH 48/72] Fix an issue with the setTimeout method used for waiting for fonts to load --- test.js | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test.js b/test.js index bd6c812e8..8c561e8a6 100644 --- a/test.js +++ b/test.js @@ -1,7 +1,7 @@ /* -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- / /* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */ -var pdfDocument, canvas, pageDisplay, pageNum; +var pdfDocument, canvas, pageDisplay, pageNum, pageTimeout; function load() { canvas = document.getElementById("canvas"); canvas.mozOpaque = true; @@ -47,6 +47,9 @@ function gotoPage(num) { } function displayPage(num) { + if (pageNum != num) + window.clearTimeout(pageTimeout); + document.getElementById("pageNumber").value = num; var t0 = Date.now(); @@ -104,8 +107,11 @@ function displayPage(num) { // If everything is ready do not delayed the page loading any more if (fontsReady) display(); - else - setTimeout(displayPage, 150, num); + else { + // FIXME Relying on an event seems much more cleaner here instead + // of a setTimeout... + pageTimeout = window.setTimeout(displayPage, 150, num); + } } function nextPage() { From 4cae2df1e59fe7312a93be04144bc09e0407c9f2 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 14 Jun 2011 11:08:08 +0200 Subject: [PATCH 49/72] Add support for ligatures, asterisks, etc. --- PDFFont.js | 69 ++++++++++++++++++++++++++++++++++-------------------- pdf.js | 8 +++---- test.html | 1 - test.js | 16 ++++++++++++- 4 files changed, 61 insertions(+), 33 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 4e81187b9..509d714fe 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -9,7 +9,7 @@ var kMaxFontFileSize = 40000; /** * Maximum number of glyphs per font. */ -var kMaxGlyphsCount = 1024; +var kMaxGlyphsCount = 65526; /** * Maximum time to wait for a font to be loaded by @font-face @@ -28,33 +28,48 @@ var fontCount = 0; * TODO Add the standard fourteen Type1 fonts list by default * http://cgit.freedesktop.org/poppler/poppler/tree/poppler/GfxFont.cc#n65 */ -var Fonts = {}; +var Fonts = { + _active: null, + get active() { + return this._active; + }, + + set active(aFontName) { + this._active = this[aFontName]; + }, + + getUnicodeFor: function fonts_getUnicodeFor(aCode) { + var glyph = this._active.encoding[aCode]; + var unicode = "0x" + GlyphsUnicode[glyph]; + return unicode || aCode; + } +}; /** * 'Font' is the class the outside world should use, it encapsulate all the font * decoding logics whatever type it is (assuming the font type is supported). * * For example to read a Type1 font and to attach it to the document: - * var type1Font = new Font("MyFontName", binaryData, "Type1"); + * var type1Font = new Font("MyFontName", binaryData, aFontEncoding, "Type1"); * type1Font.bind(); * * As an improvment the last parameter can be replaced by an automatic guess * of the font type based on the first byte of the file. */ -var Font = function(aFontName, aFontFile, aFontType) { - this.name = aFontName; +var Font = function(aName, aFile, aEncoding, aType) { + this.name = aName; // If the font has already been decoded simply return - if (Fonts[aFontName]) { - this.font = Fonts[aFontName].data; + if (Fonts[aName]) { + this.font = Fonts[aName].data; return; } fontCount++; var start = Date.now(); - switch (aFontType) { + switch (aType) { case "Type1": - var cff = new CFF(aFontName, aFontFile); + var cff = new CFF(aFile); this.mimetype = "font/otf"; // Wrap the CFF data inside an OTF font file @@ -63,18 +78,19 @@ var Font = function(aFontName, aFontFile, aFontType) { case "TrueType": this.mimetype = "font/ttf"; - var ttf = new TrueType(aFontName, aFontFile); + var ttf = new TrueType(aFile); this.font = ttf.data; break; default: - warn("Font " + aFontType + " is not supported"); + warn("Font " + aType + " is not supported"); break; } var end = Date.now(); - Fonts[aFontName] = { + Fonts[aName] = { data: this.font, + encoding: aEncoding, loading: true } @@ -108,11 +124,11 @@ Font.prototype = { var debug = false; var canvas = document.createElement("canvas"); - var style = "position:absolute; left: " + - (debug ? (100 * fontCount) : "-200") + "px; top: -200px;"; + var style = "position:absolute; top: " + + (debug ? (80 * fontCount) : "-200") + "px; left: 100px;"; canvas.setAttribute("style", style); canvas.setAttribute("width", 100); - canvas.setAttribute("heigth", 100); + canvas.setAttribute("heigth", 70); document.body.appendChild(canvas); // Get the first character of the font @@ -125,7 +141,7 @@ Font.prototype = { var res = xref.fetch(fontResource.get(id)); var descriptor = xref.fetch(res.get("FontDescriptor")); var name = descriptor.get("FontName").toString(); - var font = Fonts[name.replace("+", "_")]; + var font = Fonts[name.replace("+", "_")]; if (font && font.loading && name == fontName.replace("_", "+")) { charset = descriptor.get("CharSet").split("/"); break; @@ -174,7 +190,7 @@ Font.prototype = { } if (debug) - ctx.fillText(testString, 20, 60); + ctx.fillText(testString, 20, 50); }, 150, this); /** Hack end */ @@ -246,8 +262,9 @@ Font.prototype = { _createCMAPTable: function font_createCMAPTable(aGlyphs) { var characters = new Array(kMaxGlyphsCount); - for (var i = 0; i < aGlyphs.length; i++) + for (var i = 0; i < aGlyphs.length; i++) { characters[aGlyphs[i].unicode] = i + 1; + } // Separate the glyphs into continuous range of codes, aka segment. var ranges = []; @@ -377,10 +394,10 @@ Font.prototype = { 0x01, 0x02, // yStrikeOutPosition 0x00, 0x00, // sFamilyClass 0x02, 0x00, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Panose - 0x00, 0x00, 0x00, 0x01, // ulUnicodeRange1 (Bits 0-31) - 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange2 (Bits 32-63) - 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange3 (Bits 64-95) - 0x00, 0x00, 0x00, 0x00, // ulUnicodeRange4 (Bits 96-127) + 0xFF, 0xFF, 0xFF, 0xFF, // ulUnicodeRange1 (Bits 0-31) + 0xFF, 0xFF, 0xFF, 0xFF, // ulUnicodeRange1 (Bits 32-63) + 0xFF, 0xFF, 0xFF, 0xFF, // ulUnicodeRange1 (Bits 64-95) + 0xFF, 0xFF, 0xFF, 0xFF, // ulUnicodeRange1 (Bits 96-127) 0x2A, 0x32, 0x31, 0x2A, // achVendID 0x00, 0x20, // fsSelection 0x00, 0x2D, // usFirstCharIndex @@ -558,8 +575,8 @@ var FontsUtils = { * (near?) future this class will rewrite the font to ensure it is well formed * and valid in the point of view of the sanitizer. */ -var TrueType = function(aFontName, aFontFile) { - this.data = aFontFile; +var TrueType = function(aFile) { + this.data = aFile; }; /** @@ -1229,7 +1246,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { } }; -var CFF = function(aFontName, aFontFile) { +var CFF = function(aFontFile) { var start = Date.now(); var length1 = aFontFile.dict.get("Length1"); @@ -1405,7 +1422,7 @@ CFF.prototype = { var familyName = fontInfo.get("FamilyName"); var weight = fontInfo.get("Weight"); var strings = [version, notice, fullName, - familyName, weight, "asteriskmath"]; + familyName, weight]; var stringsIndex = this.createCFFIndexHeader(strings); var stringsDataLength = stringsIndex.length; diff --git a/pdf.js b/pdf.js index 697e6c3ad..1f42bd6d6 100644 --- a/pdf.js +++ b/pdf.js @@ -798,6 +798,8 @@ var Lexer = (function() { x = (x << 3) + (ch - '0'); } } + + x = Fonts.getUnicodeFor(x); str += String.fromCharCode(x); break; case '\r': @@ -1849,15 +1851,11 @@ var CanvasGraphics = (function() { return; var fontName = ""; - var subtype = font.get("Subtype").name; var fontDescriptor = font.get("FontDescriptor"); if (fontDescriptor.num) { var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); - var fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile")); - if (!fontFile) - fontFile = this.xref.fetchIfRef(fontDescriptor.get("FontFile2")); fontName = fontDescriptor.get("FontName").name.replace("+", "_"); - new Font(fontName, fontFile, subtype); + Fonts.active = fontName; } this.current.fontSize = size; diff --git a/test.html b/test.html index 73ba7d2db..8d649a149 100644 --- a/test.html +++ b/test.html @@ -1,5 +1,4 @@ -<<<<<<< HEAD Simple pdf.js page viewer diff --git a/test.js b/test.js index 8c561e8a6..f2fdcfdf7 100644 --- a/test.js +++ b/test.js @@ -93,7 +93,21 @@ function displayPage(num) { var fontFile = page.xref.fetchIfRef(descriptor.get("FontFile")); if (!fontFile) fontFile = page.xref.fetchIfRef(descriptor.get("FontFile2")); - new Font(fontName, fontFile, subtype); + + // Generate the custom cmap of the font + var encoding = xref.fetch(res.get("Encoding")); + var differences = encoding.get("Differences"); + var encodingMap = {}; + var index = 0; + for (var j = 0; j < differences.length; j++) { + var data = differences[j]; + if (IsNum(data)) + index = data; + else + encodingMap[index++] = data; + } + + new Font(fontName, fontFile, encodingMap, subtype); fontsReady = false; break; } From 6a6753fd5e1068684488f95d9a16e98cc3c9ee80 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 14 Jun 2011 11:59:10 +0200 Subject: [PATCH 50/72] Add support for brackets around the header emails --- PDFFont.js | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 509d714fe..9c6c620bb 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -38,10 +38,14 @@ var Fonts = { this._active = this[aFontName]; }, - getUnicodeFor: function fonts_getUnicodeFor(aCode) { - var glyph = this._active.encoding[aCode]; - var unicode = "0x" + GlyphsUnicode[glyph]; - return unicode || aCode; + unicodeFromCode: function fonts_unicodeFromCode(aCode) { + var active = this._active; + if (!active) + return aCode; + + var difference = active.encoding[aCode]; + var unicode = GlyphsUnicode[difference]; + return unicode ? "0x" + unicode : aCode; } }; From 6364e7fad5a12ecaa280807d1f793f0118d2ec6d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 14 Jun 2011 19:40:36 +0200 Subject: [PATCH 51/72] Forgot to push function calls changes to get the unicode char from a random code --- pdf.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pdf.js b/pdf.js index 1f42bd6d6..bbfa7186a 100644 --- a/pdf.js +++ b/pdf.js @@ -799,7 +799,7 @@ var Lexer = (function() { } } - x = Fonts.getUnicodeFor(x); + x = Fonts.unicodeFromCode(x); str += String.fromCharCode(x); break; case '\r': @@ -815,7 +815,8 @@ var Lexer = (function() { } break; default: - str += ch; + var unicode = Fonts.unicodeFromCode(ch.charCodeAt(0)); + str += String.fromCharCode(unicode); break; } } while (!done); From bd2e7561003e87a3a2bd4944fdb1127560c189ad Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 14 Jun 2011 21:38:59 +0200 Subject: [PATCH 52/72] Add XObjects fonts to the list of page fonts and has a forEach method to the Dict class --- PDFFont.js | 13 ++++++++++++ pdf.js | 34 +++++++++++++++++++++++-------- test.js | 60 +++++++++++++++++++++++------------------------------- 3 files changed, 64 insertions(+), 43 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 9c6c620bb..56e93c19a 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -580,9 +580,22 @@ var FontsUtils = { * and valid in the point of view of the sanitizer. */ var TrueType = function(aFile) { + var header = this._readOpenTypeHeader(aFile); this.data = aFile; }; +TrueType.prototype = { + _readOpenTypeHeader: function(aFile) { + return { + version: aFile.getBytes(4), + numTables: FontsUtils.bytesToInteger(aFile.getBytes(2)), + searchRange: FontsUtils.bytesToInteger(aFile.getBytes(2)), + entrySelector: FontsUtils.bytesToInteger(aFile.getBytes(2)), + rangeShift: FontsUtils.bytesToInteger(aFile.getBytes(2)) + } + } +}; + /** * This dictionary holds decoded fonts data. */ diff --git a/pdf.js b/pdf.js index bbfa7186a..afcb39fff 100644 --- a/pdf.js +++ b/pdf.js @@ -563,6 +563,10 @@ var Dict = (function() { set: function(key, value) { this.map[key] = value; }, + forEach: function(aCallback) { + for (var key in this.map) + aCallback(key, this.map[key]); + }, toString: function() { var keys = []; for (var key in this.map) @@ -1404,16 +1408,30 @@ var Page = (function() { }, get fonts() { var xref = this.xref; - var fonts = []; - var resources = xref.fetchIfRef(this.resources); - var fontResource = resources.get("Font"); - for (var id in fontResource.map) { - var res = xref.fetch(fontResource.get(id)); - var descriptor = xref.fetch(res.get("FontDescriptor")); - fonts.push(descriptor.get("FontName").toString()); + var fontsDict = new Dict(); + + // Get the fonts use on the page + var fontResources = resources.get("Font"); + fontResources.forEach(function(fontKey, fontData) { + fontsDict.set(fontKey, xref.fetch(fontData)) + }); + + // Get the fonts use on xobjects of the page if any + var xobjs = xref.fetchIfRef(resources.get("XObject")); + if (xobjs) { + xobjs.forEach(function(key, xobj) { + xobj = xref.fetchIfRef(xobj); + assertWellFormed(IsStream(xobj), "XObject should be a stream"); + + var xobjFonts = xobj.dict.get("Resources").get("Font"); + xobjFonts.forEach(function(fontKey, fontData) { + fontsDict.set(fontKey, xref.fetch(fontData)) + }); + }); } - return shadow(this, "fonts", fonts); + + return shadow(this, "fonts", fontsDict); }, display: function(gfx) { var xref = this.xref; diff --git a/test.js b/test.js index f2fdcfdf7..071b2f097 100644 --- a/test.js +++ b/test.js @@ -76,47 +76,37 @@ function displayPage(num) { // of the page to be fully loaded before loading the page var fontsReady = true; var fonts = page.fonts; - for (var i = 0; i < fonts.length; i++) { - var fontName = fonts[i].replace("+", "_"); - var font = Fonts[fontName]; - if (!font) { - // load the new font - var xref = page.xref; - var resources = xref.fetchIfRef(page.resources); - var fontResource = resources.get("Font"); - for (var id in fontResource.map) { - var res = xref.fetch(fontResource.get(id)); - var descriptor = xref.fetch(res.get("FontDescriptor")); - var name = descriptor.get("FontName").toString(); - if (name == fontName.replace("_", "+")) { - var subtype = res.get("Subtype").name; - var fontFile = page.xref.fetchIfRef(descriptor.get("FontFile")); - if (!fontFile) - fontFile = page.xref.fetchIfRef(descriptor.get("FontFile2")); + var xref = page.xref; + fonts.forEach(function(fontKey, fontDict) { + var descriptor = xref.fetch(fontDict.get("FontDescriptor")); + var fontName = descriptor.get("FontName").name; + fontName = fontName.replace("+", "_"); - // Generate the custom cmap of the font - var encoding = xref.fetch(res.get("Encoding")); - var differences = encoding.get("Differences"); + // Check if the font has been loaded or is still loading + var font = Fonts[fontName]; + if (!font) { + var fontFile = xref.fetchIfRef(descriptor.get2("FontFile", "FontFile2")); + // Generate the custom cmap of the font if needed var encodingMap = {}; - var index = 0; - for (var j = 0; j < differences.length; j++) { - var data = differences[j]; - if (IsNum(data)) - index = data; - else - encodingMap[index++] = data; + if (fontDict.has("Encoding")) { + var encoding = xref.fetchIfRef(fontDict.get("Encoding")); + if (IsDict(encoding)) { + var differences = encoding.get("Differences"); + var index = 0; + for (var j = 0; j < differences.length; j++) { + var data = differences[j]; + IsNum(data) ? index = data : encodingMap[index++] = data; + } + } } + var subtype = fontDict.get("Subtype").name; new Font(fontName, fontFile, encodingMap, subtype); - fontsReady = false; - break; - } + return fontsReady = false; + } else if (font.loading) { + return fontsReady = false; } - } else if (font.loading) { - fontsReady = false; - break; - } - } + }); // If everything is ready do not delayed the page loading any more if (fontsReady) From c8c4326ca870de68b56698433198c00ebb418e89 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 14 Jun 2011 21:51:11 +0200 Subject: [PATCH 53/72] Remove direct access to the underlying map object of dict in the fonts builder --- PDFFont.js | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 56e93c19a..58c61e051 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -135,25 +135,25 @@ Font.prototype = { canvas.setAttribute("heigth", 70); document.body.appendChild(canvas); - // Get the first character of the font + // Retrieve font charset + var charset = null; var page = pdfDocument.getPage(pageNum); var xref = page.xref; - var resources = xref.fetchIfRef(page.resources); - var fontResource = resources.get("Font"); - var charset = ""; - for (var id in fontResource.map) { - var res = xref.fetch(fontResource.get(id)); - var descriptor = xref.fetch(res.get("FontDescriptor")); + + var fonts = page.fonts; + fonts.forEach(function(fontKey, fontData) { + var descriptor = xref.fetch(fontData.get("FontDescriptor")); var name = descriptor.get("FontName").toString(); var font = Fonts[name.replace("+", "_")]; if (font && font.loading && name == fontName.replace("_", "+")) { - charset = descriptor.get("CharSet").split("/"); - break; + charset = descriptor.get("CharSet"); + charset = charset ? charset.split("/") : null; + return; } - } + }); - // Warn if the charset is not found, this is likely a bug! - var testCharset = charset; + // Warn if the charset is not found, this is likely + var testCharset = charset || []; if (!charset) { warn("No charset found for: " + fontName); } else { @@ -1358,26 +1358,26 @@ CFF.prototype = { }, getOrderedCharStrings: function(aFont) { - var dict = aFont.get("CharStrings") var charstrings = []; - for (var glyph in dict.map) { + + var glyphs = aFont.get("CharStrings") + glyphs.forEach(function(glyph, glyphData) { var unicode = GlyphsUnicode[glyph]; if (!unicode) { if (glyph != ".notdef") warn(glyph + " does not have an entry in the glyphs unicode dictionary"); - continue; + } else { + var b1 = parseInt("0x" + unicode[0] + unicode[1]); + var b2 = parseInt("0x" + unicode[2] + unicode[3]); + unicode = FontsUtils.bytesToInteger([b1, b2]); + + charstrings.push({ + glyph: glyph, + unicode: unicode, + charstring: glyphData.slice() + }); } - - var b1 = parseInt("0x" + unicode[0] + unicode[1]); - var b2 = parseInt("0x" + unicode[2] + unicode[3]); - unicode = FontsUtils.bytesToInteger([b1, b2]); - - charstrings.push({ - glyph: glyph, - unicode: unicode, - charstring: dict.map[glyph].slice() - }); - } + }); charstrings.sort(function(a, b) { return a.unicode > b.unicode; From 1dcd42b66c3dd3fadcc4fca66f4d8908f26d5645 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 15 Jun 2011 04:46:48 +0200 Subject: [PATCH 54/72] Add a basic support for TrueType (generate fonts with OS/2 table) --- PDFFont.js | 233 ++++++++++++++++++++++++++++++++++++++++++++++++++++- pdf.js | 18 +++-- test.js | 4 +- 3 files changed, 246 insertions(+), 9 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 58c61e051..fd7e0d86f 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -581,11 +581,213 @@ var FontsUtils = { */ var TrueType = function(aFile) { var header = this._readOpenTypeHeader(aFile); - this.data = aFile; + var numTables = header.numTables; + + // Check that required tables are present + var requiredTables = [ + "OS/2", + "cmap", + "head", + "hhea", + "hmtx", + "maxp", + "name", + "post" + ]; + + var tables = []; + for (var i = 0; i < numTables; i++) { + var table = this._readTableEntry(aFile); + var index = requiredTables.indexOf(table.tag); + if (index != -1) + requiredTables.splice(index, 1); + + tables.push(table); + } + tables.sort(function(a, b) { + return a.tag > b.tag; + }); + + // If any tables are still in the array this means some required tables are + // missing, which means that we need to rebuild the font in order to pass + // the sanitizer. + if (requiredTables.length && requiredTables[0] == "OS/2") { + OS2 = [ + 0x00, 0x03, // version + 0x02, 0x24, // xAvgCharWidth + 0x01, 0xF4, // usWeightClass + 0x00, 0x05, // usWidthClass + 0x00, 0x00, // fstype + 0x02, 0x8A, // ySubscriptXSize + 0x02, 0xBB, // ySubscriptYSize + 0x00, 0x00, // ySubscriptXOffset + 0x00, 0x8C, // ySubscriptYOffset + 0x02, 0x8A, // ySuperScriptXSize + 0x02, 0xBB, // ySuperScriptYSize + 0x00, 0x00, // ySuperScriptXOffset + 0x01, 0xDF, // ySuperScriptYOffset + 0x00, 0x31, // yStrikeOutSize + 0x01, 0x02, // yStrikeOutPosition + 0x00, 0x00, // sFamilyClass + 0x02, 0x00, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Panose + 0xFF, 0xFF, 0xFF, 0xFF, // ulUnicodeRange1 (Bits 0-31) + 0xFF, 0xFF, 0xFF, 0xFF, // ulUnicodeRange1 (Bits 32-63) + 0xFF, 0xFF, 0xFF, 0xFF, // ulUnicodeRange1 (Bits 64-95) + 0xFF, 0xFF, 0xFF, 0xFF, // ulUnicodeRange1 (Bits 96-127) + 0x2A, 0x32, 0x31, 0x2A, // achVendID + 0x00, 0x20, // fsSelection + 0x00, 0x2D, // usFirstCharIndex + 0x00, 0x7A, // usLastCharIndex + 0x00, 0x03, // sTypoAscender + 0x00, 0x20, // sTypeDescender + 0x00, 0x38, // sTypoLineGap + 0x00, 0x5A, // usWinAscent + 0x02, 0xB4, // usWinDescent + 0x00, 0xCE, 0x00, 0x00, // ulCodePageRange1 (Bits 0-31) + 0x00, 0x01, 0x00, 0x00, // ulCodePageRange2 (Bits 32-63) + 0x00, 0x00, // sxHeight + 0x00, 0x00, // sCapHeight + 0x00, 0x01, // usDefaultChar + 0x00, 0xCD, // usBreakChar + 0x00, 0x02 // usMaxContext + ]; + + // Create a new file to hold the new version of our truetype with a new + // header and new offsets + var stream = aFile.stream || aFile; + var ttf = new Uint8Array(stream.length + 16 + OS2.length); + + // The new numbers of tables will be the last one plus the num of missing + // tables + var numTables = header.numTables + 1; + + // The offsets object holds at the same time a representation of where + // to write the table entry information about a table and another offset + // representing the offset where to draw the actual data of a particular + // table + var offsets = { + currentOffset: 0, + virtualOffset: numTables * (4 * 4) + }; + + // Write the sfnt header with one more table + this._createOpenTypeHeader(ttf, offsets, numTables); + + // Insert the missing table + tables.unshift({ + tag: "OS/2", + data: OS2 + }); + + // rewrite the tables but tweak offsets + for (var i = 0; i < tables.length; i++) { + var table = tables[i]; + var data = []; + + var tableData = table.data; + for (var j = 0; j < tableData.length; j++) + data.push(tableData[j]); + this._createTableEntry(ttf, offsets, table.tag, data); + } + + // Add the table datas + for (var i = 0; i < tables.length; i++) { + var table = tables[i]; + var tableData = table.data; + ttf.set(tableData, offsets.currentOffset); + offsets.currentOffset += tableData.length; + + if (0) { + var data = []; + for (var j = 0; j < tableData.length; j++) + d.push(tableData[j]); + log("data for table: " + table.tag + ": " + data); + } + + // 4-byte aligned data + while (offsets.currentOffset & 3) + offsets.currentOffset++; + } + + var fontData = []; + for (var i = 0; i < ttf.length; i++) + fontData.push(ttf[i]); + + this.data = ttf; + //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".ttf"); + return; + } else if (requiredTables.lenght) { + error("Table " + requiredTables[0] + " is missing from the TruType font"); + } else { + this.data = aFile; + } }; TrueType.prototype = { - _readOpenTypeHeader: function(aFile) { + _createOpenTypeHeader: function tt_createOpenTypeHeader(aFile, aOffsets, aNumTables) { + // sfnt version (4 bytes) + // XXX if we want to merge this function and the one from the Font class + // XXX this need to be adapted + var version = [0x00, 0x01, 0x00, 0X00]; + + // numTables (2 bytes) + var numTables = aNumTables; + + // searchRange (2 bytes) + var tablesMaxPower2 = FontsUtils.getMaxPower2(numTables); + var searchRange = tablesMaxPower2 * 16; + + // entrySelector (2 bytes) + var entrySelector = Math.log(tablesMaxPower2) / Math.log(2); + + // rangeShift (2 bytes) + var rangeShift = numTables * 16 - searchRange; + + var header = [].concat(version, + FontsUtils.integerToBytes(numTables, 2), + FontsUtils.integerToBytes(searchRange, 2), + FontsUtils.integerToBytes(entrySelector, 2), + FontsUtils.integerToBytes(rangeShift, 2)); + aFile.set(header, aOffsets.currentOffset); + aOffsets.currentOffset += header.length; + aOffsets.virtualOffset += header.length; + }, + + _createTableEntry: function font_createTableEntry(aFile, aOffsets, aTag, aData) { + // tag + var tag = [ + aTag.charCodeAt(0), + aTag.charCodeAt(1), + aTag.charCodeAt(2), + aTag.charCodeAt(3) + ]; + + // Per spec tables must be 4-bytes align so add some 0x00 if needed + while (aData.length & 3) + aData.push(0x00); + + while (aOffsets.virtualOffset & 3) + aOffsets.virtualOffset++; + + // offset + var offset = aOffsets.virtualOffset; + + // length + var length = aData.length; + + // checksum + var checksum = FontsUtils.bytesToInteger(tag) + offset + length; + + var tableEntry = [].concat(tag, + FontsUtils.integerToBytes(checksum, 4), + FontsUtils.integerToBytes(offset, 4), + FontsUtils.integerToBytes(length, 4)); + aFile.set(tableEntry, aOffsets.currentOffset); + aOffsets.currentOffset += tableEntry.length; + aOffsets.virtualOffset += aData.length; + }, + + _readOpenTypeHeader: function tt_readOpenTypeHeader(aFile) { return { version: aFile.getBytes(4), numTables: FontsUtils.bytesToInteger(aFile.getBytes(2)), @@ -593,6 +795,33 @@ TrueType.prototype = { entrySelector: FontsUtils.bytesToInteger(aFile.getBytes(2)), rangeShift: FontsUtils.bytesToInteger(aFile.getBytes(2)) } + }, + + _readTableEntry: function tt_readTableEntry(aFile) { + // tag + var tag = aFile.getBytes(4); + tag = String.fromCharCode(tag[0]) + + String.fromCharCode(tag[1]) + + String.fromCharCode(tag[2]) + + String.fromCharCode(tag[3]); + + var checksum = FontsUtils.bytesToInteger(aFile.getBytes(4)); + var offset = FontsUtils.bytesToInteger(aFile.getBytes(4)); + var length = FontsUtils.bytesToInteger(aFile.getBytes(4)); + + // Read the table associated data + var currentPosition = aFile.pos; + aFile.pos = aFile.start + offset; + var data = aFile.getBytes(length); + aFile.pos = currentPosition; + + return { + tag: tag, + checksum: checksum, + length: offset, + offset: length, + data: data + } } }; diff --git a/pdf.js b/pdf.js index afcb39fff..f46362f7f 100644 --- a/pdf.js +++ b/pdf.js @@ -52,7 +52,7 @@ var Stream = (function() { this.bytes = new Uint8Array(arrayBuffer); this.start = start || 0; this.pos = this.start; - this.end = (start + length) || arrayBuffer.byteLength; + this.end = (start + length) || this.bytes.byteLength; this.dict = dict; } @@ -1411,11 +1411,13 @@ var Page = (function() { var resources = xref.fetchIfRef(this.resources); var fontsDict = new Dict(); - // Get the fonts use on the page + // Get the fonts use on the page if any var fontResources = resources.get("Font"); - fontResources.forEach(function(fontKey, fontData) { - fontsDict.set(fontKey, xref.fetch(fontData)) - }); + if (IsDict(fontResources)) { + fontResources.forEach(function(fontKey, fontData) { + fontsDict.set(fontKey, xref.fetch(fontData)) + }); + } // Get the fonts use on xobjects of the page if any var xobjs = xref.fetchIfRef(resources.get("XObject")); @@ -1864,7 +1866,11 @@ var CanvasGraphics = (function() { this.current.leading = leading; }, setFont: function(fontRef, size) { - var font = this.res.get("Font").get(fontRef.name); + var font = this.res.get("Font"); + if (!IsDict(font)) + return; + + font = font.get(fontRef.name); font = this.xref.fetchIfRef(font); if (!font) return; diff --git a/test.js b/test.js index 071b2f097..2b99dc136 100644 --- a/test.js +++ b/test.js @@ -85,7 +85,9 @@ function displayPage(num) { // Check if the font has been loaded or is still loading var font = Fonts[fontName]; if (!font) { - var fontFile = xref.fetchIfRef(descriptor.get2("FontFile", "FontFile2")); + var fontFile = descriptor.get2("FontFile", "FontFile2"); + fontFile = xref.fetchIfRef(fontFile); + // Generate the custom cmap of the font if needed var encodingMap = {}; if (fontDict.has("Encoding")) { From f7e90f569c989fc5e317a242df3a91d68eb515a2 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 15 Jun 2011 05:40:54 +0200 Subject: [PATCH 55/72] Add Encodings.js and change the code to generate a CharSet per font, this will allow future changes to the OpenType font generator --- Encodings.js | 1552 ++++++++++++++++++++++++++++++++++++++++++++++++++ PDFFont.js | 46 +- test.html | 1 + test.js | 19 +- 4 files changed, 1584 insertions(+), 34 deletions(-) create mode 100644 Encodings.js diff --git a/Encodings.js b/Encodings.js new file mode 100644 index 000000000..d32a399c8 --- /dev/null +++ b/Encodings.js @@ -0,0 +1,1552 @@ + +var Encodings = { + ExpertEncoding: [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "space", + "exclamsmall", + "Hungarumlautsmall", + null, + "dollaroldstyle", + "dollarsuperior", + "ampersandsmall", + "Acutesmall", + "parenleftsuperior", + "parenrightsuperior", + "twodotenleader", + "onedotenleader", + "comma", + "hyphen", + "period", + "fraction", + "zerooldstyle", + "oneoldstyle", + "twooldstyle", + "threeoldstyle", + "fouroldstyle", + "fiveoldstyle", + "sixoldstyle", + "sevenoldstyle", + "eightoldstyle", + "nineoldstyle", + "colon", + "semicolon", + "commasuperior", + "threequartersemdash", + "periodsuperior", + "questionsmall", + null, + "asuperior", + "bsuperior", + "centsuperior", + "dsuperior", + "esuperior", + null, + null, + null, + "isuperior", + null, + null, + "lsuperior", + "msuperior", + "nsuperior", + "osuperior", + null, + null, + "rsuperior", + "ssuperior", + "tsuperior", + null, + "ff", + "fi", + "fl", + "ffi", + "ffl", + "parenleftinferior", + null, + "parenrightinferior", + "Circumflexsmall", + "hyphensuperior", + "Gravesmall", + "Asmall", + "Bsmall", + "Csmall", + "Dsmall", + "Esmall", + "Fsmall", + "Gsmall", + "Hsmall", + "Ismall", + "Jsmall", + "Ksmall", + "Lsmall", + "Msmall", + "Nsmall", + "Osmall", + "Psmall", + "Qsmall", + "Rsmall", + "Ssmall", + "Tsmall", + "Usmall", + "Vsmall", + "Wsmall", + "Xsmall", + "Ysmall", + "Zsmall", + "colonmonetary", + "onefitted", + "rupiah", + "Tildesmall", + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "exclamdownsmall", + "centoldstyle", + "Lslashsmall", + null, + null, + "Scaronsmall", + "Zcaronsmall", + "Dieresissmall", + "Brevesmall", + "Caronsmall", + null, + "Dotaccentsmall", + null, + null, + "Macronsmall", + null, + null, + "figuredash", + "hypheninferior", + null, + null, + "Ogoneksmall", + "Ringsmall", + "Cedillasmall", + null, + null, + null, + "onequarter", + "onehalf", + "threequarters", + "questiondownsmall", + "oneeighth", + "threeeighths", + "fiveeighths", + "seveneighths", + "onethird", + "twothirds", + null, + null, + "zerosuperior", + "onesuperior", + "twosuperior", + "threesuperior", + "foursuperior", + "fivesuperior", + "sixsuperior", + "sevensuperior", + "eightsuperior", + "ninesuperior", + "zeroinferior", + "oneinferior", + "twoinferior", + "threeinferior", + "fourinferior", + "fiveinferior", + "sixinferior", + "seveninferior", + "eightinferior", + "nineinferior", + "centinferior", + "dollarinferior", + "periodinferior", + "commainferior", + "Agravesmall", + "Aacutesmall", + "Acircumflexsmall", + "Atildesmall", + "Adieresissmall", + "Aringsmall", + "AEsmall", + "Ccedillasmall", + "Egravesmall", + "Eacutesmall", + "Ecircumflexsmall", + "Edieresissmall", + "Igravesmall", + "Iacutesmall", + "Icircumflexsmall", + "Idieresissmall", + "Ethsmall", + "Ntildesmall", + "Ogravesmall", + "Oacutesmall", + "Ocircumflexsmall", + "Otildesmall", + "Odieresissmall", + "OEsmall", + "Oslashsmall", + "Ugravesmall", + "Uacutesmall", + "Ucircumflexsmall", + "Udieresissmall", + "Yacutesmall", + "Thornsmall", + "Ydieresissmall" + ], + MacExpertEncoding: [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "space", + "exclamsmall", + "Hungarumlautsmall", + "centoldstyle", + "dollaroldstyle", + "dollarsuperior", + "ampersandsmall", + "Acutesmall", + "parenleftsuperior", + "parenrightsuperior", + "twodotenleader", + "onedotenleader", + "comma", + "hyphen", + "period", + "fraction", + "zerooldstyle", + "oneoldstyle", + "twooldstyle", + "threeoldstyle", + "fouroldstyle", + "fiveoldstyle", + "sixoldstyle", + "sevenoldstyle", + "eightoldstyle", + "nineoldstyle", + "colon", + "semicolon", + null, + "threequartersemdash", + null, + "questionsmall", + null, + null, + null, + null, + "Ethsmall", + null, + null, + "onequarter", + "onehalf", + "threequarters", + "oneeighth", + "threeeighths", + "fiveeighths", + "seveneighths", + "onethird", + "twothirds", + null, + null, + null, + null, + null, + null, + "ff", + "fi", + "fl", + "ffi", + "ffl", + "parenleftinferior", + null, + "parenrightinferior", + "Circumflexsmall", + "hypheninferior", + "Gravesmall", + "Asmall", + "Bsmall", + "Csmall", + "Dsmall", + "Esmall", + "Fsmall", + "Gsmall", + "Hsmall", + "Ismall", + "Jsmall", + "Ksmall", + "Lsmall", + "Msmall", + "Nsmall", + "Osmall", + "Psmall", + "Qsmall", + "Rsmall", + "Ssmall", + "Tsmall", + "Usmall", + "Vsmall", + "Wsmall", + "Xsmall", + "Ysmall", + "Zsmall", + "colonmonetary", + "onefitted", + "rupiah", + "Tildesmall", + null, + null, + "asuperior", + "centsuperior", + null, + null, + null, + null, + "Aacutesmall", + "Agravesmall", + "Acircumflexsmall", + "Adieresissmall", + "Atildesmall", + "Aringsmall", + "Ccedillasmall", + "Eacutesmall", + "Egravesmall", + "Ecircumflexsmall", + "Edieresissmall", + "Iacutesmall", + "Igravesmall", + "Icircumflexsmall", + "Idieresissmall", + "Ntildesmall", + "Oacutesmall", + "Ogravesmall", + "Ocircumflexsmall", + "Odieresissmall", + "Otildesmall", + "Uacutesmall", + "Ugravesmall", + "Ucircumflexsmall", + "Udieresissmall", + null, + "eightsuperior", + "fourinferior", + "threeinferior", + "sixinferior", + "eightinferior", + "seveninferior", + "Scaronsmall", + null, + "centinferior", + "twoinferior", + null, + "Dieresissmall", + null, + "Caronsmall", + "osuperior", + "fiveinferior", + null, + "commainferior", + "periodinferior", + "Yacutesmall", + null, + "dollarinferior", + null, + null, + "Thornsmall", + null, + "nineinferior", + "zeroinferior", + "Zcaronsmall", + "AEsmall", + "Oslashsmall", + "questiondownsmall", + "oneinferior", + "Lslashsmall", + null, + null, + null, + null, + null, + null, + "Cedillasmall", + null, + null, + null, + null, + null, + "OEsmall", + "figuredash", + "hyphensuperior", + null, + null, + null, + null, + "exclamdownsmall", + null, + "Ydieresissmall", + null, + "onesuperior", + "twosuperior", + "threesuperior", + "foursuperior", + "fivesuperior", + "sixsuperior", + "sevensuperior", + "ninesuperior", + "zerosuperior", + null, + "esuperior", + "rsuperior", + "tsuperior", + null, + null, + "isuperior", + "ssuperior", + "dsuperior", + null, + null, + null, + null, + null, + "lsuperior", + "Ogoneksmall", + "Brevesmall", + "Macronsmall", + "bsuperior", + "nsuperior", + "msuperior", + "commasuperior", + "periodsuperior", + "Dotaccentsmall", + "Ringsmall", + null, + null, + null, + null + ], + MacRomanEncoding: [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "space", + "exclam", + "quotedbl", + "numbersign", + "dollar", + "percent", + "ampersand", + "quotesingle", + "parenleft", + "parenright", + "asterisk", + "plus", + "comma", + "hyphen", + "period", + "slash", + "zero", + "one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + "colon", + "semicolon", + "less", + "equal", + "greater", + "question", + "at", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "bracketleft", + "backslash", + "bracketright", + "asciicircum", + "underscore", + "grave", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "braceleft", + "bar", + "braceright", + "asciitilde", + null, + "Adieresis", + "Aring", + "Ccedilla", + "Eacute", + "Ntilde", + "Odieresis", + "Udieresis", + "aacute", + "agrave", + "acircumflex", + "adieresis", + "atilde", + "aring", + "ccedilla", + "eacute", + "egrave", + "ecircumflex", + "edieresis", + "iacute", + "igrave", + "icircumflex", + "idieresis", + "ntilde", + "oacute", + "ograve", + "ocircumflex", + "odieresis", + "otilde", + "uacute", + "ugrave", + "ucircumflex", + "udieresis", + "dagger", + "degree", + "cent", + "sterling", + "section", + "bullet", + "paragraph", + "germandbls", + "registered", + "copyright", + "trademark", + "acute", + "dieresis", + "notequal", + "AE", + "Oslash", + "infinity", + "plusminus", + "lessequal", + "greaterequal", + "yen", + "mu", + "partialdiff", + "summation", + "product", + "pi", + "integral", + "ordfeminine", + "ordmasculine", + "Omega", + "ae", + "oslash", + "questiondown", + "exclamdown", + "logicalnot", + "radical", + "florin", + "approxequal", + "Delta", + "guillemotleft", + "guillemotright", + "ellipsis", + "space", + "Agrave", + "Atilde", + "Otilde", + "OE", + "oe", + "endash", + "emdash", + "quotedblleft", + "quotedblright", + "quoteleft", + "quoteright", + "divide", + "lozenge", + "ydieresis", + "Ydieresis", + "fraction", + "currency", + "guilsinglleft", + "guilsinglright", + "fi", + "fl", + "daggerdbl", + "periodcentered", + "quotesinglbase", + "quotedblbase", + "perthousand", + "Acircumflex", + "Ecircumflex", + "Aacute", + "Edieresis", + "Egrave", + "Iacute", + "Icircumflex", + "Idieresis", + "Igrave", + "Oacute", + "Ocircumflex", + "apple", + "Ograve", + "Uacute", + "Ucircumflex", + "Ugrave", + "dotlessi", + "circumflex", + "tilde", + "macron", + "breve", + "dotaccent", + "ring", + "cedilla", + "hungarumlaut", + "ogonek", + "caron" + ], + StandardEncoding: [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "space", + "exclam", + "quotedbl", + "numbersign", + "dollar", + "percent", + "ampersand", + "quoteright", + "parenleft", + "parenright", + "asterisk", + "plus", + "comma", + "hyphen", + "period", + "slash", + "zero", + "one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + "colon", + "semicolon", + "less", + "equal", + "greater", + "question", + "at", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "bracketleft", + "backslash", + "bracketright", + "asciicircum", + "underscore", + "quoteleft", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "braceleft", + "bar", + "braceright", + "asciitilde", + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "exclamdown", + "cent", + "sterling", + "fraction", + "yen", + "florin", + "section", + "currency", + "quotesingle", + "quotedblleft", + "guillemotleft", + "guilsinglleft", + "guilsinglright", + "fi", + "fl", + null, + "endash", + "dagger", + "daggerdbl", + "periodcentered", + null, + "paragraph", + "bullet", + "quotesinglbase", + "quotedblbase", + "quotedblright", + "guillemotright", + "ellipsis", + "perthousand", + null, + "questiondown", + null, + "grave", + "acute", + "circumflex", + "tilde", + "macron", + "breve", + "dotaccent", + "dieresis", + null, + "ring", + "cedilla", + null, + "hungarumlaut", + "ogonek", + "caron", + "emdash", + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "AE", + null, + "ordfeminine", + null, + null, + null, + null, + "Lslash", + "Oslash", + "OE", + "ordmasculine", + null, + null, + null, + null, + null, + "ae", + null, + null, + null, + "dotlessi", + null, + null, + "lslash", + "oslash", + "oe", + "germandbls", + null, + null, + null, + null + ], + WinAnsiEncoding: [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "space", + "exclam", + "quotedbl", + "numbersign", + "dollar", + "percent", + "ampersand", + "quotesingle", + "parenleft", + "parenright", + "asterisk", + "plus", + "comma", + "hyphen", + "period", + "slash", + "zero", + "one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + "colon", + "semicolon", + "less", + "equal", + "greater", + "question", + "at", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "bracketleft", + "backslash", + "bracketright", + "asciicircum", + "underscore", + "grave", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "braceleft", + "bar", + "braceright", + "asciitilde", + "bullet", + "Euro", + "bullet", + "quotesinglbase", + "florin", + "quotedblbase", + "ellipsis", + "dagger", + "daggerdbl", + "circumflex", + "perthousand", + "Scaron", + "guilsinglleft", + "OE", + "bullet", + "Zcaron", + "bullet", + "bullet", + "quoteleft", + "quoteright", + "quotedblleft", + "quotedblright", + "bullet", + "endash", + "emdash", + "tilde", + "trademark", + "scaron", + "guilsinglright", + "oe", + "bullet", + "zcaron", + "Ydieresis", + "space", + "exclamdown", + "cent", + "sterling", + "currency", + "yen", + "brokenbar", + "section", + "dieresis", + "copyright", + "ordfeminine", + "guillemotleft", + "logicalnot", + "hyphen", + "registered", + "macron", + "degree", + "plusminus", + "twosuperior", + "threesuperior", + "acute", + "mu", + "paragraph", + "periodcentered", + "cedilla", + "onesuperior", + "ordmasculine", + "guillemotright", + "onequarter", + "onehalf", + "threequarters", + "questiondown", + "Agrave", + "Aacute", + "Acircumflex", + "Atilde", + "Adieresis", + "Aring", + "AE", + "Ccedilla", + "Egrave", + "Eacute", + "Ecircumflex", + "Edieresis", + "Igrave", + "Iacute", + "Icircumflex", + "Idieresis", + "Eth", + "Ntilde", + "Ograve", + "Oacute", + "Ocircumflex", + "Otilde", + "Odieresis", + "multiply", + "Oslash", + "Ugrave", + "Uacute", + "Ucircumflex", + "Udieresis", + "Yacute", + "Thorn", + "germandbls", + "agrave", + "aacute", + "acircumflex", + "atilde", + "adieresis", + "aring", + "ae", + "ccedilla", + "egrave", + "eacute", + "ecircumflex", + "edieresis", + "igrave", + "iacute", + "icircumflex", + "idieresis", + "eth", + "ntilde", + "ograve", + "oacute", + "ocircumflex", + "otilde", + "odieresis", + "divide", + "oslash", + "ugrave", + "uacute", + "ucircumflex", + "udieresis", + "yacute", + "thorn", + "ydieresis" + ], + zapfDingbatsEncoding: [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "space", + "a1", + "a2", + "a202", + "a3", + "a4", + "a5", + "a119", + "a118", + "a117", + "a11", + "a12", + "a13", + "a14", + "a15", + "a16", + "a105", + "a17", + "a18", + "a19", + "a20", + "a21", + "a22", + "a23", + "a24", + "a25", + "a26", + "a27", + "a28", + "a6", + "a7", + "a8", + "a9", + "a10", + "a29", + "a30", + "a31", + "a32", + "a33", + "a34", + "a35", + "a36", + "a37", + "a38", + "a39", + "a40", + "a41", + "a42", + "a43", + "a44", + "a45", + "a46", + "a47", + "a48", + "a49", + "a50", + "a51", + "a52", + "a53", + "a54", + "a55", + "a56", + "a57", + "a58", + "a59", + "a60", + "a61", + "a62", + "a63", + "a64", + "a65", + "a66", + "a67", + "a68", + "a69", + "a70", + "a71", + "a72", + "a73", + "a74", + "a203", + "a75", + "a204", + "a76", + "a77", + "a78", + "a79", + "a81", + "a82", + "a83", + "a84", + "a97", + "a98", + "a99", + "a100", + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + "a101", + "a102", + "a103", + "a104", + "a106", + "a107", + "a108", + "a112", + "a111", + "a110", + "a109", + "a120", + "a121", + "a122", + "a123", + "a124", + "a125", + "a126", + "a127", + "a128", + "a129", + "a130", + "a131", + "a132", + "a133", + "a134", + "a135", + "a136", + "a137", + "a138", + "a139", + "a140", + "a141", + "a142", + "a143", + "a144", + "a145", + "a146", + "a147", + "a148", + "a149", + "a150", + "a151", + "a152", + "a153", + "a154", + "a155", + "a156", + "a157", + "a158", + "a159", + "a160", + "a161", + "a163", + "a164", + "a196", + "a165", + "a192", + "a166", + "a167", + "a168", + "a169", + "a170", + "a171", + "a172", + "a173", + "a162", + "a174", + "a175", + "a176", + "a177", + "a178", + "a179", + "a193", + "a180", + "a199", + "a181", + "a200", + "a182", + null, + "a201", + "a183", + "a184", + "a197", + "a185", + "a194", + "a198", + "a186", + "a195", + "a187", + "a188", + "a189", + "a190", + "a191", + null + ] +}; + diff --git a/PDFFont.js b/PDFFont.js index fd7e0d86f..ff06c5e4e 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -40,7 +40,7 @@ var Fonts = { unicodeFromCode: function fonts_unicodeFromCode(aCode) { var active = this._active; - if (!active) + if (!active || !active.encoding) return aCode; var difference = active.encoding[aCode]; @@ -60,7 +60,7 @@ var Fonts = { * As an improvment the last parameter can be replaced by an automatic guess * of the font type based on the first byte of the file. */ -var Font = function(aName, aFile, aEncoding, aType) { +var Font = function(aName, aFile, aEncoding, aCharset, aType) { this.name = aName; // If the font has already been decoded simply return @@ -95,6 +95,7 @@ var Font = function(aName, aFile, aEncoding, aType) { Fonts[aName] = { data: this.font, encoding: aEncoding, + charset: aCharset ? aCharset.slice() : null, loading: true } @@ -125,10 +126,10 @@ Font.prototype = { // Actually there is not event when a font has finished downloading so // the following tons of code are a dirty hack to 'guess' when a font is // ready - var debug = false; + var debug = true; var canvas = document.createElement("canvas"); - var style = "position:absolute; top: " + + var style = "border: 1px solid black; position:absolute; top: " + (debug ? (80 * fontCount) : "-200") + "px; left: 100px;"; canvas.setAttribute("style", style); canvas.setAttribute("width", 100); @@ -136,40 +137,19 @@ Font.prototype = { document.body.appendChild(canvas); // Retrieve font charset - var charset = null; - var page = pdfDocument.getPage(pageNum); - var xref = page.xref; - - var fonts = page.fonts; - fonts.forEach(function(fontKey, fontData) { - var descriptor = xref.fetch(fontData.get("FontDescriptor")); - var name = descriptor.get("FontName").toString(); - var font = Fonts[name.replace("+", "_")]; - if (font && font.loading && name == fontName.replace("_", "+")) { - charset = descriptor.get("CharSet"); - charset = charset ? charset.split("/") : null; - return; - } - }); - - // Warn if the charset is not found, this is likely - var testCharset = charset || []; - if (!charset) { - warn("No charset found for: " + fontName); - } else { - // if the charset is too small make it repeat a few times - var count = 30; - while (count-- && testCharset.length <= 30) - testCharset = testCharset.concat(charset.slice()); - } + var charset = Fonts[fontName].charset || []; + // if the charset is too small make it repeat a few times + var count = 30; + while (count-- && charset.length <= 30) + charset = charset.concat(charset.slice()); // Get the font size canvas think it will be var ctx = canvas.getContext("2d"); var testString = ""; - for (var i = 0; i < testCharset.length; i++) { - var unicode = new Number("0x" + GlyphsUnicode[testCharset[i]]); + for (var i = 0; i < charset.length; i++) { + var unicode = new Number("0x" + GlyphsUnicode[charset[i]]); if (!unicode) - error("Unicode for " + testCharset[i] + " is has not been found in the glyphs list"); + error("Unicode for " + charset[i] + " is has not been found in the glyphs list"); testString += String.fromCharCode(unicode); } ctx.font = "20px " + fontName + ", Symbol"; diff --git a/test.html b/test.html index 8d649a149..d18f9599b 100644 --- a/test.html +++ b/test.html @@ -7,6 +7,7 @@ + diff --git a/test.js b/test.js index 2b99dc136..1dffac549 100644 --- a/test.js +++ b/test.js @@ -93,17 +93,34 @@ function displayPage(num) { if (fontDict.has("Encoding")) { var encoding = xref.fetchIfRef(fontDict.get("Encoding")); if (IsDict(encoding)) { + + // Build an map between codes and glyphs var differences = encoding.get("Differences"); var index = 0; for (var j = 0; j < differences.length; j++) { var data = differences[j]; IsNum(data) ? index = data : encodingMap[index++] = data; } + + // Get the font charset + var charset = descriptor.get("CharSet").split("/"); + + } else if (IsName(encoding)) { + var encoding = Encodings[encoding]; + var widths = xref.fetchIfRef(fontDict.get("Widths")); + var firstchar = xref.fetchIfRef(fontDict.get("FirstChar")); + + var charset = []; + for (var j = 0; j < widths.length; j++) { + var index = widths[j]; + if (index) + charset.push(encoding[j + firstchar]); + } } } var subtype = fontDict.get("Subtype").name; - new Font(fontName, fontFile, encodingMap, subtype); + new Font(fontName, fontFile, encodingMap, charset, subtype); return fontsReady = false; } else if (font.loading) { return fontsReady = false; From 73350b1f46d44707ebcc8563dea0da7aa06043f3 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 15 Jun 2011 05:42:15 +0200 Subject: [PATCH 56/72] Forgot to turn off some debug flags --- PDFFont.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PDFFont.js b/PDFFont.js index ff06c5e4e..f20f7e24f 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -126,7 +126,7 @@ Font.prototype = { // Actually there is not event when a font has finished downloading so // the following tons of code are a dirty hack to 'guess' when a font is // ready - var debug = true; + var debug = false; var canvas = document.createElement("canvas"); var style = "border: 1px solid black; position:absolute; top: " + From 2519e4f53b02cebbec59f4194288e4e2a7108e8a Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 15 Jun 2011 09:21:59 +0200 Subject: [PATCH 57/72] Make Type1 glyphs use the default width they declare --- PDFFont.js | 68 ++++++++++++++++-------------------------------------- 1 file changed, 20 insertions(+), 48 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index f20f7e24f..89dc22d42 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1375,10 +1375,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { * as descrived in 'Using Subroutines' of 'Adobe Type 1 Font Format', * chapter 8. */ - this.flattenCharstring = function(aCharstring, aDefaultWidth, aSubrs) { + this.flattenCharstring = function(aCharstring, aSubrs) { operandStack.clear(); executionStack.clear(); - executionStack.push(aCharstring); + executionStack.push(aCharstring.slice()); var leftSidebearing = 0; var lastPoint = 0; @@ -1392,24 +1392,13 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { switch (obj) { case "hsbw": var charWidthVector = operandStack.pop(); - leftSidebearing = operandStack.pop(); - - if (charWidthVector != aDefaultWidth) - operandStack.push(charWidthVector - aDefaultWidth); - break; - - case "rmoveto": - var dy = operandStack.pop(); - var dx = operandStack.pop(); + var leftSidebearing = operandStack.pop(); + operandStack.push(charWidthVector); if (leftSidebearing) { - dx += leftSidebearing; - leftSidebearing = 0; + operandStack.push(leftSidebearing); + operandStack.push("hmoveto"); } - - operandStack.push(dx); - operandStack.push(dy); - operandStack.push("rmoveto"); break; case "div": @@ -1445,12 +1434,13 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { break; case "callothersubr": - // XXX need to be improved var index = operandStack.pop(); var count = operandStack.pop(); var data = operandStack.pop(); + // XXX The callothersubr needs to support at least the 3 defaults + // otherSubrs of the spec if (index != 3) - dump("callothersubr for index: " + index); + error("callothersubr for index: " + index); operandStack.push(3); operandStack.push("callothersubr"); break; @@ -1490,25 +1480,6 @@ var CFF = function(aFontFile) { }; CFF.prototype = { - getDefaultWidth: function(aCharstrings) { - var defaultWidth = 0; - var defaultUsedCount = 0; - - var widths = {}; - for (var i = 0; i < aCharstrings.length; i++) { - var width = aCharstrings[i].charstring[1]; - var usedCount = (widths[width] || 0) + 1; - - if (usedCount > defaultUsedCount) { - defaultUsedCount = usedCount; - defaultWidth = width; - } - - widths[width] = usedCount; - } - return parseInt(defaultWidth); - }, - createCFFIndexHeader: function(aObjects, aIsByte) { var data = []; @@ -1602,7 +1573,6 @@ CFF.prototype = { }; var charstrings = this.getOrderedCharStrings(aFont); - var defaultWidth = this.getDefaultWidth(charstrings); var charstringsCount = 0; var charstringsDataLength = 0; @@ -1617,7 +1587,7 @@ CFF.prototype = { error("glyphs already exists!"); glyphsChecker[glyph] = true; - var flattened = parser.flattenCharstring(charstring, defaultWidth, subrs); + var flattened = parser.flattenCharstring(charstring, subrs); glyphs.push(flattened); charstringsCount++; charstringsDataLength += flattened.length; @@ -1712,8 +1682,6 @@ CFF.prototype = { charstringsIndex = charstringsIndex.join(" ").split(" "); // XXX why? - var fontBBox = aFont.get("FontBBox"); - //Top Dict Index var topDictIndex = [ 0x00, 0x01, 0x01, 0x01, 0x30, @@ -1724,6 +1692,7 @@ CFF.prototype = { 248, 31, 4 // Weight ]; + var fontBBox = aFont.get("FontBBox"); for (var i = 0; i < fontBBox.length; i++) topDictIndex = topDictIndex.concat(this.encodeNumber(fontBBox[i])); topDictIndex.push(5) // FontBBox; @@ -1768,19 +1737,22 @@ CFF.prototype = { currentOffset += charstringsIndex.length; // Private Data - var privateData = [ - 248, 136, 20, - 248, 136, 21, + var defaultWidth = this.encodeNumber(0); + var privateData = [].concat( + defaultWidth, [20], + defaultWidth, [21], + [ 119, 159, 248, 97, 159, 247, 87, 159, 6, 30, 10, 3, 150, 37, 255, 12, 9, - 139, 12, 10, - 172, 10, + 139, 12, + 10, 172, 10, 172, 150, 143, 146, 150, 146, 12, 12, 247, 32, 11, 247, 10, 161, 147, 154, 150, 143, 12, 13, 139, 12, 14, 28, 0, 55, 19 - ]; + ]); + privateData = privateData.join(" ").split(" "); cff.set(privateData, currentOffset); currentOffset += privateData.length; From 650ed04a702485e209843bbfe38fc5a0faeed905 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 15 Jun 2011 23:02:30 +0200 Subject: [PATCH 58/72] Get rid of the PostScript interpreter (part 1) --- PDFFont.js | 448 ++++++++++++----------------------------------------- test.js | 4 +- 2 files changed, 105 insertions(+), 347 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 89dc22d42..e54f3ec4b 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -14,7 +14,7 @@ var kMaxGlyphsCount = 65526; /** * Maximum time to wait for a font to be loaded by @font-face */ -var kMaxWaitForFontFace = 2000; +var kMaxWaitForFontFace = 1000; /* * Useful for debugging when you want to certains operations depending on how @@ -59,8 +59,11 @@ var Fonts = { * * As an improvment the last parameter can be replaced by an automatic guess * of the font type based on the first byte of the file. + * + * XXX There is now too many parameters, this should be turned into an + * object containing all the required informations about the font */ -var Font = function(aName, aFile, aEncoding, aCharset, aType) { +var Font = function(aName, aFile, aEncoding, aCharset, aBBox, aType) { this.name = aName; // If the font has already been decoded simply return @@ -73,7 +76,7 @@ var Font = function(aName, aFile, aEncoding, aCharset, aType) { var start = Date.now(); switch (aType) { case "Type1": - var cff = new CFF(aFile); + var cff = new CFF(aName, aBBox, aFile); this.mimetype = "font/otf"; // Wrap the CFF data inside an OTF font file @@ -175,7 +178,7 @@ Font.prototype = { if (debug) ctx.fillText(testString, 20, 50); - }, 150, this); + }, 20, this); /** Hack end */ @@ -402,7 +405,7 @@ Font.prototype = { this._createTableEntry(otf, offsets, "OS/2", OS2); //XXX Getting charstrings here seems wrong since this is another CFF glue - var charstrings = aFont.getOrderedCharStrings(aFont.font); + var charstrings = aFont.getOrderedCharStrings(aFont.glyphs); /** CMAP */ cmap = this._createCMAPTable(charstrings); @@ -851,9 +854,7 @@ var Stack = function(aStackSize) { }; }; -var Type1Parser = function(aAsciiStream, aBinaryStream) { - var lexer = aAsciiStream ? new Lexer(aAsciiStream) : null; - +var Type1Parser = function() { // Turn on this flag for additional debugging logs var debug = false; @@ -862,30 +863,6 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { log(aData); }; - // Hold the fontName as declared inside the /FontName postscript directive - // XXX This is a hack but at the moment I need it to map the name declared - // in the PDF and the name in the PS code. - var fontName = ""; - - /* - * Parse a whole Type1 font stream (from the first segment to the last) - * assuming the 'eexec' block is binary data and fill up the 'Fonts' - * dictionary with the font informations. - */ - var self = this; - this.parse = function() { - if (!debug) { - while (!processNextToken()) {}; - return fontName; - } else { - // debug mode is used to debug postcript processing - setTimeout(function() { - if (!processNextToken()) - self.parse(); - }, 0); - } - }; - /* * Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence * of Plaintext Bytes. The function took a key as a parameter which can be @@ -894,7 +871,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var kEexecEncryptionKey = 55665; var kCharStringsEncryptionKey = 4330; - function decrypt(aStream, aKey, aDiscardNumber) { + function decrypt(aStream, aKey, aDiscardNumber, aByteArray) { var start = Date.now(); var r = aKey, c1 = 52845, c2 = 22719; var decryptedString = []; @@ -903,7 +880,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var count = aStream.length; for (var i = 0; i < count; i++) { value = aStream.getByte(); - decryptedString[i] = String.fromCharCode(value ^ (r >> 8)); + if (aByteArray) + decryptedString[i] = value ^ (r >> 8); + else + decryptedString[i] = String.fromCharCode(value ^ (r >> 8)); r = ((value + r) * c1 + c2) & ((1 << 16) - 1); } var end = Date.now(); @@ -1017,7 +997,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { var end = Date.now(); dump("Time to decode charString of length " + count + " is " + (end - start)); return charString; - } + }; /* * The operand stack holds arbitrary PostScript objects that are the operands @@ -1068,305 +1048,76 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { */ function nextInStack() { var currentProcedure = executionStack.peek(); - if (currentProcedure) { - var command = currentProcedure.shift(); - if (!currentProcedure.length) - executionStack.pop(); - return command; - } - - return lexer.getObj(); + var command = currentProcedure.shift(); + if (!currentProcedure.length) + executionStack.pop(); + return command; }; - /* - * Get the next token from the executionStack and process it. - * Actually the function does not process the third segment of a Type1 font - * and end on 'closefile'. - * - * The method thrown an error if it encounters an unknown token. + /** + * Returns an object containing a Subrs array and a CharStrings array + * extracted from and eexec encrypted block of data */ - function processNextToken() { - var obj = nextInStack(); - if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") && - !IsCmd(obj, "]") && !IsCmd(obj, "}")) { - dump("Adding an object: " + obj +" to array " + operandIsArray); - var currentArray = operandStack.peek(); - for (var i = 1; i < operandIsArray; i++) - currentArray = currentArray[currentArray.length - 1]; + this.extractFontInfo = function(aStream) { + var eexecString = decrypt(new Stream(aStream), kEexecEncryptionKey, 4, true); + var subrs = [], glyphs = []; + var inSubrs = inGlyphs = false; + var glyph = ""; - currentArray.push(obj); - } else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) { - dump("Value: " + obj); - operandStack.push(obj); - } else if (IsName(obj)) { - dump("Name: " + obj.name); - operandStack.push(obj.name); - } else if (IsCmd(obj)) { - var command = obj.cmd; - dump(command); + var token = ""; + var index = 0; + var length = 0; - switch (command) { - case "[": - case "{": - dump("Start" + (command == "{" ? " Executable " : " ") + "Array"); - operandIsArray++; - var currentArray = operandStack; - for (var i = 1; i < operandIsArray; i++) - if (currentArray.peek) - currentArray = currentArray.peek(); - else - currentArray = currentArray[currentArray.length - 1]; - currentArray.push([]); - break; + var count = eexecString.length; + var c = ""; + for (var i = 0; i < count; i++) { + var c = eexecString[i]; - case "]": - case "}": - var currentArray = operandStack.peek(); - for (var i = 1; i < operandIsArray; i++) - currentArray = currentArray[currentArray.length - 1]; - dump("End" + (command == "}" ? " Executable " : " ") + "Array: " + currentArray.join(" ")); - operandIsArray--; - break; + if (inSubrs && c == 0x52) { + length = parseInt(length); + var stream = new Stream(eexecString.slice(i + 3, i + 3 + length)); + var encodedSubr = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); + var subr = decodeCharString(new StringStream(encodedSubr)); - case "if": - var procedure = operandStack.pop(); - var bool = operandStack.pop(); - if (!IsBool(bool)) { - dump("if: " + bool); - // we need to execute things, let be dirty - executionStack.push(bool); - } else { - dump("if ( " + bool + " ) { " + procedure + " }"); - if (bool) - executionStack.push(procedure); - } - break; + subrs.push(subr); + i += 3 + length; + } else if (inGlyphs && c == 0x52) { + length = parseInt(length); + var stream = new Stream(eexecString.slice(i + 3, i + 3 + length)); + var encodedCharstring = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); + var subr = decodeCharString(new StringStream(encodedCharstring)); - case "ifelse": - var procedure1 = operandStack.pop(); - var procedure2 = operandStack.pop(); - var bool = !!operandStack.pop(); - dump("if ( " + bool + " ) { " + procedure2 + " } else { " + procedure1 + " }"); - executionStack.push(bool ? procedure2 : procedure1); - break; + glyphs.push({ + glyph: glyph, + data: subr + }); + i += 3 + length; + } else if (inGlyphs && c == 0x2F) { + token = ""; + glyph = ""; - case "for": - var procedure = operandStack.pop(); - var limit = operandStack.pop(); - var increment = operandStack.pop(); - var initial = operandStack.pop(); - for (var i = 0; i < limit; i += increment) { - operandStack.push(i); - executionStack.push(procedure.slice()); - } - break; - - case "dup": - dump("duplicate: " + operandStack.peek()); - operandStack.push(operandStack.peek()); - break; - - case "mark": - operandStack.push("mark"); - break; - - case "cleartomark": - var command = ""; - do { - command = operandStack.pop(); - } while (command != "mark"); - break; - - case "put": - var data = operandStack.pop(); - var indexOrKey = operandStack.pop(); - var object = operandStack.pop(); - dump("put " + data + " in " + object + "[" + indexOrKey + "]"); - object.set ? object.set(indexOrKey, data) - : object[indexOrKey] = data; - break; - - case "pop": - operandStack.pop(); - break; - - case "exch": - var operand1 = operandStack.pop(); - var operand2 = operandStack.pop(); - operandStack.push(operand1); - operandStack.push(operand2); - break; - - case "get": - var indexOrKey = operandStack.pop(); - var object = operandStack.pop(); - var data = object.get ? object.get(indexOrKey) : object[indexOrKey]; - dump("get " + object + "[" + indexOrKey + "]: " + data); - operandStack.push(data); - break; - - case "currentdict": - var dict = dictionaryStack.peek(); - operandStack.push(dict); - break; - - case "systemdict": - operandStack.push(systemDict); - break; - - case "readonly": - case "executeonly": - case "noaccess": - // Do nothing for the moment - break; - - case "currentfile": - operandStack.push("currentfile"); - break; - - case "array": - var size = operandStack.pop(); - var array = new Array(size); - operandStack.push(array); - break; - - case "dict": - var size = operandStack.pop(); - var dict = new Dict(size); - operandStack.push(dict); - break; - - case "begin": - dictionaryStack.push(operandStack.pop()); - break; - - case "end": - dictionaryStack.pop(); - break; - - case "def": - var value = operandStack.pop(); - var key = operandStack.pop(); - dump("def: " + key + " = " + value); - dictionaryStack.peek().set(key, value); - break; - - case "definefont": - var font = operandStack.pop(); - var key = operandStack.pop(); - dump("definefont " + font + " with key: " + key); - - // The key will be the identifier to recognize this font - fontName = key; - PSFonts.set(key, font); - - operandStack.push(font); - break; - - case "known": - var name = operandStack.pop(); - var dict = operandStack.pop(); - var data = !!dict.get(name); - dump("known: " + data + " :: " + name + " in dict: " + dict); - operandStack.push(data); - break; - - case "exec": - executionStack.push(operandStack.pop()); - break; - - case "eexec": - // All the first segment data has been read, decrypt the second segment - // and start interpreting it in order to decode it - var file = operandStack.pop(); - var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join(""); - lexer = new Lexer(new StringStream(eexecString)); - break; - - case "LenIV": - error("LenIV: argh! we need to modify the length of discard characters for charStrings"); - break; - - case "closefile": - var file = operandStack.pop(); - return true; - break; - - case "index": - var operands = []; - var size = operandStack.pop(); - for (var i = 0; i < size; i++) - operands.push(operandStack.pop()); - - var newOperand = operandStack.peek(); - - while (operands.length) - operandStack.push(operands.pop()); - - operandStack.push(newOperand); - break; - - case "string": - var size = operandStack.pop(); - var str = (new Array(size + 1)).join(" "); - operandStack.push(str); - break; - - case "readstring": - var str = operandStack.pop(); - var size = str.length; - - var file = operandStack.pop(); - - // Add '1' because of the space separator, this is dirty - var stream = lexer.stream.makeSubStream(lexer.stream.start + lexer.stream.pos + 1, size); - lexer.stream.skip(size + 1); - - var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); - var charStream = new StringStream(charString); - var decodedCharString = decodeCharString(charStream); - operandStack.push(decodedCharString); - - // boolean indicating if the operation is a success or not - operandStack.push(true); - break; - - case "StandardEncoding": - // For some reason the value is considered as a command, maybe it is - // because of the uppercase 'S' - operandStack.push(obj.cmd); - break; - - default: - var command = null; - if (IsCmd(obj)) { - for (var i = 0; i < dictionaryStack.count(); i++) { - if (command = dictionaryStack.get(i).get(obj.cmd)) { - dump("found in dictionnary for " + obj.cmd + " command: " + command); - executionStack.push(command.slice()); - break; - } - } - } - - if (!command) { - log("operandStack: " + operandStack); - log("dictionaryStack: " + dictionaryStack); - log(obj); - error("Unknow command while parsing font"); - } - break; + while ((c = eexecString[++i]) != 0x20 && i < count) + glyph += String.fromCharCode(c); + } else if (c == 0x2F && eexecString[i+1] == 0x53 && !inGlyphs && !inSubrs) { + while ((c = eexecString[++i]) != 0x20) {}; + inSubrs = true; + } else if (c == 0x20) { + index = length; + length = token; + token = ""; + } else if (c == 0x2F && eexecString[i+1] == 0x43 && eexecString[i+2] == 0x68) { + while ((c = eexecString[++i]) != 0x20) {}; + inSubrs = false; + inGlyphs = true; + } else { + token += String.fromCharCode(c); } - } else if (obj) { - dump("unknow: " + obj); - operandStack.push(obj); - } else { // The End! - operandStack.dump(); - return true; } - - return false; - } + return { + subrs: subrs, + charstrings: glyphs + } + }; /* * Flatten the commands by interpreting the postscript code and replacing @@ -1462,19 +1213,25 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) { } }; -var CFF = function(aFontFile) { +var CFF = function(aFontName, aFontBBox, aFontFile) { var start = Date.now(); + // Get the data block containing glyphs and subrs informations var length1 = aFontFile.dict.get("Length1"); var length2 = aFontFile.dict.get("Length2"); + aFontFile.skip(length1); + var eexecBlock = aFontFile.getBytes(length2); - var ASCIIStream = new Stream(aFontFile.getBytes(length1)); - var binaryStream = new Stream(aFontFile.getBytes(length2)); + // Extract informations from it + var parser = new Type1Parser(); + var fontInfo = parser.extractFontInfo(eexecBlock); + fontInfo.name = aFontName; + fontInfo.bbox = aFontBBox; - this.parser = new Type1Parser(ASCIIStream, binaryStream); - var fontName = this.parser.parse(); - this.font = PSFonts.get(fontName); - this.data = this.convertToCFF(this.font); + // XXX + this.glyphs = fontInfo.charstrings; + + this.data = this.convertToCFF(fontInfo); var end = Date.now(); //log("Time to parse font is:" + (end - start)); }; @@ -1537,11 +1294,11 @@ CFF.prototype = { } }, - getOrderedCharStrings: function(aFont) { + getOrderedCharStrings: function(aGlyphs) { var charstrings = []; - var glyphs = aFont.get("CharStrings") - glyphs.forEach(function(glyph, glyphData) { + for (var i = 0; i < aGlyphs.length; i++) { + var glyph = aGlyphs[i].glyph; var unicode = GlyphsUnicode[glyph]; if (!unicode) { if (glyph != ".notdef") @@ -1554,10 +1311,10 @@ CFF.prototype = { charstrings.push({ glyph: glyph, unicode: unicode, - charstring: glyphData.slice() + charstring: aGlyphs[i].data.slice() }); } - }); + }; charstrings.sort(function(a, b) { return a.unicode > b.unicode; @@ -1565,20 +1322,20 @@ CFF.prototype = { return charstrings; }, - convertToCFF: function(aFont) { + convertToCFF: function(aFontInfo) { var debug = false; function dump(aMsg) { if (debug) log(aMsg); }; - var charstrings = this.getOrderedCharStrings(aFont); + var charstrings = this.getOrderedCharStrings(aFontInfo.charstrings); var charstringsCount = 0; var charstringsDataLength = 0; var glyphs = []; var glyphsChecker = {}; - var subrs = aFont.get("Private").get("Subrs"); + var subrs = aFontInfo.subrs; var parser = new Type1Parser(); for (var i = 0; i < charstrings.length; i++) { var charstring = charstrings[i].charstring.slice(); @@ -1604,19 +1361,18 @@ CFF.prototype = { cff.set(header); // Names Index - var nameIndex = this.createCFFIndexHeader([aFont.get("FontName")]); + var nameIndex = this.createCFFIndexHeader([aFontInfo.name]); cff.set(nameIndex, currentOffset); currentOffset += nameIndex.length; // Calculate strings before writing the TopDICT index in order // to calculate correct relative offsets for storing 'charset' // and 'charstrings' data - var fontInfo = aFont.get("FontInfo"); - var version = fontInfo.get("version"); - var notice = fontInfo.get("Notice"); - var fullName = fontInfo.get("FullName"); - var familyName = fontInfo.get("FamilyName"); - var weight = fontInfo.get("Weight"); + var version = ""; + var notice = ""; + var fullName = ""; + var familyName = ""; + var weight = ""; var strings = [version, notice, fullName, familyName, weight]; var stringsIndex = this.createCFFIndexHeader(strings); @@ -1692,7 +1448,7 @@ CFF.prototype = { 248, 31, 4 // Weight ]; - var fontBBox = aFont.get("FontBBox"); + var fontBBox = aFontInfo.bbox; for (var i = 0; i < fontBBox.length; i++) topDictIndex = topDictIndex.concat(this.encodeNumber(fontBBox[i])); topDictIndex.push(5) // FontBBox; diff --git a/test.js b/test.js index 1dffac549..0fdb2aacf 100644 --- a/test.js +++ b/test.js @@ -119,8 +119,10 @@ function displayPage(num) { } } + var fontBBox = descriptor.get("FontBBox"); + var subtype = fontDict.get("Subtype").name; - new Font(fontName, fontFile, encodingMap, charset, subtype); + new Font(fontName, fontFile, encodingMap, charset, fontBBox, subtype); return fontsReady = false; } else if (font.loading) { return fontsReady = false; From 97e8a563232d53aba0680a0febed845462946463 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 16 Jun 2011 01:09:17 +0200 Subject: [PATCH 59/72] Remove some useless JS function calls --- PDFFont.js | 82 +++++++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 44 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index e54f3ec4b..f052ecf9f 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1096,9 +1096,9 @@ var Type1Parser = function() { token = ""; glyph = ""; - while ((c = eexecString[++i]) != 0x20 && i < count) + while ((c = eexecString[++i]) != 0x20) glyph += String.fromCharCode(c); - } else if (c == 0x2F && eexecString[i+1] == 0x53 && !inGlyphs && !inSubrs) { + } else if (!inSubrs && !inGlyphs && c == 0x2F && eexecString[i+1] == 0x53) { while ((c = eexecString[++i]) != 0x20) {}; inSubrs = true; } else if (c == 0x20) { @@ -1127,7 +1127,9 @@ var Type1Parser = function() { * chapter 8. */ this.flattenCharstring = function(aCharstring, aSubrs) { + var operandStackIndex = 0; operandStack.clear(); + executionStack.clear(); executionStack.push(aCharstring.slice()); @@ -1135,48 +1137,13 @@ var Type1Parser = function() { var lastPoint = 0; while (true) { var obj = nextInStack(); - if (IsBool(obj) || IsInt(obj) || IsNum(obj)) { - dump("Value: " + obj); + if (IsInt(obj) || IsBool(obj)) { operandStack.push(obj); - } else if (IsString(obj)) { - dump("String: " + obj); + } else { switch (obj) { - case "hsbw": - var charWidthVector = operandStack.pop(); - var leftSidebearing = operandStack.pop(); - operandStack.push(charWidthVector); - - if (leftSidebearing) { - operandStack.push(leftSidebearing); - operandStack.push("hmoveto"); - } - break; - - case "div": - var num2 = operandStack.pop(); - var num1 = operandStack.pop(); - operandStack.push(num2 / num1); - break; - - case "setcurrentpoint": - case "dotsection": - case "seac": - case "sbw": - error(obj + " parsing is not implemented (yet)"); - break; - - case "closepath": - case "return": - break; - case "vstem3": - case "vstem": - operandStack.push("vstem"); - break; - - case "hstem": case "hstem3": - operandStack.push("hstem"); + operandStack.push(obj.slice(0, 5)); break; case "callsubr": @@ -1196,12 +1163,40 @@ var Type1Parser = function() { operandStack.push("callothersubr"); break; + case "div": + var num2 = operandStack.pop(); + var num1 = operandStack.pop(); + operandStack.push(num2 / num1); + break; + + case "pop": + operandStack.pop(); + break; + + case "closepath": + case "return": + break; + + case "hsbw": + var charWidthVector = operandStack.pop(); + var leftSidebearing = operandStack.pop(); + operandStack.push(charWidthVector); + + if (leftSidebearing) { + operandStack.push(leftSidebearing); + operandStack.push("hmoveto"); + } + break; + case "endchar": operandStack.push("endchar"); return operandStack.clone(); - case "pop": - operandStack.pop(); + case "setcurrentpoint": + case "dotsection": + case "seac": + case "sbw": + error(obj + " parsing is not implemented (yet)"); break; default: @@ -1214,8 +1209,6 @@ var Type1Parser = function() { }; var CFF = function(aFontName, aFontBBox, aFontFile) { - var start = Date.now(); - // Get the data block containing glyphs and subrs informations var length1 = aFontFile.dict.get("Length1"); var length2 = aFontFile.dict.get("Length2"); @@ -1223,6 +1216,7 @@ var CFF = function(aFontName, aFontBBox, aFontFile) { var eexecBlock = aFontFile.getBytes(length2); // Extract informations from it + var start = Date.now(); var parser = new Type1Parser(); var fontInfo = parser.extractFontInfo(eexecBlock); fontInfo.name = aFontName; From 509d608a3a08f2dbfd6e20dd7a8ceb9ccc58db42 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 16 Jun 2011 01:30:47 +0200 Subject: [PATCH 60/72] Use spaces to compare font size to detect when the @font-face rule works, this seems to be more accurate --- PDFFont.js | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index f052ecf9f..a8205753e 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -131,12 +131,20 @@ Font.prototype = { // ready var debug = false; + if (debug) { + var name = document.createElement("font"); + name.setAttribute("style", "position: absolute; left: 20px; top: " + + (100 * fontCount + 60) + "px"); + name.innerHTML = fontName; + document.body.appendChild(name); + } + var canvas = document.createElement("canvas"); - var style = "border: 1px solid black; position:absolute; top: " + - (debug ? (80 * fontCount) : "-200") + "px; left: 100px;"; + var style = "border: 1px solid black; position:absolute; top: " + + (debug ? (100 * fontCount) : "-200") + "px; left: 2px; width: 340px; height: 100px"; canvas.setAttribute("style", style); - canvas.setAttribute("width", 100); - canvas.setAttribute("heigth", 70); + canvas.setAttribute("width", 340); + canvas.setAttribute("heigth", 100); document.body.appendChild(canvas); // Retrieve font charset @@ -146,16 +154,21 @@ Font.prototype = { while (count-- && charset.length <= 30) charset = charset.concat(charset.slice()); - // Get the font size canvas think it will be + // Get the font size canvas think it will be for 'spaces' var ctx = canvas.getContext("2d"); - var testString = ""; - for (var i = 0; i < charset.length; i++) { - var unicode = new Number("0x" + GlyphsUnicode[charset[i]]); - if (!unicode) - error("Unicode for " + charset[i] + " is has not been found in the glyphs list"); - testString += String.fromCharCode(unicode); + var testString = " "; + + // When debugging use the characters provided by the charsets to visually + // see what's happening + if (debug) { + for (var i = 0; i < charset.length; i++) { + var unicode = new Number("0x" + GlyphsUnicode[charset[i]]); + if (!unicode) + error("Unicode for " + charset[i] + " is has not been found in the glyphs list"); + testString += String.fromCharCode(unicode); + } } - ctx.font = "20px " + fontName + ", Symbol"; + ctx.font = "bold italic 20px " + fontName + ", Symbol, Arial"; var textWidth = ctx.mozMeasureText(testString); if (debug) @@ -163,7 +176,7 @@ Font.prototype = { var start = Date.now(); var interval = window.setInterval(function(self) { - ctx.font = "20px " + fontName + ", Symbol"; + ctx.font = "bold italic 20px " + fontName + ", Symbol, Arial"; // For some reasons the font has not loaded, so mark it loaded for the // page to proceed but cry From 1923337bab3dd38e98bf1c66400a6d85d71ab139 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 16 Jun 2011 02:17:45 +0200 Subject: [PATCH 61/72] Change the nominalWidth to not use the format 28,b0,b1 but b0-139 in order to pass the sanitizer and disable TrueType support until it fully works --- PDFFont.js | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index a8205753e..8c3abc7ec 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -31,7 +31,7 @@ var fontCount = 0; var Fonts = { _active: null, get active() { - return this._active; + return this._active || { encoding: {} }; }, set active(aFontName) { @@ -39,12 +39,7 @@ var Fonts = { }, unicodeFromCode: function fonts_unicodeFromCode(aCode) { - var active = this._active; - if (!active || !active.encoding) - return aCode; - - var difference = active.encoding[aCode]; - var unicode = GlyphsUnicode[difference]; + var unicode = GlyphsUnicode[this.active.encoding[aCode]]; return unicode ? "0x" + unicode : aCode; } }; @@ -84,6 +79,15 @@ var Font = function(aName, aFile, aEncoding, aCharset, aBBox, aType) { break; case "TrueType": + return Fonts[aName] = { + data: null, + encoding: {}, + charset: null, + loading: false + }; + + // TrueType is disabled for the moment since the sanitizer prevent it + // from loading this.mimetype = "font/ttf"; var ttf = new TrueType(aFile); this.font = ttf.data; @@ -1140,9 +1144,7 @@ var Type1Parser = function() { * chapter 8. */ this.flattenCharstring = function(aCharstring, aSubrs) { - var operandStackIndex = 0; operandStack.clear(); - executionStack.clear(); executionStack.push(aCharstring.slice()); @@ -1434,8 +1436,7 @@ CFF.prototype = { log("Glyph " + i + " has a wrong value: " + c + " in charstring: " + data); log("the default value is glyph " + charstrings[i].glyph + " and is supposed to be: " + charstrings[i].charstring); } - for (var k = 0; k < bytes.length; k++) - charstring.push(bytes[k]); + charstring = charstring.concat(bytes); } } r.push(charstring); @@ -1503,7 +1504,7 @@ CFF.prototype = { var defaultWidth = this.encodeNumber(0); var privateData = [].concat( defaultWidth, [20], - defaultWidth, [21], + [139, 21], // nominalWidth [ 119, 159, 248, 97, 159, 247, 87, 159, 6, 30, 10, 3, 150, 37, 255, 12, 9, From c9e0b056782a306da7f980db00388e73e0798572 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 16 Jun 2011 03:55:45 +0200 Subject: [PATCH 62/72] Resolve the char->glyphs mapping issue --- PDFFont.js | 2 +- pdf.js | 15 +++++++++------ test.js | 19 +++++++++++-------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index d106e0b23..48554d9d1 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -31,7 +31,7 @@ var fontCount = 0; var Fonts = { _active: null, get active() { - return this._active || { encoding: {} }; + return this._active || { encoding: [] }; }, set active(aName) { diff --git a/pdf.js b/pdf.js index 2b7eb1e1b..fe636bcf9 100644 --- a/pdf.js +++ b/pdf.js @@ -795,7 +795,6 @@ var Lexer = (function() { } } - x = Fonts.unicodeFromCode(x); str += String.fromCharCode(x); break; case '\r': @@ -811,8 +810,7 @@ var Lexer = (function() { } break; default: - var unicode = Fonts.unicodeFromCode(ch.charCodeAt(0)); - str += String.fromCharCode(unicode); + str += ch; break; } } while (!done); @@ -1730,7 +1728,7 @@ var CanvasGraphics = (function() { var descriptor = xref.fetch(fontDict.get("FontDescriptor")); var fontName = descriptor.get("FontName").name; fontName = fontName.replace("+", "_"); - + var font = Fonts[fontName]; if (!font) { var fontFile = descriptor.get2("FontFile", "FontFile2"); @@ -1760,7 +1758,7 @@ var CanvasGraphics = (function() { for (var j = 0; j < widths.length; j++) { var index = widths[j]; if (index) - charset.push(encoding[j + firstchar]); + charset.push(encoding[j + firstchar]); } } } @@ -2054,7 +2052,12 @@ var CanvasGraphics = (function() { this.ctx.scale(1, -1); this.ctx.transform.apply(this.ctx, this.current.textMatrix); - this.ctx.fillText(text, this.current.x, this.current.y); + // Replace characters code by glyphs code + var glyphs = []; + for (var i = 0; i < text.length; i++) + glyphs[i] = String.fromCharCode(Fonts.unicodeFromCode(text[i].charCodeAt(0))); + + this.ctx.fillText(glyphs.join(""), this.current.x, this.current.y); this.current.x += this.ctx.measureText(text).width; this.ctx.restore(); diff --git a/test.js b/test.js index 75b720002..d0d386872 100644 --- a/test.js +++ b/test.js @@ -1,7 +1,7 @@ /* -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- / /* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */ -var pdfDocument, canvas, pageDisplay, pageNum, pageTimeout; +var pdfDocument, canvas, pageDisplay, pageNum, pageInterval; function load() { canvas = document.getElementById("canvas"); canvas.mozOpaque = true; @@ -48,7 +48,7 @@ function gotoPage(num) { function displayPage(num) { if (pageNum != num) - window.clearTimeout(pageTimeout); + window.clearTimeout(pageInterval); document.getElementById("pageNumber").value = num; @@ -57,7 +57,6 @@ function displayPage(num) { var page = pdfDocument.getPage(pageNum = num); var t1 = Date.now(); - var ctx = canvas.getContext("2d"); ctx.save(); ctx.fillStyle = "rgb(255, 255, 255)"; @@ -73,17 +72,21 @@ function displayPage(num) { page.compile(gfx, fonts); var t2 = Date.now(); - var interval = setInterval(function() { + // FIXME This need to be replaced by an event + pageInterval = setInterval(function() { for (var i = 0; i < fonts.length; i++) { if (fonts[i].loading) return; } - - page.display(gfx); var t3 = Date.now(); + + clearInterval(pageInterval); + page.display(gfx); + + var t4 = Date.now(); + var infoDisplay = document.getElementById("info"); - infoDisplay.innerHTML = "Time to load/compile/render: "+ (t1 - t0) + "/" + (t2 - t1) + "/" + (t3 - t2) + " ms"; - clearInterval(interval); + infoDisplay.innerHTML = "Time to load/compile/fonts/render: "+ (t1 - t0) + "/" + (t2 - t1) + "/" + (t3 - t2) + "/" + (t4 - t3) + " ms"; }, 10); } From 675b2f0471e8542baec6ce4a7ab8e66de58d215e Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 16 Jun 2011 09:34:43 +0200 Subject: [PATCH 63/72] Remove some debug leftovers and add some comments about future directions for the code --- PDFFont.js | 192 +++++++++++++++++++++++++++++++----------------- PDFFontUtils.js | 21 +++++- 2 files changed, 142 insertions(+), 71 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 48554d9d1..4278ae29a 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -16,10 +16,10 @@ var kMaxGlyphsCount = 65526; */ var kMaxWaitForFontFace = 1000; - /* - * Useful for debugging when you want to certains operations depending on how - * many fonts are loaded. - */ +/** + * Useful for debugging when you want to certains operations depending on how + * many fonts are loaded. + */ var fontCount = 0; /** @@ -55,7 +55,7 @@ var Fonts = { * As an improvment the last parameter can be replaced by an automatic guess * of the font type based on the first byte of the file. * - * XXX There is now too many parameters, this should be turned into an + * FIXME There is now too many parameters, this should be turned into an * object containing all the required informations about the font */ var Font = function(aName, aFile, aEncoding, aCharset, aBBox, aType) { @@ -110,6 +110,15 @@ var Font = function(aName, aFile, aEncoding, aCharset, aBBox, aType) { this.bind(); }; + +/** + * A bunch of the OpenType code is duplicate between this class and the + * TrueType code, this is intentional and will merge in a future version + * where all the code relative to OpenType will probably have its own + * class and will take decision without the Fonts consent. + * But at the moment it allows to develop around the TrueType rewriting + * on the fly without messing up with the 'regular' Type1 to OTF conversion. + */ Font.prototype = { name: null, font: null, @@ -477,7 +486,6 @@ Font.prototype = { /** HMTX */ hmtx = [0x01, 0xF4, 0x00, 0x00]; for (var i = 0; i < charstrings.length; i++) { - // XXX this can easily broke var charstring = charstrings[i].charstring; var width = FontsUtils.integerToBytes(charstring[1], 2); var lsb = FontsUtils.integerToBytes(charstring[0], 2); @@ -503,7 +511,7 @@ Font.prototype = { this._createTableEntry(otf, offsets, "name", name); /** POST */ - // XXX get those info from the Font dict! + // FIXME Get those informations from the FontInfo structure post = [ 0x00, 0x03, 0x00, 0x00, // Version number 0x00, 0x00, 0x01, 0x00, // italicAngle @@ -528,13 +536,15 @@ Font.prototype = { var fontData = []; for (var i = 0; i < offsets.currentOffset; i++) fontData.push(otf[i]); - - //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".otf"); return fontData; } }; +/** + * FontsUtils is a static class dedicated to hold codes that are not related + * to fonts in particular and needs to be share between them. + */ var FontsUtils = { integerToBytes: function fu_integerToBytes(aValue, aBytesCount) { var bytes = []; @@ -549,7 +559,7 @@ var FontsUtils = { return bytes; }, - bytesToInteger: function(aBytesArray) { + bytesToInteger: function fu_bytesToInteger(aBytesArray) { var value = 0; for (var i = 0; i < aBytesArray.length; i++) value = (value << 8) + aBytesArray[i]; @@ -575,9 +585,14 @@ var FontsUtils = { /** Implementation dirty logic starts here */ /** - * At the moment TrueType is just a stub that does mostly nothing but in a - * (near?) future this class will rewrite the font to ensure it is well formed - * and valid in the point of view of the sanitizer. + * The TrueType class verify that the ttf embedded inside the PDF is correct in + * the point of view of the OTS sanitizer and rewrite it on the fly otherwise. + * + * At the moment the rewiting only support rewriting missing 'OS/2' table. + * This class is unused at the moment since the 'cmap' table of the test + * document is not missing but use and old version of the 'cmap' table that + * is deprecated and not supported by the sanitizer... + * */ var TrueType = function(aFile) { var header = this._readOpenTypeHeader(aFile); @@ -604,6 +619,8 @@ var TrueType = function(aFile) { tables.push(table); } + + // Tables needs to be written by ascendant alphabetic order tables.sort(function(a, b) { return a.tag > b.tag; }); @@ -714,7 +731,6 @@ var TrueType = function(aFile) { fontData.push(ttf[i]); this.data = ttf; - //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".ttf"); return; } else if (requiredTables.lenght) { error("Table " + requiredTables[0] + " is missing from the TruType font"); @@ -825,6 +841,7 @@ TrueType.prototype = { } }; + /** * This dictionary holds decoded fonts data. */ @@ -954,19 +971,58 @@ var Type1Parser = function() { "6": "hlineto", "7": "vlineto", "8": "rrcurveto", - "9": "closepath", + + // closepath is a Type1 command that do not take argument and is useless + // in Type2 and it can simply be ignored. + "9": null, // closepath + "10": "callsubr", + + // return is normally used inside sub-routines to tells to the execution + // flow that it can be back to normal. + // During the translation process Type1 charstrings will be flattened and + // sub-routines will be embedded directly into the charstring directly, so + // this can be ignored safely. "11": "return", + "12": { - "0": "dotsection", - "1": "vstem3", - "3": "hstem3", - "6": "seac", - "7": "sbw", + // dotsection is a Type1 command to specify some hinting feature for dots + // that do not take a parameter and it can safely be ignored for Type2. + "0": null, // dotsection + + // [vh]stem3 are Type1 only and Type2 supports [vh]stem with multiple + // parameters, so instead of returning [vh]stem3 take a shortcut and + // return [vhstem] instead. + "1": "vstem", + "2": "hstem", + + // Type1 only command with command not (yet) built-in ,throw an error + "6": -1, // seac + "7": -1, //sbw + "12": "div", + + // callothersubr is a mechanism to make calls on the postscript + // interpreter. + // TODO When decodeCharstring encounter such a command it should + // directly do: + // - pop the previous charstring[] command into 'index' + // - pop the previous charstring[] command and ignore it, it is + // normally the number of element to push on the stack before + // the command but since everything will be pushed on the stack + // by the PS interpreter when it will read them that is safe to + // ignore this command + // - push the content of the OtherSubrs[index] inside charstring[] "16": "callothersubr", + "17": "pop", - "33": "setcurrentpoint" + + // setcurrentpoint sets the current point to x, y without performing a + // moveto (this is a one shot positionning command). This is used only + // with the return of an OtherSubrs call. + // TODO Implement the OtherSubrs charstring embedding and replace this + // call by a no-op, like 2 'pop' commands for example. + "33": null, //setcurrentpoint }, "13": "hsbw", "14": "endchar", @@ -986,12 +1042,27 @@ var Type1Parser = function() { value = aStream.getByte(); if (value < 32) { + var command = null; if (value == 12) { - value = charStringDictionary["12"][aStream.getByte()]; + var escape = aStream.getByte(); + command = charStringDictionary["12"][escape]; i++; } else { - value = charStringDictionary[value]; + command = charStringDictionary[value]; } + + // Some charstring commands are meaningless in Type2 and will return + // a null, let's just ignored them + if (!command && i < count) + continue; + else if (!command) + break; + else if (command == -1) { + log("decodeCharstring: " + charString); + error("Support for Type1 command " + value + " (" + escape + ") is not implemented"); + } + + value = command; } else if (value <= 246) { value = parseInt(value) - 139; } else if (value <= 250) { @@ -1140,7 +1211,7 @@ var Type1Parser = function() { * Flatten the commands by interpreting the postscript code and replacing * every 'callsubr', 'callothersubr' by the real commands. * At the moment OtherSubrs are not fully supported and only otherSubrs 0-4 - * as descrived in 'Using Subroutines' of 'Adobe Type 1 Font Format', + * as described in 'Using Subroutines' of 'Adobe Type 1 Font Format', * chapter 8. */ this.flattenCharstring = function(aCharstring, aSubrs) { @@ -1156,11 +1227,6 @@ var Type1Parser = function() { operandStack.push(obj); } else { switch (obj) { - case "vstem3": - case "hstem3": - operandStack.push(obj.slice(0, 5)); - break; - case "callsubr": var index = operandStack.pop(); executionStack.push(aSubrs[index].slice()); @@ -1188,7 +1254,6 @@ var Type1Parser = function() { operandStack.pop(); break; - case "closepath": case "return": break; @@ -1207,13 +1272,6 @@ var Type1Parser = function() { operandStack.push("endchar"); return operandStack.clone(); - case "setcurrentpoint": - case "dotsection": - case "seac": - case "sbw": - error(obj + " parsing is not implemented (yet)"); - break; - default: operandStack.push(obj); break; @@ -1345,6 +1403,20 @@ CFF.prototype = { var glyphs = []; var glyphsChecker = {}; var subrs = aFontInfo.subrs; + + // FIXME This code is actually the only reason the dummy PS Interpreter + // called Type1Parser continue to lives, basically the goal here is + // to embed the OtherSubrs/Subrs into the charstring directly. + // But since Type2 charstrings use a bias to index Subrs and can + // theorically store twice the number of Type1 we could directly + // save the OtherSubrs and Subrs in the Type2 table for Subrs + // and avoid this 'flattening' slow method. + // + // The other thinds done by this method is splitting the initial + // 'width lsb hswb' command of Type1 to something similar in Type2 + // that is: 'width dx moveto' but this can be done in the + // decodeCharstring method directly (maybe one day it will be called + // translateCharstring?) var parser = new Type1Parser(); for (var i = 0; i < charstrings.length; i++) { var charstring = charstrings[i].charstring.slice(); @@ -1417,7 +1489,11 @@ CFF.prototype = { "hvcurveto": 31, }; - // Encode the glyph and add it to the FUX + // FIXME Concatenating array with this algorithm (O²) is expensive and + // can be avoided if the voodoo's dance of charstrings decoding + // encoding is left for dead. Actually charstrings command number + // are converted to a string and then back to a number with the + // next few lines of code... var r = [[0x40, 0x0E]]; for (var i = 0; i < glyphs.length; i++) { var data = glyphs[i].slice(); @@ -1427,7 +1503,7 @@ CFF.prototype = { if (!IsNum(c)) { var token = getNumFor[c]; if (!token) - error(c); + error("Token " + c + " is not recognized in charstring " + data); charstring.push(token); } else { try { @@ -1445,7 +1521,6 @@ CFF.prototype = { var charstringsIndex = this.createCFFIndexHeader(r, true); charstringsIndex = charstringsIndex.join(" ").split(" "); // XXX why? - //Top Dict Index var topDictIndex = [ 0x00, 0x01, 0x01, 0x01, 0x30, @@ -1480,25 +1555,17 @@ CFF.prototype = { topDictIndex.push(18); // Private topDictIndex = topDictIndex.join(" ").split(" "); - // Top Dict Index - cff.set(topDictIndex, currentOffset); - currentOffset += topDictIndex.length; + var indexes = [ + topDictIndex, stringsIndex, + globalSubrsIndex, charset, + charstringsIndex + ]; - // Strings Index - cff.set(stringsIndex, currentOffset); - currentOffset += stringsIndex.length; - - // Global Subrs Index - cff.set(globalSubrsIndex, currentOffset); - currentOffset += globalSubrsIndex.length; - - // Charset Index - cff.set(charset, currentOffset); - currentOffset += charset.length; - - // Fill charstrings data - cff.set(charstringsIndex, currentOffset); - currentOffset += charstringsIndex.length; + for (var i = 0; i < indexes.length; i++) { + var index = indexes[i]; + cff.set(index, currentOffset); + currentOffset += index.length; + } // Private Data var defaultWidth = this.encodeNumber(0); @@ -1532,19 +1599,10 @@ CFF.prototype = { cff.set(shit, currentOffset); currentOffset += shit.length; - - dump("==================== debug ===================="); - //var file = new Uint8Array(cff, 0, currentOffset); - //var parser = new Type2Parser(); - //parser.parse(new Stream(file)); - var fontData = []; for (var i = 0; i < currentOffset; i++) fontData.push(cff[i]); - //log("== write to file"); - //writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".cff"); - return fontData; } }; diff --git a/PDFFontUtils.js b/PDFFontUtils.js index 072dd48f1..e242121db 100644 --- a/PDFFontUtils.js +++ b/PDFFontUtils.js @@ -349,13 +349,26 @@ var Type2Parser = function(aFilePath) { }; /* -var cff = new Type2Parser("test.cff"); -cff.parse(); -*/ + * To try the Type2 decoder on a local file in the current directory: + * + * var cff = new Type2Parser("file.cff"); + * cff.parse(this.data); + * + * To try the Type2 decoder on a custom built CFF array: + * + * var file = new Uint8Array(cffFileArray, 0, cffFileSize); + * var parser = new Type2Parser(); + * parser.parse(new Stream(file)); + * + */ /** - * Write to a file (works only on Firefox in privilege mode"); + * Write to a file to the disk (works only on Firefox in privilege mode) + * but this is useful for dumping a font file to the disk and check with + * fontforge or the ots program what's wrong with the file. + * + * writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".cff"); */ function writeToFile(aBytes, aFilePath) { netscape.security.PrivilegeManager.enablePrivilege("UniversalXPConnect"); From eb6f7499dc8c556ef3f8f4e40d346f6bf194953a Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 17 Jun 2011 06:02:29 +0200 Subject: [PATCH 64/72] Rework some code to enhance performance (CIIM6/CIIM9 are rejected by the sanitizer now) --- PDFFont.js | 338 ++++++++++++++++++++---------------------------- PDFFontUtils.js | 3 +- 2 files changed, 142 insertions(+), 199 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 4278ae29a..1bf437066 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -546,7 +546,14 @@ Font.prototype = { * to fonts in particular and needs to be share between them. */ var FontsUtils = { + _bytesArray: new Uint8Array(4), integerToBytes: function fu_integerToBytes(aValue, aBytesCount) { + // If we want only one byte, take a fast path + if (aBytesCount == 1) { + this._bytesArray.set([aValue]); + return this._bytesArray[0]; + } + var bytes = []; for (var i = 0; i < aBytesCount; i++) bytes[i] = 0x00; @@ -1087,61 +1094,6 @@ var Type1Parser = function() { return charString; }; - /* - * The operand stack holds arbitrary PostScript objects that are the operands - * and results of PostScript operators being executed. The interpreter pushes - * objects on the operand stack when it encounters them as literal data in a - * program being executed. When an operator requires one or more operands, it - * obtains them by popping them off the top of the operand stack. When an - * operator returns one or more results, it does so by pushing them on the - * operand stack. - */ - var operandStack = new Stack(40); - - // Flag indicating if the topmost operand of the operandStack is an array - var operandIsArray = 0; - - /* - * The dictionary stack holds only dictionary objects. The current set of - * dictionaries on the dictionary stack defines the environment for all - * implicit name searches, such as those that occur when the interpreter - * encounters an executable name. The role of the dictionary stack is - * introduced in Section 3.3, “Data Types and Objects,” and is further - * explained in Section 3.5, “Execution.” of the PostScript Language - * Reference. - */ - var systemDict = new Dict(), - globalDict = new Dict(), - userDict = new Dict(); - - var dictionaryStack = new Stack(); - dictionaryStack.push(systemDict); - dictionaryStack.push(globalDict); - dictionaryStack.push(userDict); - - /* - * The execution stack holds executable objects (mainly procedures and files) - * that are in intermediate stages of execution. At any point in the - * execution of a PostScript program, this stack represents the program’s - * call stack. Whenever the interpreter suspends execution of an object to - * execute some other object, it pushes the new object on the execution - * stack. When the interpreter finishes executing an object, it pops that - * object off the execution stack and resumes executing the suspended object - * beneath it. - */ - var executionStack = new Stack(); - - /* - * Return the next token in the execution stack - */ - function nextInStack() { - var currentProcedure = executionStack.peek(); - var command = currentProcedure.shift(); - if (!currentProcedure.length) - executionStack.pop(); - return command; - }; - /** * Returns an object containing a Subrs array and a CharStrings array * extracted from and eexec encrypted block of data @@ -1205,79 +1157,6 @@ var Type1Parser = function() { subrs: subrs, charstrings: glyphs } - }; - - /* - * Flatten the commands by interpreting the postscript code and replacing - * every 'callsubr', 'callothersubr' by the real commands. - * At the moment OtherSubrs are not fully supported and only otherSubrs 0-4 - * as described in 'Using Subroutines' of 'Adobe Type 1 Font Format', - * chapter 8. - */ - this.flattenCharstring = function(aCharstring, aSubrs) { - operandStack.clear(); - executionStack.clear(); - executionStack.push(aCharstring.slice()); - - var leftSidebearing = 0; - var lastPoint = 0; - while (true) { - var obj = nextInStack(); - if (IsInt(obj) || IsBool(obj)) { - operandStack.push(obj); - } else { - switch (obj) { - case "callsubr": - var index = operandStack.pop(); - executionStack.push(aSubrs[index].slice()); - break; - - case "callothersubr": - var index = operandStack.pop(); - var count = operandStack.pop(); - var data = operandStack.pop(); - // XXX The callothersubr needs to support at least the 3 defaults - // otherSubrs of the spec - if (index != 3) - error("callothersubr for index: " + index); - operandStack.push(3); - operandStack.push("callothersubr"); - break; - - case "div": - var num2 = operandStack.pop(); - var num1 = operandStack.pop(); - operandStack.push(num2 / num1); - break; - - case "pop": - operandStack.pop(); - break; - - case "return": - break; - - case "hsbw": - var charWidthVector = operandStack.pop(); - var leftSidebearing = operandStack.pop(); - operandStack.push(charWidthVector); - - if (leftSidebearing) { - operandStack.push(leftSidebearing); - operandStack.push("hmoveto"); - } - break; - - case "endchar": - operandStack.push("endchar"); - return operandStack.clone(); - - default: - operandStack.push(obj); - break; - } - } - } } }; @@ -1339,10 +1218,11 @@ CFF.prototype = { return data; }, - encodeNumber: function(aValue) { + encodeNumber: function(aValue, aIsCharstring) { var x = 0; - // XXX we don't really care about Type2 optimization here... - if (aValue >= -32768 && aValue <= 32767) { + if (aIsCharstring && aValue >= -107 && aValue <= 107) { + return [aValue + 139]; + } else if (aValue >= -32768 && aValue <= 32767) { return [ 28, FontsUtils.integerToBytes(aValue >> 8, 1), @@ -1389,6 +1269,128 @@ CFF.prototype = { return charstrings; }, + /* + * Flatten the commands by interpreting the postscript code and replacing + * every 'callsubr', 'callothersubr' by the real commands. + * + * TODO This function also do a string to command number transformation + * that can probably be avoided if the Type1 decodeCharstring code is smarter + */ + commandsMap: { + "hstem": 1, + "vstem": 3, + "vmoveto": 4, + "rlineto": 5, + "hlineto": 6, + "vlineto": 7, + "rrcurveto": 8, + "endchar": 14, + "rmoveto": 21, + "hmoveto": 22, + "vhcurveto": 30, + "hvcurveto": 31, + }, + + flattenCharstring: function(aCharstring, aSubrs) { + var i = 0; + while (true) { + var obj = aCharstring[i]; + if (IsString(obj)) { + switch (obj) { + case "callsubr": + var subr = aSubrs[aCharstring[i- 1]].slice(); + if (subr.length > 1) { + subr = this.flattenCharstring(subr, aSubrs); + subr.pop(); + aCharstring.splice(i - 1, 2, subr); + } + else + aCharstring.splice(i - 1, 2); + + i -= 1; + break; + + case "callothersubr": + var index = aCharstring[i - 1]; + var count = aCharstring[i - 2]; + var data = aCharstring[i - 3]; + + // XXX The callothersubr needs to support at least the 3 defaults + // otherSubrs of the spec + if (index != 3) + error("callothersubr for index: " + index + " (" + aCharstring + ")"); + + if (!data) { + aCharstring.splice(i - 2, 3, "pop", 3); + i -= 2; + } else { + // 5 to remove the arguments, the callothersubr call and the pop command + aCharstring.splice(i - 3, 5, 3); + i -= 3; + } + break; + + case "div": + var num2 = aCharstring[i - 1]; + var num1 = aCharstring[i - 2]; + aCharstring.splice(i - 2, 3, num2 / num1); + i -= 2; + break; + + case "pop": + aCharstring.splice(i - 2, 2); + i -= 1; + break; + + + case "hsbw": + var charWidthVector = aCharstring[i - 1]; + var leftSidebearing = aCharstring[i - 2]; + aCharstring.splice(i - 2, 3, charWidthVector, leftSidebearing, "hmoveto"); + break; + + case "endchar": + case "return": + // CharString is ready to be re-encode to commands number at this point + for (var j = 0; j < aCharstring.length; j++) { + var command = aCharstring[j]; + if (IsNum(command)) { + var number = this.encodeNumber(command, true); + aCharstring.splice(j, 1); + for (var k = 0; k < number.length; k++) + aCharstring.splice(j + k, 0, number[k]); + j+= number.length - 1; + } else if (IsString(command)) { + var command = this.commandsMap[command]; + if (IsArray(command)) { + aCharstring.splice(j - 1, 1, command[0], command[1]); + j += 1; + } else { + aCharstring[j] = command; + } + } else if (IsArray(command)) { + aCharstring.splice(j, 1); + + // command has already been translated, just add them to the + // charstring directly + for (var k = 0; k < command.length; k++) + aCharstring.splice(j + k, 0, command[k]); + j+= command.length - 1; + } else { // what else? + error("Error while flattening the Type1 charstring: " + aCharstring); + } + } + return aCharstring; + + default: + break; + } + } + i++; + } + error("failing with i = " + i + " in charstring:" + aCharstring + "(" + aCharstring.length + ")"); + }, + convertToCFF: function(aFontInfo) { var debug = false; function dump(aMsg) { @@ -1398,39 +1400,24 @@ CFF.prototype = { var charstrings = this.getOrderedCharStrings(aFontInfo.charstrings); + // Starts the conversion of the Type1 charstrings to Type2 + var start = Date.now(); var charstringsCount = 0; var charstringsDataLength = 0; var glyphs = []; - var glyphsChecker = {}; - var subrs = aFontInfo.subrs; - - // FIXME This code is actually the only reason the dummy PS Interpreter - // called Type1Parser continue to lives, basically the goal here is - // to embed the OtherSubrs/Subrs into the charstring directly. - // But since Type2 charstrings use a bias to index Subrs and can - // theorically store twice the number of Type1 we could directly - // save the OtherSubrs and Subrs in the Type2 table for Subrs - // and avoid this 'flattening' slow method. - // - // The other thinds done by this method is splitting the initial - // 'width lsb hswb' command of Type1 to something similar in Type2 - // that is: 'width dx moveto' but this can be done in the - // decodeCharstring method directly (maybe one day it will be called - // translateCharstring?) - var parser = new Type1Parser(); for (var i = 0; i < charstrings.length; i++) { var charstring = charstrings[i].charstring.slice(); var glyph = charstrings[i].glyph; - if (glyphsChecker[glyph]) - error("glyphs already exists!"); - glyphsChecker[glyph] = true; - var flattened = parser.flattenCharstring(charstring, subrs); + var flattened = this.flattenCharstring(charstring, aFontInfo.subrs); glyphs.push(flattened); charstringsCount++; charstringsDataLength += flattened.length; } + + var end = Date.now(); dump("There is " + charstringsCount + " glyphs (size: " + charstringsDataLength + ")"); + dump("Time to flatten the strings is : " + (end -start)); // Create a CFF font data var cff = new Uint8Array(kMaxFontFileSize); @@ -1473,52 +1460,7 @@ CFF.prototype = { charset.push(bytes[1]); } - // Convert charstrings - var getNumFor = { - "hstem": 1, - "vstem": 3, - "vmoveto": 4, - "rlineto": 5, - "hlineto": 6, - "vlineto": 7, - "rrcurveto": 8, - "endchar": 14, - "rmoveto": 21, - "hmoveto": 22, - "vhcurveto": 30, - "hvcurveto": 31, - }; - - // FIXME Concatenating array with this algorithm (O²) is expensive and - // can be avoided if the voodoo's dance of charstrings decoding - // encoding is left for dead. Actually charstrings command number - // are converted to a string and then back to a number with the - // next few lines of code... - var r = [[0x40, 0x0E]]; - for (var i = 0; i < glyphs.length; i++) { - var data = glyphs[i].slice(); - var charstring = []; - for (var j = 0; j < data.length; j++) { - var c = data[j]; - if (!IsNum(c)) { - var token = getNumFor[c]; - if (!token) - error("Token " + c + " is not recognized in charstring " + data); - charstring.push(token); - } else { - try { - var bytes = this.encodeNumber(c); - } catch(e) { - log("Glyph " + i + " has a wrong value: " + c + " in charstring: " + data); - log("the default value is glyph " + charstrings[i].glyph + " and is supposed to be: " + charstrings[i].charstring); - } - charstring = charstring.concat(bytes); - } - } - r.push(charstring); - } - - var charstringsIndex = this.createCFFIndexHeader(r, true); + var charstringsIndex = this.createCFFIndexHeader([[0x40, 0x0E]].concat(glyphs), true); charstringsIndex = charstringsIndex.join(" ").split(" "); // XXX why? //Top Dict Index diff --git a/PDFFontUtils.js b/PDFFontUtils.js index e242121db..086648fe2 100644 --- a/PDFFontUtils.js +++ b/PDFFontUtils.js @@ -310,7 +310,8 @@ var Type2Parser = function(aFilePath) { // Read the Global Subr Index that comes just after the Strings Index // (cf. "The Compact Font Format Specification" Chapter 16) dump("Reading Global Subr Index"); - var subrs = readFontIndexData(aStream); + var subrs = readFontIndexData(aStream, true); + dump(subrs); // Reading Private Dict var private = font.get("Private"); From 88512fbdd9fda60993ad718b97a407e81f5f33c1 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 17 Jun 2011 06:06:24 +0200 Subject: [PATCH 65/72] Remove some leftovers from the previous patch --- PDFFont.js | 45 --------------------------------------------- 1 file changed, 45 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 1bf437066..4706121f2 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -589,8 +589,6 @@ var FontsUtils = { }; -/** Implementation dirty logic starts here */ - /** * The TrueType class verify that the ttf embedded inside the PDF is correct in * the point of view of the OTS sanitizer and rewrite it on the fly otherwise. @@ -852,49 +850,6 @@ TrueType.prototype = { /** * This dictionary holds decoded fonts data. */ -var PSFonts = new Dict(); - -var Stack = function(aStackSize) { - var innerStack = new Array(aStackSize || 0); - - this.push = function(aOperand) { - innerStack.push(aOperand); - }; - - this.pop = function() { - if (!this.count()) - throw new Error("stackunderflow"); - return innerStack.pop(); - }; - - this.peek = function() { - if (!this.count()) - return null; - return innerStack[innerStack.length - 1]; - }; - - this.get = function(aIndex) { - return innerStack[aIndex]; - }; - - this.clear = function() { - innerStack = []; - }; - - this.count = function() { - return innerStack.length; - }; - - this.dump = function() { - for (var i = 0; i < this.length; i++) - log(innerStack[i]); - }; - - this.clone = function() { - return innerStack.slice(); - }; -}; - var Type1Parser = function() { // Turn on this flag for additional debugging logs var debug = false; From 01847a0e29e93ad8585f2bbe5575c88b0a38e0e3 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 17 Jun 2011 06:44:16 +0200 Subject: [PATCH 66/72] Get rid or IsNum/IsString/IsArray in flattenCharstring --- PDFFont.js | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 4706121f2..00637f2cf 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -188,7 +188,7 @@ Font.prototype = { ctx.fillText(testString, 20, 20); var start = Date.now(); - var interval = window.setInterval(function(self) { + var interval = window.setInterval(function canvasInterval(self) { ctx.font = "bold italic 20px " + fontName + ", Symbol, Arial"; // For some reasons the font has not loaded, so mark it loaded for the @@ -1250,7 +1250,7 @@ CFF.prototype = { var i = 0; while (true) { var obj = aCharstring[i]; - if (IsString(obj)) { + if (obj.charAt) { switch (obj) { case "callsubr": var subr = aSubrs[aCharstring[i- 1]].slice(); @@ -1309,13 +1309,13 @@ CFF.prototype = { // CharString is ready to be re-encode to commands number at this point for (var j = 0; j < aCharstring.length; j++) { var command = aCharstring[j]; - if (IsNum(command)) { + if (parseFloat(command) == command) { var number = this.encodeNumber(command, true); aCharstring.splice(j, 1); for (var k = 0; k < number.length; k++) aCharstring.splice(j + k, 0, number[k]); j+= number.length - 1; - } else if (IsString(command)) { + } else if (command.charAt) { var command = this.commandsMap[command]; if (IsArray(command)) { aCharstring.splice(j - 1, 1, command[0], command[1]); @@ -1323,7 +1323,7 @@ CFF.prototype = { } else { aCharstring[j] = command; } - } else if (IsArray(command)) { + } else { aCharstring.splice(j, 1); // command has already been translated, just add them to the @@ -1331,8 +1331,6 @@ CFF.prototype = { for (var k = 0; k < command.length; k++) aCharstring.splice(j + k, 0, command[k]); j+= command.length - 1; - } else { // what else? - error("Error while flattening the Type1 charstring: " + aCharstring); } } return aCharstring; From 89c9bc39de7577c4a55430c609435c424b02d67d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 17 Jun 2011 06:53:18 +0200 Subject: [PATCH 67/72] Add a direct translation to one of the way to format charstring number in flattenCharstring (28, x, y) --- PDFFont.js | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 00637f2cf..24c4a8c3e 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1173,24 +1173,13 @@ CFF.prototype = { return data; }, - encodeNumber: function(aValue, aIsCharstring) { + encodeNumber: function(aValue) { var x = 0; - if (aIsCharstring && aValue >= -107 && aValue <= 107) { - return [aValue + 139]; - } else if (aValue >= -32768 && aValue <= 32767) { - return [ - 28, - FontsUtils.integerToBytes(aValue >> 8, 1), - FontsUtils.integerToBytes(aValue, 1) - ]; + if (aValue >= -32768 && aValue <= 32767) { + return [ 28, aValue >> 8, aValue ]; } else if (aValue >= (-2147483647-1) && aValue <= 2147483647) { return [ - 0xFF, - FontsUtils.integerToBytes(aValue >> 24, 1), - FontsUtils.integerToBytes(aValue >> 16, 1), - FontsUtils.integerToBytes(aValue >> 8, 1), - FontsUtils.integerToBytes(aValue, 1) - ]; + 0xFF, aValue >> 24, Value >> 16, aValue >> 8, aValue ]; } else { error("Value: " + aValue + " is not allowed"); } @@ -1227,7 +1216,7 @@ CFF.prototype = { /* * Flatten the commands by interpreting the postscript code and replacing * every 'callsubr', 'callothersubr' by the real commands. - * + * * TODO This function also do a string to command number transformation * that can probably be avoided if the Type1 decodeCharstring code is smarter */ @@ -1310,11 +1299,8 @@ CFF.prototype = { for (var j = 0; j < aCharstring.length; j++) { var command = aCharstring[j]; if (parseFloat(command) == command) { - var number = this.encodeNumber(command, true); - aCharstring.splice(j, 1); - for (var k = 0; k < number.length; k++) - aCharstring.splice(j + k, 0, number[k]); - j+= number.length - 1; + aCharstring.splice(j, 1, 28, command >> 8, command); + j+= 2; } else if (command.charAt) { var command = this.commandsMap[command]; if (IsArray(command)) { From d31bc90c855489a86c2de2f51a78981efdddb763 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 17 Jun 2011 07:14:11 +0200 Subject: [PATCH 68/72] Use a typed array in the CMAP construction function --- PDFFont.js | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 24c4a8c3e..95d80bfc8 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -274,15 +274,15 @@ Font.prototype = { }, _createCMAPTable: function font_createCMAPTable(aGlyphs) { - var characters = new Array(kMaxGlyphsCount); - for (var i = 0; i < aGlyphs.length; i++) { + var characters = new Uint16Array(kMaxGlyphsCount); + for (var i = 0; i < aGlyphs.length; i++) characters[aGlyphs[i].unicode] = i + 1; - } // Separate the glyphs into continuous range of codes, aka segment. var ranges = []; var range = []; - for (var i = 0; i < characters.length; i++) { + var count = characters.length; + for (var i = 0; i < count; i++) { if (characters[i]) { range.push(i); } else if (range.length) { @@ -548,22 +548,18 @@ Font.prototype = { var FontsUtils = { _bytesArray: new Uint8Array(4), integerToBytes: function fu_integerToBytes(aValue, aBytesCount) { - // If we want only one byte, take a fast path + var bytes = this._bytesArray; + if (aBytesCount == 1) { - this._bytesArray.set([aValue]); - return this._bytesArray[0]; + bytes.set([aValue]); + return bytes[0]; + } else if (aBytesCount == 2) { + bytes.set([aValue >> 8, aValue]); + return [bytes[0], bytes[1]]; + } else if (aBytesCount == 4) { + bytes.set([aValue >> 24, aValue >> 16, aValue >> 8, aValue]); + return [bytes[0], bytes[1], bytes[2], bytes[3]]; } - - var bytes = []; - for (var i = 0; i < aBytesCount; i++) - bytes[i] = 0x00; - - do { - bytes[--aBytesCount] = (aValue & 0xFF); - aValue = aValue >> 8; - } while (aBytesCount && aValue > 0); - - return bytes; }, bytesToInteger: function fu_bytesToInteger(aBytesArray) { @@ -875,7 +871,7 @@ var Type1Parser = function() { var value = ""; var count = aStream.length; for (var i = 0; i < count; i++) { - value = aStream.getByte(); + value = aStream[i]; if (aByteArray) decryptedString[i] = value ^ (r >> 8); else @@ -1054,7 +1050,7 @@ var Type1Parser = function() { * extracted from and eexec encrypted block of data */ this.extractFontInfo = function(aStream) { - var eexecString = decrypt(new Stream(aStream), kEexecEncryptionKey, 4, true); + var eexecString = decrypt(aStream, kEexecEncryptionKey, 4, true); var subrs = [], glyphs = []; var inSubrs = inGlyphs = false; var glyph = ""; @@ -1070,16 +1066,16 @@ var Type1Parser = function() { if (inSubrs && c == 0x52) { length = parseInt(length); - var stream = new Stream(eexecString.slice(i + 3, i + 3 + length)); - var encodedSubr = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); + var data = eexecString.slice(i + 3, i + 3 + length); + var encodedSubr = decrypt(data, kCharStringsEncryptionKey, 4).join(""); var subr = decodeCharString(new StringStream(encodedSubr)); subrs.push(subr); i += 3 + length; } else if (inGlyphs && c == 0x52) { length = parseInt(length); - var stream = new Stream(eexecString.slice(i + 3, i + 3 + length)); - var encodedCharstring = decrypt(stream, kCharStringsEncryptionKey, 4).join(""); + var data = eexecString.slice(i + 3, i + 3 + length); + var encodedCharstring = decrypt(data, kCharStringsEncryptionKey, 4).join(""); var subr = decodeCharString(new StringStream(encodedCharstring)); glyphs.push({ From 35ceea1ff20b9f1b65b1dd2ef872401e7adeeb45 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 17 Jun 2011 07:48:45 +0200 Subject: [PATCH 69/72] Do not use stream when it is not necessary --- PDFFont.js | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 95d80bfc8..5fa463250 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -990,32 +990,30 @@ var Type1Parser = function() { "31": "hvcurveto" }; - function decodeCharString(aStream) { - var start = Date.now(); + function decodeCharString(aArray) { var charString = []; var value = ""; - var count = aStream.length; + var count = aArray.length; for (var i = 0; i < count; i++) { - value = aStream.getByte(); + value = parseInt(aArray[i]); if (value < 32) { var command = null; if (value == 12) { - var escape = aStream.getByte(); + var escape = aArray[++i]; command = charStringDictionary["12"][escape]; - i++; } else { command = charStringDictionary[value]; } // Some charstring commands are meaningless in Type2 and will return // a null, let's just ignored them - if (!command && i < count) + if (!command && i < count) { continue; - else if (!command) + } else if (!command) { break; - else if (command == -1) { + } else if (command == -1) { log("decodeCharstring: " + charString); error("Support for Type1 command " + value + " (" + escape + ") is not implemented"); } @@ -1024,24 +1022,19 @@ var Type1Parser = function() { } else if (value <= 246) { value = parseInt(value) - 139; } else if (value <= 250) { - value = ((value - 247) * 256) + parseInt(aStream.getByte()) + 108; - i++; + value = ((value - 247) * 256) + parseInt(aArray[++i]) + 108; } else if (value <= 254) { - value = -((value - 251) * 256) - parseInt(aStream.getByte()) - 108; - i++; + value = -((value - 251) * 256) - parseInt(aArray[++i]) - 108; } else { - var byte = aStream.getByte(); + var byte = aArray[++i]; var high = (byte >> 1); - value = (byte - high) << 24 | aStream.getByte() << 16 | - aStream.getByte() << 8 | aStream.getByte(); - i += 4; + value = (byte - high) << 24 | aArray[++i] << 16 | + aArray[++i] << 8 | aArray[++i]; } charString.push(value); } - var end = Date.now(); - dump("Time to decode charString of length " + count + " is " + (end - start)); return charString; }; @@ -1067,16 +1060,16 @@ var Type1Parser = function() { if (inSubrs && c == 0x52) { length = parseInt(length); var data = eexecString.slice(i + 3, i + 3 + length); - var encodedSubr = decrypt(data, kCharStringsEncryptionKey, 4).join(""); - var subr = decodeCharString(new StringStream(encodedSubr)); + var encodedSubr = decrypt(data, kCharStringsEncryptionKey, 4, true); + var subr = decodeCharString(encodedSubr); subrs.push(subr); i += 3 + length; } else if (inGlyphs && c == 0x52) { length = parseInt(length); var data = eexecString.slice(i + 3, i + 3 + length); - var encodedCharstring = decrypt(data, kCharStringsEncryptionKey, 4).join(""); - var subr = decodeCharString(new StringStream(encodedCharstring)); + var encodedCharstring = decrypt(data, kCharStringsEncryptionKey, 4, true); + var subr = decodeCharString(encodedCharstring); glyphs.push({ glyph: glyph, @@ -1125,12 +1118,11 @@ var CFF = function(aFontName, aFontBBox, aFontFile) { fontInfo.name = aFontName; fontInfo.bbox = aFontBBox; - // XXX + // XXX This hold the glyph data as if, this should be improved this.glyphs = fontInfo.charstrings; this.data = this.convertToCFF(fontInfo); var end = Date.now(); - //log("Time to parse font is:" + (end - start)); }; CFF.prototype = { @@ -1232,13 +1224,22 @@ CFF.prototype = { }, flattenCharstring: function(aCharstring, aSubrs) { + var original = aCharstring.slice(); var i = 0; while (true) { var obj = aCharstring[i]; if (obj.charAt) { switch (obj) { case "callsubr": - var subr = aSubrs[aCharstring[i- 1]].slice(); + if (aCharstring[i - 1] == 351) { + log(original); + log(aCharstring); + error("..."); + aCharstring.splice(i - 1, 2); + continue; + } + + var subr = aSubrs[aCharstring[i - 1]].slice(); if (subr.length > 1) { subr = this.flattenCharstring(subr, aSubrs); subr.pop(); From b5915ab3cd823533d6a0eb478c0d360e9b7baa76 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 17 Jun 2011 08:36:52 +0200 Subject: [PATCH 70/72] Fix CIMM6/CIIM9 --- PDFFont.js | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/PDFFont.js b/PDFFont.js index 5fa463250..2db7e745f 100644 --- a/PDFFont.js +++ b/PDFFont.js @@ -1223,31 +1223,25 @@ CFF.prototype = { "hvcurveto": 31, }, - flattenCharstring: function(aCharstring, aSubrs) { + flattenCharstring: function(aGlyph, aCharstring, aSubrs) { var original = aCharstring.slice(); var i = 0; while (true) { var obj = aCharstring[i]; + if (obj == null) + return []; + if (obj.charAt) { switch (obj) { case "callsubr": - if (aCharstring[i - 1] == 351) { - log(original); - log(aCharstring); - error("..."); - aCharstring.splice(i - 1, 2); - continue; - } - var subr = aSubrs[aCharstring[i - 1]].slice(); if (subr.length > 1) { - subr = this.flattenCharstring(subr, aSubrs); + subr = this.flattenCharstring(aGlyph, subr, aSubrs); subr.pop(); aCharstring.splice(i - 1, 2, subr); - } - else + } else { aCharstring.splice(i - 1, 2); - + } i -= 1; break; @@ -1262,8 +1256,8 @@ CFF.prototype = { error("callothersubr for index: " + index + " (" + aCharstring + ")"); if (!data) { - aCharstring.splice(i - 2, 3, "pop", 3); - i -= 2; + aCharstring.splice(i - 2, 4, "pop", 3); + i -= 3; } else { // 5 to remove the arguments, the callothersubr call and the pop command aCharstring.splice(i - 3, 5, 3); @@ -1279,7 +1273,10 @@ CFF.prototype = { break; case "pop": - aCharstring.splice(i - 2, 2); + if (i) + aCharstring.splice(i - 2, 2); + else + aCharstring.splice(i - 1, 1); i -= 1; break; @@ -1287,7 +1284,11 @@ CFF.prototype = { case "hsbw": var charWidthVector = aCharstring[i - 1]; var leftSidebearing = aCharstring[i - 2]; - aCharstring.splice(i - 2, 3, charWidthVector, leftSidebearing, "hmoveto"); + + if (leftSidebearing) + aCharstring.splice(i - 2, 3, charWidthVector, leftSidebearing, "hmoveto"); + else + aCharstring.splice(i - 2, 3, charWidthVector); break; case "endchar": @@ -1345,7 +1346,7 @@ CFF.prototype = { var charstring = charstrings[i].charstring.slice(); var glyph = charstrings[i].glyph; - var flattened = this.flattenCharstring(charstring, aFontInfo.subrs); + var flattened = this.flattenCharstring(glyph, charstring, aFontInfo.subrs); glyphs.push(flattened); charstringsCount++; charstringsDataLength += flattened.length; From 4db56c5c97563adb9da6917f9eaf9230b69b2f3c Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Fri, 17 Jun 2011 00:37:15 +0800 Subject: [PATCH 71/72] add blogs and twitter link --- README | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README b/README index 18817c340..ee537f0a5 100644 --- a/README +++ b/README @@ -1,3 +1,12 @@ pdf.js is a technology demonstrator prototype to explore whether the HTML5 platform is complete enough to faithfully and efficiently render the ISO 32000-1:2008 Portable Document Format (PDF) without native code assistance. + +You can read more about pdf.js here: + +http://andreasgal.com/2011/06/15/pdf-js/ +http://blog.mozilla.com/cjones/2011/06/15/overview-of-pdf-js-guts/ + +Or follow us on twitter: @pdfjs + +http://twitter.com/#!/pdfjs From 446e958b5de4ed25d37eb2c7db5cc37afbac25dd Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Fri, 17 Jun 2011 09:11:03 +0200 Subject: [PATCH 72/72] Rename PDFFonts.js to fonts.js --- PDFFont.js => fonts.js | 0 PDFFontUtils.js => fonts_utils.js | 0 test.html | 7 +++---- 3 files changed, 3 insertions(+), 4 deletions(-) rename PDFFont.js => fonts.js (100%) rename PDFFontUtils.js => fonts_utils.js (100%) diff --git a/PDFFont.js b/fonts.js similarity index 100% rename from PDFFont.js rename to fonts.js diff --git a/PDFFontUtils.js b/fonts_utils.js similarity index 100% rename from PDFFontUtils.js rename to fonts_utils.js diff --git a/test.html b/test.html index 276ba30da..023cdeec2 100644 --- a/test.html +++ b/test.html @@ -1,15 +1,14 @@ -<<<<<<< HEAD Simple pdf.js page viewer - + + - - +