From 11a1ebaada2de99c3cb9a4c17728c3f04bbb893e Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 21 Jun 2011 21:11:59 +0200 Subject: [PATCH 1/5] Start displaying TrueType fonts --- fonts.js | 10 +++++----- pdf.js | 9 +++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fonts.js b/fonts.js index ad3d4fd35..80a9f4bc2 100644 --- a/fonts.js +++ b/fonts.js @@ -103,7 +103,7 @@ var Font = (function () { // If the font is to be ignored, register it like an already loaded font // to avoid the cost of waiting for it be be loaded by the platform. - if (properties.ignore || properties.type == "TrueType" || kDisableFonts) { + if (properties.ignore || kDisableFonts) { Fonts[name] = { data: file, loading: false, @@ -368,11 +368,11 @@ var Font = (function () { var length = FontsUtils.bytesToInteger(file.getBytes(4)); // Read the table associated data - var currentPosition = file.pos; - file.pos = file.start + offset; - + var previousPosition = file.pos; + file.pos = file.start ? file.start : 0; + file.skip(offset); var data = file.getBytes(length); - file.pos = currentPosition; + file.pos = previousPosition; return { tag: tag, diff --git a/pdf.js b/pdf.js index 4db4ef06f..8268f1673 100644 --- a/pdf.js +++ b/pdf.js @@ -2199,8 +2199,13 @@ var CanvasGraphics = (function() { var tokens = []; var token = ""; - var buffer = cmapObj.ensureBuffer ? 
cmapObj.ensureBuffer() : cmapObj; - var cmap = cmapObj.getBytes(buffer.byteLength); + var length = cmapObj.length; + if (cmapObj instanceof FlateStream) { + cmapObj.readBlock(); + length = cmapObj.bufferLength; + } + + var cmap = cmapObj.getBytes(length); for (var i =0; i < cmap.length; i++) { var byte = cmap[i]; if (byte == 0x20 || byte == 0x0A || byte == 0x3C || byte == 0x3E) { From fdfd03b671c479e10ab44461a5ba510b33d2128f Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 22 Jun 2011 04:41:31 +0200 Subject: [PATCH 2/5] Support Format 6 cmap table, but does not pass the sanitizer yet --- fonts.js | 59 ++++++++++++++++++++++++++++++++++++++++++-------------- pdf.js | 4 +--- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/fonts.js b/fonts.js index 80a9f4bc2..f5a531785 100644 --- a/fonts.js +++ b/fonts.js @@ -242,7 +242,7 @@ var Font = (function () { return ranges; }; - function createCMAPTable(glyphs) { + function createCMapTable(glyphs) { var ranges = getRanges(glyphs); var headerSize = (12 * 2 + (ranges.length * 4 * 2)); @@ -274,7 +274,7 @@ var Font = (function () { var bias = 0; for (var i = 0; i < segCount - 1; i++) { var range = ranges[i]; - var start = range[0]; + var start = range[0]; var end = range[1]; var delta = (((start - 1) - bias) ^ 0xffff) + 1; bias += (end - start + 1); @@ -393,6 +393,47 @@ var Font = (function () { } }; + function replaceCMapTable(font, properties) { + var version = FontsUtils.bytesToInteger(font.getBytes(2)); + var numTables = FontsUtils.bytesToInteger(font.getBytes(2)); + + var tables = []; + for (var i = 0; i < numTables; i++) { + var platformID = FontsUtils.bytesToInteger(font.getBytes(2)); + var encodingID = FontsUtils.bytesToInteger(font.getBytes(2)); + var offset = FontsUtils.bytesToInteger(font.getBytes(4)); + var format = FontsUtils.bytesToInteger(font.getBytes(2)); + var length = FontsUtils.bytesToInteger(font.getBytes(2)); + var language = 
FontsUtils.bytesToInteger(font.getBytes(2)); + + if (format == 0 && numTables == 1) { + // Format 0 alone is not allowed by the sanitizer so let's rewrite + // that to a 3-1-4 Unicode BMP table + var charset = properties.charset; + var glyphs = []; + for (var i = 0; i < charset.length; i++) { + glyphs.push({ + unicode: GlyphsUnicode[charset[i]] || 0 + }); + } + + cmap.data = createCMapTable(glyphs); + } else if (format == 6 && numTables == 1) { + // Format 6 is a 2-bytes dense mapping, which means the font data + // lives glue together even if they are pretty far in the unicode + // table. (This looks weird, so I can have missed something) + var firstCode = FontsUtils.bytesToInteger(font.getBytes(2)); + var entryCount = FontsUtils.bytesToInteger(font.getBytes(2)); + + var encoding = properties.encoding; + for (var j = 0; j < entryCount; j++) { + var charcode = FontsUtils.bytesToInteger(font.getBytes(2)); + encoding[charcode + firstCode] = charcode + firstCode; + } + } + } + }; + // Check that required tables are present var requiredTables = [ "OS/2", "cmap", "head", "hhea", "hmtx", "maxp", "name", "post" ]; @@ -448,18 +489,8 @@ var Font = (function () { data: OS2 }); - // If the font is missing a OS/2 table it's could be an old mac font - // without a 3-1-4 Unicode BMP table, so let's rewrite it. 
- var charset = properties.charset; - var glyphs = []; - for (var i = 0; i < charset.length; i++) { - glyphs.push({ - unicode: GlyphsUnicode[charset[i]] - }); - } - // Replace the old CMAP table with a shiny new one - cmap.data = createCMAPTable(glyphs); + replaceCMapTable(font, properties); // Rewrite the 'post' table if needed if (!post) { @@ -599,7 +630,7 @@ var Font = (function () { var charstrings = font.getOrderedCharStrings(properties.glyphs); /** CMAP */ - cmap = createCMAPTable(charstrings); + cmap = createCMapTable(charstrings); createTableEntry(otf, offsets, "cmap", cmap); /** HEAD */ diff --git a/pdf.js b/pdf.js index 8268f1673..72a7b7970 100644 --- a/pdf.js +++ b/pdf.js @@ -2193,8 +2193,6 @@ var CanvasGraphics = (function() { } else if (IsStream(cmapObj)) { var encoding = Encodings["WinAnsiEncoding"]; var firstChar = xref.fetchIfRef(fontDict.get("FirstChar")); - for (var i = firstChar; i < encoding.length; i++) - encodingMap[i] = new Name(encoding[i]); var tokens = []; var token = ""; @@ -2538,7 +2536,7 @@ var CanvasGraphics = (function() { } this.current.fontSize = size; - this.ctx.font = this.current.fontSize +'px "' + fontName + '"'; + this.ctx.font = this.current.fontSize +'px "' + fontName + '", Symbol'; }, setTextRenderingMode: function(mode) { TODO("text rendering mode"); From 00df9b82eed490d5807fbd57cf0f36004f5d6da1 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 22 Jun 2011 07:46:41 +0200 Subject: [PATCH 3/5] Start of a TTF Format6 to Format4 converter (sigh) --- fonts.js | 30 ++++++++++++++++++++---------- pdf.js | 7 +++++-- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/fonts.js b/fonts.js index f5a531785..c7230a55a 100644 --- a/fonts.js +++ b/fonts.js @@ -284,8 +284,8 @@ var Font = (function () { idDeltas += string16(delta); idRangeOffsets += string16(0); - for (var j = start; j <= end; j++) - glyphsIds += String.fromCharCode(j); + for (var j = 0; j < range.length; j++) + glyphsIds += 
String.fromCharCode(range[j]); } startCount += "\xFF\xFF"; @@ -393,11 +393,10 @@ var Font = (function () { } }; - function replaceCMapTable(font, properties) { + function replaceCMapTable(cmap, font, properties) { var version = FontsUtils.bytesToInteger(font.getBytes(2)); var numTables = FontsUtils.bytesToInteger(font.getBytes(2)); - var tables = []; for (var i = 0; i < numTables; i++) { var platformID = FontsUtils.bytesToInteger(font.getBytes(2)); var encodingID = FontsUtils.bytesToInteger(font.getBytes(2)); @@ -406,14 +405,15 @@ var Font = (function () { var length = FontsUtils.bytesToInteger(font.getBytes(2)); var language = FontsUtils.bytesToInteger(font.getBytes(2)); - if (format == 0 && numTables == 1) { + if ((format == 0 && numTables == 1) || + (format == 6 && numTables == 1 && !properties.encoding.empty)) { // Format 0 alone is not allowed by the sanitizer so let's rewrite // that to a 3-1-4 Unicode BMP table var charset = properties.charset; var glyphs = []; - for (var i = 0; i < charset.length; i++) { + for (var j = 0; j < charset.length; j++) { glyphs.push({ - unicode: GlyphsUnicode[charset[i]] || 0 + unicode: GlyphsUnicode[charset[j]] || 0 }); } @@ -421,15 +421,25 @@ var Font = (function () { } else if (format == 6 && numTables == 1) { // Format 6 is a 2-bytes dense mapping, which means the font data // lives glue together even if they are pretty far in the unicode - // table. (This looks weird, so I can have missed something) + // table. 
(This looks weird, so I may have missed something), this + // works on Linux but seems to fail on Mac so let's rewrite the + // cmap table to a 3-1-4 style var firstCode = FontsUtils.bytesToInteger(font.getBytes(2)); var entryCount = FontsUtils.bytesToInteger(font.getBytes(2)); var encoding = properties.encoding; + var glyphs = []; for (var j = 0; j < entryCount; j++) { var charcode = FontsUtils.bytesToInteger(font.getBytes(2)); - encoding[charcode + firstCode] = charcode + firstCode; + glyphs.push({unicode: charcode + firstCode }); } + + var ranges = getRanges(glyphs); + var denseRange = ranges[0]; + var pos = 0; + for (var j = denseRange[0]; j <= denseRange[1]; j++) + encoding[j - 1] = glyphs[pos++].unicode; + cmap.data = createCMapTable(glyphs); } } }; @@ -490,7 +500,7 @@ var Font = (function () { }); // Replace the old CMAP table with a shiny new one - replaceCMapTable(font, properties); + replaceCMapTable(cmap, font, properties); // Rewrite the 'post' table if needed if (!post) { diff --git a/pdf.js b/pdf.js index 72a7b7970..9d06241f7 100644 --- a/pdf.js +++ b/pdf.js @@ -2143,7 +2143,7 @@ var CanvasGraphics = (function() { // Fonts with an embedded cmap but without any assignment in // it are not yet supported, so ask the fonts loader to ignore // them to not pay a stupid one sec latence. 
- var ignoreFont = true; + var ignoreFont = false; var encodingMap = {}; var charset = []; @@ -2187,6 +2187,7 @@ var CanvasGraphics = (function() { } } } else if (fontDict.has("ToUnicode")) { + encodingMap = {empty: true}; var cmapObj = xref.fetchIfRef(fontDict.get("ToUnicode")); if (IsName(cmapObj)) { error("ToUnicode file cmap translation not implemented"); @@ -2230,7 +2231,9 @@ var CanvasGraphics = (function() { var code = parseInt("0x" + tokens[j+2]); for (var k = startRange; k <= endRange; k++) { - encodingMap[k] = GlyphsUnicode[encoding[code]]; + // The encoding mapping table will be filled + // later during the building phase + //encodingMap[k] = GlyphsUnicode[encoding[code]]; charset.push(encoding[code++]); } } From e0c98ba3c6e562c8da611aaa1a3c411b106d3475 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 22 Jun 2011 10:56:31 +0200 Subject: [PATCH 4/5] Enhance the converter code by filling Format 6 dense array gaps --- fonts.js | 31 ++++++++++++++++++++++++++----- pdf.js | 2 +- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/fonts.js b/fonts.js index c7230a55a..ded1b70c2 100644 --- a/fonts.js +++ b/fonts.js @@ -427,18 +427,39 @@ var Font = (function () { var firstCode = FontsUtils.bytesToInteger(font.getBytes(2)); var entryCount = FontsUtils.bytesToInteger(font.getBytes(2)); + // Since Format 6 is a dense array, check for gaps in the indexes + // to fill them later if needed + var gaps = []; + for (var j = 1; j <= entryCount; j++) + gaps.push(j); + var encoding = properties.encoding; var glyphs = []; for (var j = 0; j < entryCount; j++) { var charcode = FontsUtils.bytesToInteger(font.getBytes(2)); - glyphs.push({unicode: charcode + firstCode }); + var index = gaps.indexOf(charcode); + if (index != -1) + gaps.splice(index, 1); + + glyphs.push({unicode: charcode + firstCode}); } + while (gaps.length) + glyphs.push({unicode: gaps.pop() + firstCode }); + var ranges = getRanges(glyphs); - var denseRange = ranges[0]; - var 
pos = 0; - for (var j = denseRange[0]; j <= denseRange[1]; j++) - encoding[j - 1] = glyphs[pos++].unicode; + + var pos = firstCode; + var bias = 1; + for (var j = 0; j < ranges.length; j++) { + var range = ranges[j]; + var start = range[0]; + var end = range[1]; + for (var k = start; k < end; k++) { + encoding[pos] = glyphs[pos - firstCode].unicode; + pos++; + } + } cmap.data = createCMapTable(glyphs); } } diff --git a/pdf.js b/pdf.js index 9d06241f7..23dd5bee9 100644 --- a/pdf.js +++ b/pdf.js @@ -2234,7 +2234,7 @@ var CanvasGraphics = (function() { // The encoding mapping table will be filled // later during the building phase //encodingMap[k] = GlyphsUnicode[encoding[code]]; - charset.push(encoding[code++]); + charset.push(encoding[code++] || ".notdef"); } } break; From a772c9a2e2c40abdc52aa266fc67b4ecfaa3ea00 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 22 Jun 2011 11:25:00 +0200 Subject: [PATCH 5/5] Fill more gaps for Format 6 dense array --- fonts.js | 50 ++++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/fonts.js b/fonts.js index ded1b70c2..a008dfce0 100644 --- a/fonts.js +++ b/fonts.js @@ -427,39 +427,37 @@ var Font = (function () { var firstCode = FontsUtils.bytesToInteger(font.getBytes(2)); var entryCount = FontsUtils.bytesToInteger(font.getBytes(2)); - // Since Format 6 is a dense array, check for gaps in the indexes - // to fill them later if needed - var gaps = []; - for (var j = 1; j <= entryCount; j++) - gaps.push(j); - - var encoding = properties.encoding; var glyphs = []; + var min = 0xffff, max = 0; for (var j = 0; j < entryCount; j++) { var charcode = FontsUtils.bytesToInteger(font.getBytes(2)); - var index = gaps.indexOf(charcode); - if (index != -1) - gaps.splice(index, 1); + glyphs.push(charcode); - glyphs.push({unicode: charcode + firstCode}); + if (charcode < min) + min = charcode; + if (charcode > max) + max = charcode; } - while (gaps.length) - 
glyphs.push({unicode: gaps.pop() + firstCode }); - - var ranges = getRanges(glyphs); - - var pos = firstCode; - var bias = 1; - for (var j = 0; j < ranges.length; j++) { - var range = ranges[j]; - var start = range[0]; - var end = range[1]; - for (var k = start; k < end; k++) { - encoding[pos] = glyphs[pos - firstCode].unicode; - pos++; - } + // Since Format 6 is a dense array, check for gaps + for (var j = min; j < max; j++) { + if (glyphs.indexOf(j) == -1) + glyphs.push(j); } + + for (var j = 0; j < glyphs.length; j++) + glyphs[j] = { unicode: glyphs[j] + firstCode }; + + var ranges= getRanges(glyphs); + assert(ranges.length == 1, "Got " + ranges.length + " ranges in a dense array"); + + var encoding = properties.encoding; + var denseRange = ranges[0]; + var start = denseRange[0]; + var end = denseRange[1]; + var index = firstCode; + for (var j = start; j <= end; j++) + encoding[index++] = glyphs[j - firstCode - 1].unicode; cmap.data = createCMapTable(glyphs); } }