From 8241733a20e95815504ca995f6932778d7338f76 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sun, 9 Oct 2011 22:40:49 -0500 Subject: [PATCH 1/3] Getting additional encoding information for cmap from the CFF data (#641) --- fonts.js | 95 +++++++++++++++++++++++++++++++++++++------------------- pdf.js | 8 +++++ 2 files changed, 71 insertions(+), 32 deletions(-) diff --git a/fonts.js b/fonts.js index 2ab3a90b7..4f0d55d74 100644 --- a/fonts.js +++ b/fonts.js @@ -2488,7 +2488,7 @@ var Type2CFF = (function type2CFF() { var charStrings = this.parseIndex(topDict.CharStrings); var charset = this.parseCharsets(topDict.charset, charStrings.length, strings); - var hasSupplement = this.parseEncoding(topDict.Encoding, properties, + var encoding = this.parseEncoding(topDict.Encoding, properties, strings, charset); // The font sanitizer does not support CFF encoding with a @@ -2496,8 +2496,8 @@ var Type2CFF = (function type2CFF() { // between gid to glyph, let's overwrite what is declared in // the top dictionary to let the sanitizer think the font use // StandardEncoding, that's a lie but that's ok. 
- if (hasSupplement) - bytes[topDict.Encoding] = 0; + if (encoding.hasSupplement) + bytes[topDict.Encoding] &= 0x7F; // The CFF specification state that the 'dotsection' command // (12, 0) is deprecated and treated as a no-op, but all Type2 @@ -2528,7 +2528,7 @@ var Type2CFF = (function type2CFF() { // charstrings contains info about glyphs (one element per glyph // containing mappings for {unicode, width}) - var charstrings = this.getCharStrings(charset, charStrings, + var charstrings = this.getCharStrings(charset, encoding.encoding, privateDict, this.properties); // create the mapping between charstring and glyph id @@ -2545,49 +2545,82 @@ var Type2CFF = (function type2CFF() { return data; }, - getCharStrings: function cff_charstrings(charsets, charStrings, + getCharStrings: function cff_charstrings(charsets, encoding, privateDict, properties) { var defaultWidth = privateDict['defaultWidthX']; var charstrings = []; var differences = properties.differences; - var index = properties.firstChar || 0; for (var i = 1; i < charsets.length; i++) { - var code = -1; + var inDifferences; var glyph = charsets[i]; + var code; for (var j = 0; j < differences.length; j++) { if (differences[j] == glyph) { - index = j; - code = differences.indexOf(glyph); + code = j; + inDifferences = true; break; } } + if (!inDifferences) { + var code = properties.firstChar + i; + for (var s in encoding) { + if (encoding[s] == i) { + code = s | 0; + break; + } + } + } - var mapping = - properties.glyphs[glyph] || properties.glyphs[index] || {}; - if (code == -1) - index = code = mapping.unicode || index; + if (properties.encoding[code] && + properties.encoding[code].inDifferences) + continue; - if (code <= 0x1f || (code >= 127 && code <= 255)) - code += kCmapGlyphOffset; + var mapping = properties.glyphs[code] || properties.glyphs[glyph] || {}; + var unicode = mapping.unicode || code; - var width = mapping.width; - properties.glyphs[glyph] = properties.encoding[index] = { - unicode: code, - 
width: isNum(width) ? width : defaultWidth + if (unicode <= 0x1f || (unicode >= 127 && unicode <= 255)) + unicode += kCmapGlyphOffset; + + var width = isNum(mapping.width) ? mapping.width : defaultWidth; + properties.encoding[code] = { + unicode: unicode, + width: width, + inDifferences: inDifferences }; charstrings.push({ - unicode: code, + unicode: unicode, width: width, gid: i }); - index++; } // sort the array by the unicode value charstrings.sort(function type2CFFGetCharStringsSort(a, b) { return a.unicode - b.unicode; }); + + // remove duplicates -- they might appear during selection: + // properties.glyphs[code] || properties.glyphs[glyph] + // TODO make more deterministic + var nextUnusedUnicode = kCmapGlyphOffset + 0x0020; + var lastUnicode = charstrings[0].unicode, wasModified = false; + for (var i = 1; i < charstrings.length; ++i) { + if (lastUnicode != charstrings[i].unicode) { + lastUnicode = charstrings[i].unicode; + continue; + } + // duplicate found -- changing the unicode for previous one + charstrings[i - 1].unicode = nextUnusedUnicode++; + wasModified = true; + } + if (!wasModified) + return charstrings; + + // sort the array by the unicode value (again) + charstrings.sort(function type2CFFGetCharStringsSort(a, b) { + return a.unicode - b.unicode; + }); return charstrings; }, @@ -2595,6 +2628,10 @@ var Type2CFF = (function type2CFF() { charset) { var encoding = {}; var bytes = this.bytes; + var result = { + encoding: encoding, + hasSupplement: false + }; function readSupplement() { var supplementsCount = bytes[pos++]; @@ -2621,11 +2658,6 @@ var Type2CFF = (function type2CFF() { var glyphsCount = bytes[pos++]; for (var i = 1; i <= glyphsCount; i++) encoding[bytes[pos++]] = i; - - if (format & 0x80) { - readSupplement(); - return true; - } break; case 1: @@ -2637,19 +2669,18 @@ var Type2CFF = (function type2CFF() { for (var j = start; j <= start + count; j++) encoding[j] = gid++; } - - if (format & 0x80) { - readSupplement(); - return true; - } 
break; default: error('Unknow encoding format: ' + format + ' in CFF'); break; } + if (format & 0x80) { + readSupplement(); + result.hasSupplement = true; + } } - return false; + return result; }, parseCharsets: function cff_parsecharsets(pos, length, strings) { diff --git a/pdf.js b/pdf.js index 847ed2ff4..82345b17b 100644 --- a/pdf.js +++ b/pdf.js @@ -3558,6 +3558,12 @@ var Page = (function pagePage() { var self = this; var stats = self.stats; stats.compile = stats.fonts = stats.render = 0; + if (!this.content) { + setTimeout(function norenderingSetTimeout() { + if (continuation) continuation(null); + }); + return; + } var gfx = new CanvasGraphics(canvasCtx); var fonts = []; @@ -4610,6 +4616,8 @@ var PartialEvaluator = (function partialEvaluator() { if (replaceGlyph || !glyphs[glyph]) glyphs[glyph] = map[i]; + if (replaceGlyph || !glyphs[index]) + glyphs[index] = map[i]; // If there is no file, the character mapping can't be modified // but this is unlikely that there is any standard encoding with From 5ec177d88e0f80eb4d66f4288828f59f8c4e461c Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Wed, 12 Oct 2011 17:37:55 -0500 Subject: [PATCH 2/3] Nit: Rename 's' by 'charcode' --- fonts.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fonts.js b/fonts.js index f31c52b76..d4183f146 100644 --- a/fonts.js +++ b/fonts.js @@ -2565,9 +2565,9 @@ var Type2CFF = (function type2CFF() { } if (!inDifferences) { var code = properties.firstChar + i; - for (var s in encoding) { + for (var charcode in encoding) { if (encoding[s] == i) { - code = s | 0; + code = charcode | 0; break; } } From 01f026ce14cf0a147f900f510897d7b058f29459 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Wed, 12 Oct 2011 19:53:57 -0500 Subject: [PATCH 3/3] Fixing duplicate charstring selection --- fonts.js | 61 +++++++++++++++++++++++++++++--------------------------- pdf.js | 4 ++-- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/fonts.js b/fonts.js index 
d4183f146..e4e7bb289 100644 --- a/fonts.js +++ b/fonts.js @@ -2551,34 +2551,34 @@ var Type2CFF = (function type2CFF() { privateDict, properties) { var defaultWidth = privateDict['defaultWidthX']; var charstrings = []; - var differences = properties.differences; - for (var i = 1; i < charsets.length; i++) { - var inDifferences; + var firstChar = properties.firstChar; + var glyphMap = {}; + for (var i = 0; i < charsets.length; i++) { var glyph = charsets[i]; - var code; - for (var j = 0; j < differences.length; j++) { - if (differences[j] == glyph) { - code = j; - inDifferences = true; - break; - } - } - if (!inDifferences) { - var code = properties.firstChar + i; - for (var charcode in encoding) { - if (encoding[s] == i) { - code = charcode | 0; - break; - } - } + for (var charcode in encoding) { + if (encoding[charcode] == i) + glyphMap[glyph] = charcode | 0; } + } - if (properties.encoding[code] && - properties.encoding[code].inDifferences) - continue; + var differences = properties.differences; + for (var i = 0; i < differences.length; ++i) { + var glyph = differences[i]; + if (!glyph) + continue; + var oldGlyph = charsets[i]; + if (oldGlyph) + delete glyphMap[oldGlyph]; + glyphMap[differences[i]] = i; + } - var mapping = properties.glyphs[code] || properties.glyphs[glyph] || {}; - var unicode = mapping.unicode || code; + var glyphs = properties.glyphs; + for (var i = 1; i < charsets.length; i++) { + var glyph = charsets[i]; + var code = glyphMap[glyph] || 0; + + var mapping = glyphs[code] || glyphs[glyph] || {}; + var unicode = mapping.unicode; if (unicode <= 0x1f || (unicode >= 127 && unicode <= 255)) unicode += kCmapGlyphOffset; @@ -2586,13 +2586,13 @@ var Type2CFF = (function type2CFF() { var width = isNum(mapping.width) ? 
mapping.width : defaultWidth; properties.encoding[code] = { unicode: unicode, - width: width, - inDifferences: inDifferences + width: width }; charstrings.push({ unicode: unicode, width: width, + code: code, gid: i }); } @@ -2604,7 +2604,6 @@ var Type2CFF = (function type2CFF() { // remove duplicates -- they might appear during selection: // properties.glyphs[code] || properties.glyphs[glyph] - // TODO make more deterministic var nextUnusedUnicode = kCmapGlyphOffset + 0x0020; var lastUnicode = charstrings[0].unicode, wasModified = false; for (var i = 1; i < charstrings.length; ++i) { @@ -2612,8 +2611,12 @@ var Type2CFF = (function type2CFF() { lastUnicode = charstrings[i].unicode; continue; } - // duplicate found -- changing the unicode for previous one - charstrings[i - 1].unicode = nextUnusedUnicode++; + // duplicate found -- keeping the item that has + // different code and unicode, that one created + // as result of modification of the base encoding + var duplicateIndex = + charstrings[i].unicode == charstrings[i].code ? i : i - 1; + charstrings[duplicateIndex].unicode = nextUnusedUnicode++; wasModified = true; } if (!wasModified) diff --git a/pdf.js b/pdf.js index 6f4524db5..c76ae7da0 100644 --- a/pdf.js +++ b/pdf.js @@ -4615,9 +4615,9 @@ var PartialEvaluator = (function partialEvaluator() { }; if (replaceGlyph || !glyphs[glyph]) - glyphs[glyph] = map[i]; + glyphs[glyph] = map[i]; if (replaceGlyph || !glyphs[index]) - glyphs[index] = map[i]; + glyphs[index] = map[i]; // If there is no file, the character mapping can't be modified // but this is unlikely that there is any standard encoding with