From 1444a7e7947424b29df689599505b33f2b0e9382 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sun, 9 Oct 2011 09:13:10 -0500 Subject: [PATCH 1/9] Making Type 2 font sanitizable (#631) --- fonts.js | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fonts.js b/fonts.js index 2ab3a90b7..45fe9c974 100644 --- a/fonts.js +++ b/fonts.js @@ -2271,7 +2271,7 @@ CFF.prototype = { 'return': 11, 'sub': [12, 11], 'div': [12, 12], - 'pop': [1, 12, 18], + 'pop': [139, 12, 18], 'drop' : [12, 18], 'endchar': 14, 'rmoveto': 21, @@ -2287,9 +2287,11 @@ CFF.prototype = { var cmd = map[command]; assert(cmd, 'Unknow command: ' + command); - if (isArray(cmd)) + if (isArray(cmd)) { charstring.splice(i++, 1, cmd[0], cmd[1]); - else + if (cmd.length > 2) + charstring.splice(++i, 0, cmd[2]); + } else if (cmd !== null) charstring[i] = cmd; } else { // Type1 charstring use a division for number above 32000 From 8241733a20e95815504ca995f6932778d7338f76 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sun, 9 Oct 2011 22:40:49 -0500 Subject: [PATCH 2/9] Getting additional encoding information for cmap from the CFF data (#641) --- fonts.js | 95 +++++++++++++++++++++++++++++++++++++------------------- pdf.js | 8 +++++ 2 files changed, 71 insertions(+), 32 deletions(-) diff --git a/fonts.js b/fonts.js index 2ab3a90b7..4f0d55d74 100644 --- a/fonts.js +++ b/fonts.js @@ -2488,7 +2488,7 @@ var Type2CFF = (function type2CFF() { var charStrings = this.parseIndex(topDict.CharStrings); var charset = this.parseCharsets(topDict.charset, charStrings.length, strings); - var hasSupplement = this.parseEncoding(topDict.Encoding, properties, + var encoding = this.parseEncoding(topDict.Encoding, properties, strings, charset); // The font sanitizer does not support CFF encoding with a @@ -2496,8 +2496,8 @@ var Type2CFF = (function type2CFF() { // between gid to glyph, let's overwrite what is declared in // the top dictionary to let the sanitizer think the font use // StandardEncoding, that's a 
lie but that's ok. - if (hasSupplement) - bytes[topDict.Encoding] = 0; + if (encoding.hasSupplement) + bytes[topDict.Encoding] &= 0x7F; // The CFF specification state that the 'dotsection' command // (12, 0) is deprecated and treated as a no-op, but all Type2 @@ -2528,7 +2528,7 @@ var Type2CFF = (function type2CFF() { // charstrings contains info about glyphs (one element per glyph // containing mappings for {unicode, width}) - var charstrings = this.getCharStrings(charset, charStrings, + var charstrings = this.getCharStrings(charset, encoding.encoding, privateDict, this.properties); // create the mapping between charstring and glyph id @@ -2545,49 +2545,82 @@ var Type2CFF = (function type2CFF() { return data; }, - getCharStrings: function cff_charstrings(charsets, charStrings, + getCharStrings: function cff_charstrings(charsets, encoding, privateDict, properties) { var defaultWidth = privateDict['defaultWidthX']; var charstrings = []; var differences = properties.differences; - var index = properties.firstChar || 0; for (var i = 1; i < charsets.length; i++) { - var code = -1; + var inDifferences; var glyph = charsets[i]; + var code; for (var j = 0; j < differences.length; j++) { if (differences[j] == glyph) { - index = j; - code = differences.indexOf(glyph); + code = j; + inDifferences = true; break; } } + if (!inDifferences) { + var code = properties.firstChar + i; + for (var s in encoding) { + if (encoding[s] == i) { + code = s | 0; + break; + } + } + } - var mapping = - properties.glyphs[glyph] || properties.glyphs[index] || {}; - if (code == -1) - index = code = mapping.unicode || index; + if (properties.encoding[code] && + properties.encoding[code].inDifferences) + continue; - if (code <= 0x1f || (code >= 127 && code <= 255)) - code += kCmapGlyphOffset; + var mapping = properties.glyphs[code] || properties.glyphs[glyph] || {}; + var unicode = mapping.unicode || code; - var width = mapping.width; - properties.glyphs[glyph] = properties.encoding[index] = { - 
unicode: code, - width: isNum(width) ? width : defaultWidth + if (unicode <= 0x1f || (unicode >= 127 && unicode <= 255)) + unicode += kCmapGlyphOffset; + + var width = isNum(mapping.width) ? mapping.width : defaultWidth; + properties.encoding[code] = { + unicode: unicode, + width: width, + inDifferences: inDifferences }; charstrings.push({ - unicode: code, + unicode: unicode, width: width, gid: i }); - index++; } // sort the array by the unicode value charstrings.sort(function type2CFFGetCharStringsSort(a, b) { return a.unicode - b.unicode; }); + + // remove duplicates -- they might appear during selection: + // properties.glyphs[code] || properties.glyphs[glyph] + // TODO make more deterministic + var nextUnusedUnicode = kCmapGlyphOffset + 0x0020; + var lastUnicode = charstrings[0].unicode, wasModified = false; + for (var i = 1; i < charstrings.length; ++i) { + if (lastUnicode != charstrings[i].unicode) { + lastUnicode = charstrings[i].unicode; + continue; + } + // duplicate found -- changing the unicode for previous one + charstrings[i - 1].unicode = nextUnusedUnicode++; + wasModified = true; + } + if (!wasModified) + return charstrings; + + // sort the array by the unicode value (again) + charstrings.sort(function type2CFFGetCharStringsSort(a, b) { + return a.unicode - b.unicode; + }); return charstrings; }, @@ -2595,6 +2628,10 @@ var Type2CFF = (function type2CFF() { charset) { var encoding = {}; var bytes = this.bytes; + var result = { + encoding: encoding, + hasSupplement: false + }; function readSupplement() { var supplementsCount = bytes[pos++]; @@ -2621,11 +2658,6 @@ var Type2CFF = (function type2CFF() { var glyphsCount = bytes[pos++]; for (var i = 1; i <= glyphsCount; i++) encoding[bytes[pos++]] = i; - - if (format & 0x80) { - readSupplement(); - return true; - } break; case 1: @@ -2637,19 +2669,18 @@ var Type2CFF = (function type2CFF() { for (var j = start; j <= start + count; j++) encoding[j] = gid++; } - - if (format & 0x80) { - readSupplement(); - 
return true; - } break; default: error('Unknow encoding format: ' + format + ' in CFF'); break; } + if (format & 0x80) { + readSupplement(); + result.hasSupplement = true; + } } - return false; + return result; }, parseCharsets: function cff_parsecharsets(pos, length, strings) { diff --git a/pdf.js b/pdf.js index 847ed2ff4..82345b17b 100644 --- a/pdf.js +++ b/pdf.js @@ -3558,6 +3558,12 @@ var Page = (function pagePage() { var self = this; var stats = self.stats; stats.compile = stats.fonts = stats.render = 0; + if (!this.content) { + setTimeout(function norenderingSetTimeout() { + if (continuation) continuation(null); + }); + return; + } var gfx = new CanvasGraphics(canvasCtx); var fonts = []; @@ -4610,6 +4616,8 @@ var PartialEvaluator = (function partialEvaluator() { if (replaceGlyph || !glyphs[glyph]) glyphs[glyph] = map[i]; + if (replaceGlyph || !glyphs[index]) + glyphs[index] = map[i]; // If there is no file, the character mapping can't be modified // but this is unlikely that there is any standard encoding with From 030b498b6a9f5a0f8c9688c8a55dccb5ad34dbc5 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Mon, 10 Oct 2011 07:03:43 -0500 Subject: [PATCH 3/9] 1 instead of 0 --- fonts.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fonts.js b/fonts.js index 45fe9c974..fa6da78e7 100644 --- a/fonts.js +++ b/fonts.js @@ -2271,7 +2271,7 @@ CFF.prototype = { 'return': 11, 'sub': [12, 11], 'div': [12, 12], - 'pop': [139, 12, 18], + 'pop': [140, 12, 18], 'drop' : [12, 18], 'endchar': 14, 'rmoveto': 21, From 2c09fed17ce039c15b093ff7b38b7f6189b879fa Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Mon, 10 Oct 2011 20:58:40 -0500 Subject: [PATCH 4/9] Implement type2 flex --- fonts.js | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/fonts.js b/fonts.js index fa6da78e7..20edc1529 100644 --- a/fonts.js +++ b/fonts.js @@ -1715,6 +1715,7 @@ var Type1Parser = function type1Parser() { var charstring 
= []; var lsb = 0; var width = 0; + var flexState = 0, flexPoints; var value = ''; var count = array.length; @@ -1748,8 +1749,9 @@ var Type1Parser = function type1Parser() { i++; continue; } - } else if (!kHintingEnabled && (value == 1 || value == 2)) { - charstring.push('drop', 'drop', 'drop', 'drop', 'drop', 'drop'); + } else if (escape == 17 || escape == 33) { + // pop or setcurrentpoint commands can be ignored + // since we are not doing callothersubr continue; } @@ -1775,6 +1777,31 @@ var Type1Parser = function type1Parser() { charstring.push(lsb, 'hmoveto'); continue; + } else if (value == 10) { // callsubr + if (charstring[charstring.length - 1] < 3) { // subr #0..2 + var subrNumber = charstring.pop(); + switch (subrNumber) { + case 1: + flexState = 1; // prepare for flex coordinates + flexPoints = 0; + break; + case 2: + flexState = 2; // flex in progress + flexPoints++; + break; + case 0: + // type2 flex command does not need final coords + charstring.push('exch', 'drop', 'exch', 'drop'); + charstring.push('flex'); + flexState = 0; + break; + } + continue; + } + } else if (value == 21 && flexState > 0) { + if (flexState > 1) + continue; // ignoring rmoveto + value = 5; // first segment replacing with rlineto } else if (!kHintingEnabled && (value == 1 || value == 3)) { charstring.push('drop', 'drop'); continue; @@ -2271,7 +2298,8 @@ CFF.prototype = { 'return': 11, 'sub': [12, 11], 'div': [12, 12], - 'pop': [140, 12, 18], + 'exch': [12, 28], + 'flex': [12, 35], 'drop' : [12, 18], 'endchar': 14, 'rmoveto': 21, @@ -2287,11 +2315,9 @@ CFF.prototype = { var cmd = map[command]; assert(cmd, 'Unknow command: ' + command); - if (isArray(cmd)) { + if (isArray(cmd)) charstring.splice(i++, 1, cmd[0], cmd[1]); - if (cmd.length > 2) - charstring.splice(++i, 0, cmd[2]); - } else if (cmd !== null) + else if (cmd !== null) charstring[i] = cmd; } else { // Type1 charstring use a division for number above 32000 From 25f6431607c1154936bd2ae3ca89ad965b1b4f46 Mon Sep 17 00:00:00 
2001 From: notmasteryet Date: Mon, 10 Oct 2011 21:00:49 -0500 Subject: [PATCH 5/9] Cleaning up flex stuff --- fonts.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fonts.js b/fonts.js index 20edc1529..789261ee5 100644 --- a/fonts.js +++ b/fonts.js @@ -1715,7 +1715,7 @@ var Type1Parser = function type1Parser() { var charstring = []; var lsb = 0; var width = 0; - var flexState = 0, flexPoints; + var flexState = 0; var value = ''; var count = array.length; @@ -1783,11 +1783,9 @@ var Type1Parser = function type1Parser() { switch (subrNumber) { case 1: flexState = 1; // prepare for flex coordinates - flexPoints = 0; break; case 2: flexState = 2; // flex in progress - flexPoints++; break; case 0: // type2 flex command does not need final coords @@ -2317,7 +2315,7 @@ CFF.prototype = { if (isArray(cmd)) charstring.splice(i++, 1, cmd[0], cmd[1]); - else if (cmd !== null) + else charstring[i] = cmd; } else { // Type1 charstring use a division for number above 32000 From 66074c08ebbaac0cd06c16440118aa7675f4da89 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Tue, 11 Oct 2011 07:17:45 -0500 Subject: [PATCH 6/9] returning vstem3 and hstem3 --- fonts.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fonts.js b/fonts.js index 789261ee5..3abaaa533 100644 --- a/fonts.js +++ b/fonts.js @@ -1753,6 +1753,9 @@ var Type1Parser = function type1Parser() { // pop or setcurrentpoint commands can be ignored // since we are not doing callothersubr continue; + } else if (!kHintingEnabled && (escape == 1 || escape == 2)) { + charstring.push('drop', 'drop', 'drop', 'drop', 'drop', 'drop'); + continue; } command = charStringDictionary['12'][escape]; From b45f646267cf982b7441738f554abb7d7b806cba Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Tue, 11 Oct 2011 18:26:25 -0500 Subject: [PATCH 7/9] Add fit11-talk as an eq reftest --- test/pdfs/fit11-talk.pdf.link | 1 + test/test_manifest.json | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 
test/pdfs/fit11-talk.pdf.link diff --git a/test/pdfs/fit11-talk.pdf.link b/test/pdfs/fit11-talk.pdf.link new file mode 100644 index 000000000..af7107ee2 --- /dev/null +++ b/test/pdfs/fit11-talk.pdf.link @@ -0,0 +1 @@ +http://www.ccs.neu.edu/home/samth/fit11-talk.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 43b799bad..1270cce29 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -144,6 +144,13 @@ "rounds": 1, "type": "eq" }, + { "id": "fit11-talk", + "file": "pdfs/fit11-talk.pdf", + "link": true, + "rounds": 1, + "skipPages": [12,31], + "type": "eq" + }, { "id": "fips197", "file": "pdfs/fips197.pdf", "link": true, From 5ec177d88e0f80eb4d66f4288828f59f8c4e461c Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Wed, 12 Oct 2011 17:37:55 -0500 Subject: [PATCH 8/9] Nit: Rename 's' by 'charcode' --- fonts.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fonts.js b/fonts.js index f31c52b76..d4183f146 100644 --- a/fonts.js +++ b/fonts.js @@ -2565,9 +2565,9 @@ var Type2CFF = (function type2CFF() { } if (!inDifferences) { var code = properties.firstChar + i; - for (var s in encoding) { + for (var charcode in encoding) { if (encoding[s] == i) { - code = s | 0; + code = charcode | 0; break; } } From 01f026ce14cf0a147f900f510897d7b058f29459 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Wed, 12 Oct 2011 19:53:57 -0500 Subject: [PATCH 9/9] Fixing duplicate charstring selection --- fonts.js | 61 +++++++++++++++++++++++++++++--------------------------- pdf.js | 4 ++-- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/fonts.js b/fonts.js index d4183f146..e4e7bb289 100644 --- a/fonts.js +++ b/fonts.js @@ -2551,34 +2551,34 @@ var Type2CFF = (function type2CFF() { privateDict, properties) { var defaultWidth = privateDict['defaultWidthX']; var charstrings = []; - var differences = properties.differences; - for (var i = 1; i < charsets.length; i++) { - var inDifferences; + var firstChar = 
properties.firstChar; + var glyphMap = {}; + for (var i = 0; i < charsets.length; i++) { var glyph = charsets[i]; - var code; - for (var j = 0; j < differences.length; j++) { - if (differences[j] == glyph) { - code = j; - inDifferences = true; - break; - } - } - if (!inDifferences) { - var code = properties.firstChar + i; - for (var charcode in encoding) { - if (encoding[s] == i) { - code = charcode | 0; - break; - } - } + for (var charcode in encoding) { + if (encoding[charcode] == i) + glyphMap[glyph] = charcode | 0; } + } - if (properties.encoding[code] && - properties.encoding[code].inDifferences) - continue; + var differences = properties.differences; + for (var i = 0; i < differences.length; ++i) { + var glyph = differences[i]; + if (!glyph) + continue; + var oldGlyph = charsets[i]; + if (oldGlyph) + delete glyphMap[oldGlyph]; + glyphMap[differences[i]] = i; + } - var mapping = properties.glyphs[code] || properties.glyphs[glyph] || {}; - var unicode = mapping.unicode || code; + var glyphs = properties.glyphs; + for (var i = 1; i < charsets.length; i++) { + var glyph = charsets[i]; + var code = glyphMap[glyph] || 0; + + var mapping = glyphs[code] || glyphs[glyph] || {}; + var unicode = mapping.unicode; if (unicode <= 0x1f || (unicode >= 127 && unicode <= 255)) unicode += kCmapGlyphOffset; @@ -2586,13 +2586,13 @@ var Type2CFF = (function type2CFF() { var width = isNum(mapping.width) ? 
mapping.width : defaultWidth; properties.encoding[code] = { unicode: unicode, - width: width, - inDifferences: inDifferences + width: width }; charstrings.push({ unicode: unicode, width: width, + code: code, gid: i }); } @@ -2604,7 +2604,6 @@ var Type2CFF = (function type2CFF() { // remove duplicates -- they might appear during selection: // properties.glyphs[code] || properties.glyphs[glyph] - // TODO make more deterministic var nextUnusedUnicode = kCmapGlyphOffset + 0x0020; var lastUnicode = charstrings[0].unicode, wasModified = false; for (var i = 1; i < charstrings.length; ++i) { @@ -2612,8 +2611,12 @@ var Type2CFF = (function type2CFF() { lastUnicode = charstrings[i].unicode; continue; } - // duplicate found -- changing the unicode for previous one - charstrings[i - 1].unicode = nextUnusedUnicode++; + // duplicate found -- keeping the item that has + // different code and unicode, that one created + // as result of modification of the base encoding + var duplicateIndex = + charstrings[i].unicode == charstrings[i].code ? i : i - 1; + charstrings[duplicateIndex].unicode = nextUnusedUnicode++; wasModified = true; } if (!wasModified) diff --git a/pdf.js b/pdf.js index 6f4524db5..c76ae7da0 100644 --- a/pdf.js +++ b/pdf.js @@ -4615,9 +4615,9 @@ var PartialEvaluator = (function partialEvaluator() { }; if (replaceGlyph || !glyphs[glyph]) - glyphs[glyph] = map[i]; + glyphs[glyph] = map[i]; if (replaceGlyph || !glyphs[index]) - glyphs[index] = map[i]; + glyphs[index] = map[i]; // If there is no file, the character mapping can't be modified // but this is unlikely that there is any standard encoding with