From 19b670458e82b579b81ecfa17033c5073106072a Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Tue, 13 Mar 2012 18:59:16 -0500 Subject: [PATCH 1/4] Remove empty glyphs; improve glyph unicode movements --- src/fonts.js | 84 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 14 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index df0acbbc5..e3178ae9d 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -1656,6 +1656,30 @@ var Font = (function FontClosure() { glyf.data = newGlyfData.subarray(0, writeOffset); } + function findEmptyGlyphs(locaTable, isGlyphLocationsLong, emptyGlyphIds) { + var itemSize, itemDecode; + if (isGlyphLocationsLong) { + itemSize = 4; + itemDecode = function fontItemDecodeLong(data, offset) { + return (data[offset] << 24) | (data[offset + 1] << 16) | + (data[offset + 2] << 8) | data[offset + 3]; + }; + } else { + itemSize = 2; + itemDecode = function fontItemDecode(data, offset) { + return (data[offset] << 9) | (data[offset + 1] << 1); + }; + } + var data = locaTable.data, length = data.length; + var lastOffset = itemDecode(data, 0); + for (var i = itemSize, j = 0; i < length; i += itemSize, j++) { + var offset = itemDecode(data, i); + if (offset == lastOffset) + emptyGlyphIds[j] = true; + lastOffset = offset; + } + } + function readGlyphNameMap(post, properties) { var start = (font.start ? font.start : 0) + post.offset; font.pos = start; @@ -1782,11 +1806,15 @@ var Font = (function FontClosure() { sanitizeMetrics(font, hhea, hmtx, numGlyphs); sanitizeMetrics(font, vhea, vmtx, numGlyphs); + var isGlyphLocationsLong = int16([head.data[50], head.data[51]]); if (head && loca && glyf) { - var isGlyphLocationsLong = int16([head.data[50], head.data[51]]); sanitizeGlyphLocations(loca, glyf, numGlyphs, isGlyphLocationsLong); } + var emptyGlyphIds = []; + if (glyf) + findEmptyGlyphs(loca, isGlyphLocationsLong, emptyGlyphIds); + // Sanitizer reduces the glyph advanceWidth to the maxAdvanceWidth // Sometimes it's 0. 
That needs to be fixed if (hhea.data[10] == 0 && hhea.data[11] == 0) { @@ -1918,6 +1946,15 @@ var Font = (function FontClosure() { } } + // remove glyph references outside range of avaialable glyphs or empty + for (var i = ids.length - 1; i >= 0; i--) { + if (ids[i] < numGlyphs && + (!emptyGlyphIds[ids[i]] || this.isSymbolicFont)) + continue; + ids.splice(i, 1); + glyphs.splice(i, 1); + } + if (hasShortCmap && this.hasEncoding && !this.isSymbolicFont) { // Re-encode short map encoding to unicode -- that simplifies the // resolution of MacRoman encoded glyphs logic for TrueType fonts: @@ -1951,9 +1988,11 @@ var Font = (function FontClosure() { // Re-encode cmap encoding to unicode, based on the 'post' table data // diffrence array or base encoding var reverseMap = []; - for (var i = 0, ii = glyphs.length; i < ii; i++) + for (var i = 0, ii = glyphs.length; i < ii; i++) { reverseMap[glyphs[i].unicode] = i; + } + var backtrackReplacements = []; for (var i = 0, ii = glyphs.length; i < ii; i++) { var code = glyphs[i].unicode; var changeCode = false; @@ -1966,13 +2005,36 @@ var Font = (function FontClosure() { } if (glyphName in GlyphsUnicode) { var unicode = GlyphsUnicode[glyphName]; - if (!unicode || (unicode in reverseMap)) - continue; // unknown glyph name or its place is taken + if (!unicode || reverseMap[unicode] === i) + continue; // unknown glyph name or in its own place - glyphs[i].unicode = unicode; - reverseMap[unicode] = i; - if (changeCode) - toFontChar[code] = unicode; + if (unicode in reverseMap) { + backtrackReplacements[unicode] = { + index: i, + code: code, + changeCode: changeCode + }; + continue; // its place is taken + } + + var index = i; + while (true) { + glyphs[index].unicode = unicode; + reverseMap[unicode] = index; + if (changeCode) + toFontChar[code] = unicode; + + // checking if available place can be used by other glyph + var backtrack = backtrackReplacements[code]; + if (!backtrack) + break; + + delete backtrackReplacements[code]; + index = 
backtrack.index; + code = backtrack.code; + changeCode = backtrack.changeCode; + unicode = code; + } } this.useToFontChar = true; } @@ -1988,12 +2050,6 @@ var Font = (function FontClosure() { this.useToFontChar = true; } - // remove glyph references outside range of avaialable glyphs - for (var i = 0, ii = ids.length; i < ii; i++) { - if (ids[i] >= numGlyphs) - ids[i] = 0; - } - createGlyphNameMap(glyphs, ids, properties); this.glyphNameMap = properties.glyphNameMap; From a66b1a7ad3a271daabd6d25ed6e420c476905fe0 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 17 Mar 2012 23:05:22 -0500 Subject: [PATCH 2/4] Fix unicode re-assignment; MacRomanEncoding detection --- src/fonts.js | 56 ++++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index e3178ae9d..0c26ec884 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -174,7 +174,6 @@ var Encodings = { '', '', 'Lslash', 'Oslash', 'OE', 'ordmasculine', '', '', '', '', '', 'ae', '', '', '', 'dotlessi', '', '', 'lslash', 'oslash', 'oe', 'germandbls'], WinAnsiEncoding: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', - '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent', 'ampersand', 'quotesingle', 'parenleft', 'parenright', 'asterisk', 'plus', @@ -1947,12 +1946,22 @@ var Font = (function FontClosure() { } // remove glyph references outside range of avaialable glyphs or empty + var glyphsRemoved = 0; for (var i = ids.length - 1; i >= 0; i--) { if (ids[i] < numGlyphs && (!emptyGlyphIds[ids[i]] || this.isSymbolicFont)) continue; ids.splice(i, 1); glyphs.splice(i, 1); + glyphsRemoved++; + } + + // heuristics: if removed more than 2 glyphs encoding WinAnsiEncoding + // does not set properly + if (glyphsRemoved > 2) { + warn('Switching TrueType encoding to MacRomanEncoding for ' + + this.name + ' font'); + encoding = Encodings.MacRomanEncoding; } 
if (hasShortCmap && this.hasEncoding && !this.isSymbolicFont) { @@ -1992,7 +2001,7 @@ var Font = (function FontClosure() { reverseMap[glyphs[i].unicode] = i; } - var backtrackReplacements = []; + var newGlyphUnicodes = []; for (var i = 0, ii = glyphs.length; i < ii; i++) { var code = glyphs[i].unicode; var changeCode = false; @@ -2008,36 +2017,23 @@ var Font = (function FontClosure() { if (!unicode || reverseMap[unicode] === i) continue; // unknown glyph name or in its own place - if (unicode in reverseMap) { - backtrackReplacements[unicode] = { - index: i, - code: code, - changeCode: changeCode - }; - continue; // its place is taken - } - - var index = i; - while (true) { - glyphs[index].unicode = unicode; - reverseMap[unicode] = index; - if (changeCode) - toFontChar[code] = unicode; - - // checking if available place can be used by other glyph - var backtrack = backtrackReplacements[code]; - if (!backtrack) - break; - - delete backtrackReplacements[code]; - index = backtrack.index; - code = backtrack.code; - changeCode = backtrack.changeCode; - unicode = code; - } + newGlyphUnicodes[i] = unicode; + if (changeCode) + toFontChar[code] = unicode; + delete reverseMap[code]; } - this.useToFontChar = true; } + for (var index in newGlyphUnicodes) { + var unicode = newGlyphUnicodes[index]; + if (reverseMap[unicode]) { + // avoiding assigning to the same unicode + glyphs[index].unicode = unusedUnicode++; + continue; + } + glyphs[index].unicode = unicode; + reverseMap[unicode] = index; + } + this.useToFontChar = true; } // Moving all symbolic font glyphs into 0xF000 - 0xF0FF range. 
From b2b78cfeb2a07e6301d6f3238954b77c07bc7322 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 17 Mar 2012 23:13:54 -0500 Subject: [PATCH 3/4] Add tests from #1309 and #1317 --- test/test_manifest.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/test_manifest.json b/test/test_manifest.json index 16d924151..9cfe673f2 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -515,5 +515,19 @@ "pageLimit": 2, "link": true, "type": "eq" + }, + { "id": "issue1309", + "file": "pdfs/issue1309.pdf", + "md5": "e835fb7f3dab3073ad37d0bd3c6399fa", + "rounds": 1, + "link": true, + "type": "eq" + }, + { "id": "issue1317", + "file": "pdfs/issue1317.pdf", + "md5": "6fb46275b30c48c8985617d4f86199e3", + "rounds": 1, + "link": true, + "type": "eq" } ] From 73c9f8797b7c35f62f7d29f3cc77bfbd4153be46 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 17 Mar 2012 23:22:42 -0500 Subject: [PATCH 4/4] Add missing .link files; minor fix --- src/fonts.js | 3 +-- test/pdfs/issue1309.pdf.link | 1 + test/pdfs/issue1317.pdf.link | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 test/pdfs/issue1309.pdf.link create mode 100644 test/pdfs/issue1317.pdf.link diff --git a/src/fonts.js b/src/fonts.js index 0c26ec884..a13c948ab 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -1997,9 +1997,8 @@ var Font = (function FontClosure() { // Re-encode cmap encoding to unicode, based on the 'post' table data // diffrence array or base encoding var reverseMap = []; - for (var i = 0, ii = glyphs.length; i < ii; i++) { + for (var i = 0, ii = glyphs.length; i < ii; i++) reverseMap[glyphs[i].unicode] = i; - } var newGlyphUnicodes = []; for (var i = 0, ii = glyphs.length; i < ii; i++) { diff --git a/test/pdfs/issue1309.pdf.link b/test/pdfs/issue1309.pdf.link new file mode 100644 index 000000000..1351d45ef --- /dev/null +++ b/test/pdfs/issue1309.pdf.link @@ -0,0 +1 @@ +http://www.lufthansa.com/mediapool/pdf/31/media_907231.pdf diff --git 
a/test/pdfs/issue1317.pdf.link b/test/pdfs/issue1317.pdf.link new file mode 100644 index 000000000..67c4d50ef --- /dev/null +++ b/test/pdfs/issue1317.pdf.link @@ -0,0 +1 @@ +http://iliad.fr/presse/2012/CP_080312_Free_mobile.pdf