From dd066f8369cad79ca534c077af9f421da3f611e3 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Thu, 26 Jan 2012 18:51:58 -0600 Subject: [PATCH 1/3] Fixing standard encoding mapping --- src/fonts.js | 27 ++++++++++++++------------- test/pdfs/issue1127.pdf.link | 1 + test/test_manifest.json | 7 +++++++ 3 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 test/pdfs/issue1127.pdf.link diff --git a/src/fonts.js b/src/fonts.js index adcedd55c..7f72c8086 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -160,19 +160,20 @@ var Encodings = { 'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - 'braceleft', 'bar', 'braceright', 'asciitilde', '', '', 'exclamdown', - 'cent', 'sterling', 'fraction', 'yen', 'florin', 'section', 'currency', - 'quotesingle', 'quotedblleft', 'guillemotleft', 'guilsinglleft', - 'guilsinglright', 'fi', 'fl', '', 'endash', 'dagger', 'daggerdbl', - 'periodcentered', '', 'paragraph', 'bullet', 'quotesinglbase', - 'quotedblbase', 'quotedblright', 'guillemotright', 'ellipsis', - 'perthousand', '', 'questiondown', '', 'grave', 'acute', 'circumflex', - 'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', '', 'ring', - 'cedilla', '', 'hungarumlaut', 'ogonek', 'caron', 'emdash', '', '', '', - '', '', '', '', '', '', '', '', '', '', '', '', '', 'AE', '', - 'ordfeminine', '', '', '', '', 'Lslash', 'Oslash', 'OE', 'ordmasculine', - '', '', '', '', '', 'ae', '', '', '', 'dotlessi', '', '', 'lslash', - 'oslash', 'oe', 'germandbls' + 'braceleft', 'bar', 'braceright', 'asciitilde', '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '', 'exclamdown', 'cent', 'sterling', + 'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle', + 'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 
'fi', + 'fl', '', 'endash', 'dagger', 'daggerdbl', 'periodcentered', '', + 'paragraph', 'bullet', 'quotesinglbase', 'quotedblbase', 'quotedblright', + 'guillemotright', 'ellipsis', 'perthousand', '', 'questiondown', '', + 'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent', + 'dieresis', '', 'ring', 'cedilla', '', 'hungarumlaut', 'ogonek', 'caron', + 'emdash', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', + 'AE', '', 'ordfeminine', '', '', '', '', 'Lslash', 'Oslash', 'OE', + 'ordmasculine', '', '', '', '', '', 'ae', '', '', '', 'dotlessi', '', '', + 'lslash', 'oslash', 'oe', 'germandbls' ]); }, get WinAnsiEncoding() { diff --git a/test/pdfs/issue1127.pdf.link b/test/pdfs/issue1127.pdf.link new file mode 100644 index 000000000..2df2304ba --- /dev/null +++ b/test/pdfs/issue1127.pdf.link @@ -0,0 +1 @@ +https://vmp.ethz.ch/pdfs/diplome/vordiplome/Block%201/Algorithmen_%26_Komplexitaet/AlgoKo_f08_Aufg.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 648d1b49b..c6fed0a35 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -410,6 +410,13 @@ "link": true, "type": "eq" }, + { "id": "issue1127", + "file": "pdfs/issue1127.pdf", + "md5": "4fb2be5ffefeafda4ba977de2a1bb4d8", + "rounds": 1, + "link": true, + "type": "eq" + }, { "id": "liveprogramming", "file": "pdfs/liveprogramming.pdf", "md5": "7bd4dad1188232ef597d36fd72c33e52", From 10a0a60f8e7ae231a44a91a7b1df9ab1ff166be8 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Fri, 27 Jan 2012 18:53:05 -0600 Subject: [PATCH 2/3] Fixing symbols encoding --- src/evaluator.js | 9 ++++++--- src/fonts.js | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index 21530f42f..c70013d25 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -481,8 +481,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { properties.cidToGidMap = this.readCidToGidMap(cidToGidMap); } + var flags = 
properties.flags; var differences = []; - var baseEncoding = Encodings.StandardEncoding; + var baseEncoding = !!(flags & FontFlags.Symbolic) ? + Encodings.symbolsEncoding : Encodings.StandardEncoding; var hasEncoding = dict.has('Encoding'); if (hasEncoding) { var encoding = xref.fetchIfRef(dict.get('Encoding')); @@ -761,8 +763,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // Simulating descriptor flags attribute var fontNameWoStyle = baseFontName.split('-')[0]; var flags = (serifFonts[fontNameWoStyle] || - (fontNameWoStyle.search(/serif/gi) != -1) ? 2 : 0) | - (symbolsFonts[fontNameWoStyle] ? 4 : 32); + (fontNameWoStyle.search(/serif/gi) != -1) ? FontFlags.Serif : 0) | + (symbolsFonts[fontNameWoStyle] ? FontFlags.Symbolic : + FontFlags.Nonsymbolic); var properties = { type: type.name, diff --git a/src/fonts.js b/src/fonts.js index 7f72c8086..bb91d7969 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -19,6 +19,18 @@ var kPDFGlyphSpaceUnits = 1000; // Until hinting is fully supported this constant can be used var kHintingEnabled = false; +var FontFlags = { + FixedPitch: 1, + Serif: 2, + Symbolic: 4, + Script: 8, + Nonsymbolic: 32, + Italic: 64, + AllCap: 65536, + SmallCap: 131072, + ForceBold: 262144 +}; + var Encodings = { get ExpertEncoding() { return shadow(this, 'ExpertEncoding', ['', '', '', '', '', '', '', '', '', @@ -762,8 +774,8 @@ var Font = (function FontClosure() { var names = name.split('+'); names = names.length > 1 ? 
names[1] : names[0]; names = names.split(/[-,_]/g)[0]; - this.isSerifFont = !!(properties.flags & 2); - this.isSymbolicFont = !!(properties.flags & 4); + this.isSerifFont = !!(properties.flags & FontFlags.Serif); + this.isSymbolicFont = !!(properties.flags & FontFlags.Symbolic); var type = properties.type; this.type = type; From 5415fed14d13a117438efef12a2ea616479785c1 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Fri, 27 Jan 2012 20:36:27 -0600 Subject: [PATCH 3/3] Mapping well-known chars to the similar equivalents in the normal characters range --- src/fonts.js | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index bb91d7969..3f618b82a 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -418,6 +418,19 @@ var symbolsFonts = { 'Dingbats': true, 'Symbol': true, 'ZapfDingbats': true }; +// Some characters, e.g. copyrightserif, are mapped to the private use area +// and might not be displayed using standard fonts. Map such well-known chars +// to their closest equivalents in the normal character range. 
+function mapPrivateUseChars(code) { + switch (code) { + case 0xF8E9: // copyrightsans + case 0xF6D9: // copyrightserif + return 0x00A9; // copyright + default: + return code; + } +} + var FontLoader = { listeningForFontLoad: false, @@ -2199,7 +2212,7 @@ var Font = (function FontClosure() { case 'CIDFontType0': if (this.noUnicodeAdaptation) { width = this.widths[this.unicodeToCID[charcode] || charcode]; - unicode = charcode; + unicode = mapPrivateUseChars(charcode); break; } unicode = this.toUnicode[charcode] || charcode; @@ -2207,7 +2220,7 @@ var Font = (function FontClosure() { case 'CIDFontType2': if (this.noUnicodeAdaptation) { width = this.widths[this.unicodeToCID[charcode] || charcode]; - unicode = charcode; + unicode = mapPrivateUseChars(charcode); break; } unicode = this.toUnicode[charcode] || charcode; @@ -2217,7 +2230,7 @@ var Font = (function FontClosure() { if (!isNum(width)) width = this.widths[glyphName]; if (this.noUnicodeAdaptation) { - unicode = GlyphsUnicode[glyphName] || charcode; + unicode = mapPrivateUseChars(GlyphsUnicode[glyphName] || charcode); break; } unicode = this.glyphNameMap[glyphName] ||