From 08de65517758ef313d9cd1fd6d5224d00674c0f9 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 3 Dec 2017 14:02:22 +0100 Subject: [PATCH] Add basic support for non-embedded Calibri fonts (issue 9195) There are a number of issues with the fonts in the referenced PDF file. First of all, they contain broken `ToUnicode` data (`NUL` bytes all over the place). However, even if you skip those, the `ToUnicode` data appears to contain nothing but an `IdentityH` CMap which won't help provide a proper glyph mapping. The real issue actually turns out to be that the PDF file uses the "Calibri" font[1], but doesn't include any font files. Since that one isn't a standard font, and uses a fairly different CID to GID map compared to the standard fonts, we're not able to render the file even remotely correctly. To work around this, I'm thus proposing that we include an (incomplete) glyph map for Calibri, and fall back to the standard Helvetica font. Obviously this isn't going to look perfect, but it's really the best that we can hope to achieve given that the PDF file is missing the necessary font data. Finally, please note that none of the PDF readers I've tried (Adobe Reader, PDFium in Chrome) were able to extract the text (which isn't very surprising, given the broken `ToUnicode` data). Fixes 9195. --- [1] According to Wikipedia, see https://en.wikipedia.org/wiki/Calibri, Calibri is (primarily) a Windows font. 
--- src/core/fonts.js | 9 ++++++++- src/core/standard_fonts.js | 27 +++++++++++++++++++++++++++ test/pdfs/issue9195.pdf.link | 1 + test/test_manifest.json | 8 ++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 test/pdfs/issue9195.pdf.link diff --git a/src/core/fonts.js b/src/core/fonts.js index 2afa54d1d..f7e6582ad 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -28,7 +28,7 @@ import { } from './encodings'; import { getGlyphMapForStandardFonts, getNonStdFontMap, getStdFontMap, - getSupplementalGlyphMapForArialBlack + getSupplementalGlyphMapForArialBlack, getSupplementalGlyphMapForCalibri } from './standard_fonts'; import { getUnicodeForGlyph, getUnicodeRangeFor, mapSpecialUnicodeValues @@ -1242,7 +1242,14 @@ var Font = (function FontClosure() { for (charCode in SupplementalGlyphMapForArialBlack) { map[+charCode] = SupplementalGlyphMapForArialBlack[charCode]; } + } else if (/Calibri/i.test(name)) { + let SupplementalGlyphMapForCalibri = + getSupplementalGlyphMapForCalibri(); + for (charCode in SupplementalGlyphMapForCalibri) { + map[+charCode] = SupplementalGlyphMapForCalibri[charCode]; + } } + var isIdentityUnicode = this.toUnicode instanceof IdentityToUnicodeMap; if (!isIdentityUnicode) { this.toUnicode.forEach(function(charCode, unicodeCharCode) { diff --git a/src/core/standard_fonts.js b/src/core/standard_fonts.js index 541941935..3c73bc322 100644 --- a/src/core/standard_fonts.js +++ b/src/core/standard_fonts.js @@ -83,6 +83,10 @@ var getStdFontMap = getLookupTableFactory(function (t) { * a standard fonts without glyph data. 
*/ var getNonStdFontMap = getLookupTableFactory(function (t) { + t['Calibri'] = 'Helvetica'; + t['Calibri-Bold'] = 'Helvetica-Bold'; + t['Calibri-BoldItalic'] = 'Helvetica-BoldOblique'; + t['Calibri-Italic'] = 'Helvetica-Oblique'; t['CenturyGothic'] = 'Helvetica'; t['CenturyGothic-Bold'] = 'Helvetica-Bold'; t['CenturyGothic-BoldItalic'] = 'Helvetica-BoldOblique'; @@ -355,6 +359,28 @@ var getSupplementalGlyphMapForArialBlack = t[227] = 322; t[264] = 261; t[291] = 346; }); +// The glyph map for Calibri (a Windows font) differs from the glyph map used +// in the standard fonts. Hence we use this (incomplete) CID to GID mapping to +// adjust the glyph map for non-embedded Calibri fonts. +let getSupplementalGlyphMapForCalibri = getLookupTableFactory(function(t) { + t[1] = 32; t[4] = 65; t[17] = 66; t[18] = 67; t[24] = 68; t[28] = 69; + t[38] = 70; t[39] = 71; t[44] = 72; t[47] = 73; t[58] = 74; t[60] = 75; + t[62] = 76; t[68] = 77; t[69] = 78; t[75] = 79; t[87] = 80; t[89] = 81; + t[90] = 82; t[94] = 83; t[100] = 84; t[104] = 85; t[115] = 86; t[116] = 87; + t[121] = 88; t[122] = 89; t[127] = 90; t[258] = 97; t[268] = 261; t[271] = 98; + t[272] = 99; t[273] = 263; t[282] = 100; t[286] = 101; t[295] = 281; + t[296] = 102; t[336] = 103; t[346] = 104; t[349] = 105; t[361] = 106; + t[364] = 107; t[367] = 108; t[371] = 322; t[373] = 109; t[374] = 110; + t[381] = 111; t[383] = 243; t[393] = 112; t[395] = 113; t[396] = 114; + t[400] = 115; t[401] = 347; t[410] = 116; t[437] = 117; t[448] = 118; + t[449] = 119; t[454] = 120; t[455] = 121; t[460] = 122; t[463] = 380; + t[853] = 44; t[855] = 58; t[856] = 46; t[876] = 47; t[878] = 45; t[882] = 45; + t[894] = 40; t[895] = 41; t[896] = 91; t[897] = 93; t[923] = 64; t[1004] = 48; + t[1005] = 49; t[1006] = 50; t[1007] = 51; t[1008] = 52; t[1009] = 53; + t[1010] = 54; t[1011] = 55; t[1012] = 56; t[1013] = 57; t[1081] = 37; + t[1085] = 43; t[1086] = 45; +}); + export { getStdFontMap, getNonStdFontMap, @@ -362,4 +388,5 @@ export { 
getSymbolsFonts, getGlyphMapForStandardFonts, getSupplementalGlyphMapForArialBlack, + getSupplementalGlyphMapForCalibri, }; diff --git a/test/pdfs/issue9195.pdf.link b/test/pdfs/issue9195.pdf.link new file mode 100644 index 000000000..02eda0457 --- /dev/null +++ b/test/pdfs/issue9195.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/1506940/testfile.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 4cbc776b8..e5ac69934 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -718,6 +718,14 @@ "link": false, "type": "eq" }, + { "id": "issue9195", + "file": "pdfs/issue9195.pdf", + "md5": "90e78a11abdc6c5ae79b8b95cfbb1895", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "eq" + }, { "id": "issue8707", "file": "pdfs/issue8707.pdf", "md5": "d3dc670adde9ec9fb82c974027033029",