Add basic support for non-embedded Calibri fonts (issue 9195)
There are a number of issues with the fonts in the referenced PDF file. First of all, they contain broken `ToUnicode` data (`NUL` bytes all over the place). However, even if you skip those, the `ToUnicode` data appears to contain nothing but an `IdentityH` CMap, which won't help provide a proper glyph mapping. The real issue actually turns out to be that the PDF file uses the "Calibri" font[1], but doesn't include any font files. Since that one isn't a standard font, and uses a fairly different CID to GID map compared to the standard fonts, we're not able to render the file even remotely correctly. To work around this, I'm thus proposing that we include an (incomplete) glyph map for Calibri, and fall back to the standard Helvetica font. Obviously this isn't going to look perfect, but it's really the best that we can hope to achieve given that the PDF file is missing the necessary font data. Finally, please note that none of the PDF readers I've tried (Adobe Reader, PDFium in Chrome) were able to extract the text (which isn't very surprising, given the broken `ToUnicode` data). Fixes 9195. --- [1] According to Wikipedia, see https://en.wikipedia.org/wiki/Calibri, Calibri is (primarily) a Windows font.
This commit is contained in:
		
							parent
							
								
									de0bac727e
								
							
						
					
					
						commit
						08de655177
					
				| @ -28,7 +28,7 @@ import { | ||||
| } from './encodings'; | ||||
| import { | ||||
|   getGlyphMapForStandardFonts, getNonStdFontMap, getStdFontMap, | ||||
|   getSupplementalGlyphMapForArialBlack | ||||
|   getSupplementalGlyphMapForArialBlack, getSupplementalGlyphMapForCalibri | ||||
| } from './standard_fonts'; | ||||
| import { | ||||
|   getUnicodeForGlyph, getUnicodeRangeFor, mapSpecialUnicodeValues | ||||
| @ -1242,7 +1242,14 @@ var Font = (function FontClosure() { | ||||
|           for (charCode in SupplementalGlyphMapForArialBlack) { | ||||
|             map[+charCode] = SupplementalGlyphMapForArialBlack[charCode]; | ||||
|           } | ||||
|         } else if (/Calibri/i.test(name)) { | ||||
|           let SupplementalGlyphMapForCalibri = | ||||
|             getSupplementalGlyphMapForCalibri(); | ||||
|           for (charCode in SupplementalGlyphMapForCalibri) { | ||||
|             map[+charCode] = SupplementalGlyphMapForCalibri[charCode]; | ||||
|           } | ||||
|         } | ||||
| 
 | ||||
|         var isIdentityUnicode = this.toUnicode instanceof IdentityToUnicodeMap; | ||||
|         if (!isIdentityUnicode) { | ||||
|           this.toUnicode.forEach(function(charCode, unicodeCharCode) { | ||||
|  | ||||
| @ -83,6 +83,10 @@ var getStdFontMap = getLookupTableFactory(function (t) { | ||||
|  * a standard fonts without glyph data. | ||||
|  */ | ||||
| var getNonStdFontMap = getLookupTableFactory(function (t) { | ||||
|   t['Calibri'] = 'Helvetica'; | ||||
|   t['Calibri-Bold'] = 'Helvetica-Bold'; | ||||
|   t['Calibri-BoldItalic'] = 'Helvetica-BoldOblique'; | ||||
|   t['Calibri-Italic'] = 'Helvetica-Oblique'; | ||||
|   t['CenturyGothic'] = 'Helvetica'; | ||||
|   t['CenturyGothic-Bold'] = 'Helvetica-Bold'; | ||||
|   t['CenturyGothic-BoldItalic'] = 'Helvetica-BoldOblique'; | ||||
| @ -355,6 +359,28 @@ var getSupplementalGlyphMapForArialBlack = | ||||
|   t[227] = 322; t[264] = 261; t[291] = 346; | ||||
| }); | ||||
| 
 | ||||
| // The glyph map for Calibri (a Windows font) differs from the glyph map used
 | ||||
| // in the standard fonts. Hence we use this (incomplete) CID to GID mapping to
 | ||||
| // adjust the glyph map for non-embedded Calibri fonts.
 | ||||
| let getSupplementalGlyphMapForCalibri = getLookupTableFactory(function(t) { | ||||
|   t[1] = 32; t[4] = 65; t[17] = 66; t[18] = 67; t[24] = 68; t[28] = 69; | ||||
|   t[38] = 70; t[39] = 71; t[44] = 72; t[47] = 73; t[58] = 74; t[60] = 75; | ||||
|   t[62] = 76; t[68] = 77; t[69] = 78; t[75] = 79; t[87] = 80; t[89] = 81; | ||||
|   t[90] = 82; t[94] = 83; t[100] = 84; t[104] = 85; t[115] = 86; t[116] = 87; | ||||
|   t[121] = 88; t[122] = 89; t[127] = 90; t[258] = 97; t[268] = 261; t[271] = 98; | ||||
|   t[272] = 99; t[273] = 263; t[282] = 100; t[286] = 101; t[295] = 281; | ||||
|   t[296] = 102; t[336] = 103; t[346] = 104; t[349] = 105; t[361] = 106; | ||||
|   t[364] = 107; t[367] = 108; t[371] = 322; t[373] = 109; t[374] = 110; | ||||
|   t[381] = 111; t[383] = 243; t[393] = 112; t[395] = 113; t[396] = 114; | ||||
|   t[400] = 115; t[401] = 347; t[410] = 116; t[437] = 117; t[448] = 118; | ||||
|   t[449] = 119; t[454] = 120; t[455] = 121; t[460] = 122; t[463] = 380; | ||||
|   t[853] = 44; t[855] = 58; t[856] = 46; t[876] = 47; t[878] = 45; t[882] = 45; | ||||
|   t[894] = 40; t[895] = 41; t[896] = 91; t[897] = 93; t[923] = 64; t[1004] = 48; | ||||
|   t[1005] = 49; t[1006] = 50; t[1007] = 51; t[1008] = 52; t[1009] = 53; | ||||
|   t[1010] = 54; t[1011] = 55; t[1012] = 56; t[1013] = 57; t[1081] = 37; | ||||
|   t[1085] = 43; t[1086] = 45; | ||||
| }); | ||||
| 
 | ||||
| export { | ||||
|   getStdFontMap, | ||||
|   getNonStdFontMap, | ||||
| @ -362,4 +388,5 @@ export { | ||||
|   getSymbolsFonts, | ||||
|   getGlyphMapForStandardFonts, | ||||
|   getSupplementalGlyphMapForArialBlack, | ||||
|   getSupplementalGlyphMapForCalibri, | ||||
| }; | ||||
|  | ||||
							
								
								
									
										1
									
								
								test/pdfs/issue9195.pdf.link
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/issue9195.pdf.link
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| https://github.com/mozilla/pdf.js/files/1506940/testfile.pdf | ||||
| @ -718,6 +718,14 @@ | ||||
|        "link": false, | ||||
|        "type": "eq" | ||||
|     }, | ||||
|     {  "id": "issue9195", | ||||
|        "file": "pdfs/issue9195.pdf", | ||||
|        "md5": "90e78a11abdc6c5ae79b8b95cfbb1895", | ||||
|        "rounds": 1, | ||||
|        "link": true, | ||||
|        "lastPage": 1, | ||||
|        "type": "eq" | ||||
|     }, | ||||
|     {  "id": "issue8707", | ||||
|        "file": "pdfs/issue8707.pdf", | ||||
|        "md5": "d3dc670adde9ec9fb82c974027033029", | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user