Merge pull request #7550 from Snuffleupagus/Type1-toUnicode-builtInEncoding-fallback
For embedded Type1 fonts without included `ToUnicode`/`Encoding` data, attempt to improve text selection by using the `builtInEncoding` to amend the `toUnicode` map (issue 6901, issue 7182, issue 7217, bug 917796, bug 1242142)
This commit is contained in:
		
						commit
						4acd31f51e
					
				| @ -1757,6 +1757,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { | ||||
| 
 | ||||
|       properties.differences = differences; | ||||
|       properties.baseEncodingName = baseEncodingName; | ||||
|       properties.hasEncoding = !!baseEncodingName || differences.length > 0; | ||||
|       properties.dict = dict; | ||||
|       return toUnicodePromise.then(function(toUnicode) { | ||||
|         properties.toUnicode = toUnicode; | ||||
| @ -1774,8 +1775,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { | ||||
|      *   {ToUnicodeMap|IdentityToUnicodeMap} object. | ||||
|      */ | ||||
|     buildToUnicode: function PartialEvaluator_buildToUnicode(properties) { | ||||
|       properties.hasIncludedToUnicodeMap = | ||||
|         !!properties.toUnicode && properties.toUnicode.length > 0; | ||||
|       // Section 9.10.2 Mapping Character Codes to Unicode Values
 | ||||
|       if (properties.toUnicode && properties.toUnicode.length !== 0) { | ||||
|       if (properties.hasIncludedToUnicodeMap) { | ||||
|         return Promise.resolve(properties.toUnicode); | ||||
|       } | ||||
|       // According to the spec if the font is a simple font we should only map
 | ||||
|  | ||||
| @ -163,6 +163,30 @@ function adjustWidths(properties) { | ||||
|   properties.defaultWidth *= scale; | ||||
| } | ||||
| 
 | ||||
| function adjustToUnicode(properties, builtInEncoding) { | ||||
|   if (properties.hasIncludedToUnicodeMap) { | ||||
|     return; // The font dictionary has a `ToUnicode` entry.
 | ||||
|   } | ||||
|   if (properties.hasEncoding) { | ||||
|     return; // The font dictionary has an `Encoding` entry.
 | ||||
|   } | ||||
|   if (builtInEncoding === properties.defaultEncoding) { | ||||
|     return; // No point in trying to adjust `toUnicode` if the encodings match.
 | ||||
|   } | ||||
|   if (properties.toUnicode instanceof IdentityToUnicodeMap) { | ||||
|     return; | ||||
|   } | ||||
|   var toUnicode = [], glyphsUnicodeMap = getGlyphsUnicode(); | ||||
|   for (var charCode in builtInEncoding) { | ||||
|     var glyphName = builtInEncoding[charCode]; | ||||
|     var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); | ||||
|     if (unicode !== -1) { | ||||
|       toUnicode[charCode] = String.fromCharCode(unicode); | ||||
|     } | ||||
|   } | ||||
|   properties.toUnicode.amend(toUnicode); | ||||
| } | ||||
| 
 | ||||
| function getFontType(type, subtype) { | ||||
|   switch (type) { | ||||
|     case 'Type1': | ||||
| @ -261,7 +285,13 @@ var ToUnicodeMap = (function ToUnicodeMapClosure() { | ||||
| 
 | ||||
|     charCodeOf: function(v) { | ||||
|       return this._map.indexOf(v); | ||||
|     } | ||||
|     }, | ||||
| 
 | ||||
|     amend: function (map) { | ||||
|       for (var charCode in map) { | ||||
|         this._map[charCode] = map[charCode]; | ||||
|       } | ||||
|     }, | ||||
|   }; | ||||
| 
 | ||||
|   return ToUnicodeMap; | ||||
| @ -297,7 +327,11 @@ var IdentityToUnicodeMap = (function IdentityToUnicodeMapClosure() { | ||||
| 
 | ||||
|     charCodeOf: function (v) { | ||||
|       return (isInt(v) && v >= this.firstChar && v <= this.lastChar) ? v : -1; | ||||
|     } | ||||
|     }, | ||||
| 
 | ||||
|     amend: function (map) { | ||||
|       error('Should not call amend()'); | ||||
|     }, | ||||
|   }; | ||||
| 
 | ||||
|   return IdentityToUnicodeMap; | ||||
| @ -765,6 +799,7 @@ var Font = (function FontClosure() { | ||||
|     this.fontMatrix = properties.fontMatrix; | ||||
|     this.widths = properties.widths; | ||||
|     this.defaultWidth = properties.defaultWidth; | ||||
|     this.toUnicode = properties.toUnicode; | ||||
|     this.encoding = properties.baseEncoding; | ||||
|     this.seacMap = properties.seacMap; | ||||
| 
 | ||||
| @ -2386,10 +2421,8 @@ var Font = (function FontClosure() { | ||||
|       } else { | ||||
|         // Most of the following logic in this code branch is based on
 | ||||
|         // section 9.6.6.4 of the PDF spec.
 | ||||
|         var hasEncoding = | ||||
|           properties.differences.length > 0 || !!properties.baseEncodingName; | ||||
|         var cmapTable = | ||||
|           readCmapTable(tables['cmap'], font, this.isSymbolicFont, hasEncoding); | ||||
|         var cmapTable = readCmapTable(tables['cmap'], font, this.isSymbolicFont, | ||||
|                                       properties.hasEncoding); | ||||
|         var cmapPlatformId = cmapTable.platformId; | ||||
|         var cmapEncodingId = cmapTable.encodingId; | ||||
|         var cmapMappings = cmapTable.mappings; | ||||
| @ -2398,7 +2431,7 @@ var Font = (function FontClosure() { | ||||
|         // The spec seems to imply that if the font is symbolic the encoding
 | ||||
|         // should be ignored, this doesn't appear to work for 'preistabelle.pdf'
 | ||||
|         // where the font is symbolic and it has an encoding.
 | ||||
|         if (hasEncoding && | ||||
|         if (properties.hasEncoding && | ||||
|             (cmapPlatformId === 3 && cmapEncodingId === 1 || | ||||
|              cmapPlatformId === 1 && cmapEncodingId === 0) || | ||||
|             (cmapPlatformId === -1 && cmapEncodingId === -1 && // Temporary hack
 | ||||
| @ -2562,6 +2595,12 @@ var Font = (function FontClosure() { | ||||
|       // TODO: Check the charstring widths to determine this.
 | ||||
|       properties.fixedPitch = false; | ||||
| 
 | ||||
|       if (properties.builtInEncoding) { | ||||
|         // For Type1 fonts that do not include either `ToUnicode` or `Encoding`
 | ||||
|         // data, attempt to use the `builtInEncoding` to improve text selection.
 | ||||
|         adjustToUnicode(properties, properties.builtInEncoding); | ||||
|       } | ||||
| 
 | ||||
|       var mapping = font.getGlyphMapping(properties); | ||||
|       var newMapping = adjustMapping(mapping, properties); | ||||
|       this.toFontChar = newMapping.toFontChar; | ||||
|  | ||||
							
								
								
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -22,6 +22,7 @@ | ||||
| !issue5808.pdf | ||||
| !issue6204.pdf | ||||
| !issue6782.pdf | ||||
| !issue6901.pdf | ||||
| !issue6961.pdf | ||||
| !issue6962.pdf | ||||
| !issue7020.pdf | ||||
|  | ||||
							
								
								
									
										
											BIN
										
									
								
								test/pdfs/issue6901.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								test/pdfs/issue6901.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| @ -1220,6 +1220,20 @@ | ||||
|        "link": false, | ||||
|        "type": "text" | ||||
|     }, | ||||
|     {  "id": "issue6901-eq", | ||||
|        "file": "pdfs/issue6901.pdf", | ||||
|        "md5": "1a0604b1a7a3aaf2162b425a9a84230b", | ||||
|        "rounds": 1, | ||||
|        "link": false, | ||||
|        "type": "eq" | ||||
|     }, | ||||
|     {  "id": "issue6901-text", | ||||
|        "file": "pdfs/issue6901.pdf", | ||||
|        "md5": "1a0604b1a7a3aaf2162b425a9a84230b", | ||||
|        "rounds": 1, | ||||
|        "link": false, | ||||
|        "type": "text" | ||||
|     }, | ||||
|     {  "id": "issue6962", | ||||
|        "file": "pdfs/issue6962.pdf", | ||||
|        "md5": "d40e871ecca68baf93114bd28c782148", | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user