Merge pull request #7550 from Snuffleupagus/Type1-toUnicode-builtInEncoding-fallback
For embedded Type1 fonts without included `ToUnicode`/`Encoding` data, attempt to improve text selection by using the `builtInEncoding` to amend the `toUnicode` map (issue 6901, issue 7182, issue 7217, bug 917796, bug 1242142)
This commit is contained in:
		
						commit
						4acd31f51e
					
				| @ -1757,6 +1757,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { | |||||||
| 
 | 
 | ||||||
|       properties.differences = differences; |       properties.differences = differences; | ||||||
|       properties.baseEncodingName = baseEncodingName; |       properties.baseEncodingName = baseEncodingName; | ||||||
|  |       properties.hasEncoding = !!baseEncodingName || differences.length > 0; | ||||||
|       properties.dict = dict; |       properties.dict = dict; | ||||||
|       return toUnicodePromise.then(function(toUnicode) { |       return toUnicodePromise.then(function(toUnicode) { | ||||||
|         properties.toUnicode = toUnicode; |         properties.toUnicode = toUnicode; | ||||||
| @ -1774,8 +1775,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { | |||||||
|      *   {ToUnicodeMap|IdentityToUnicodeMap} object. |      *   {ToUnicodeMap|IdentityToUnicodeMap} object. | ||||||
|      */ |      */ | ||||||
|     buildToUnicode: function PartialEvaluator_buildToUnicode(properties) { |     buildToUnicode: function PartialEvaluator_buildToUnicode(properties) { | ||||||
|  |       properties.hasIncludedToUnicodeMap = | ||||||
|  |         !!properties.toUnicode && properties.toUnicode.length > 0; | ||||||
|       // Section 9.10.2 Mapping Character Codes to Unicode Values
 |       // Section 9.10.2 Mapping Character Codes to Unicode Values
 | ||||||
|       if (properties.toUnicode && properties.toUnicode.length !== 0) { |       if (properties.hasIncludedToUnicodeMap) { | ||||||
|         return Promise.resolve(properties.toUnicode); |         return Promise.resolve(properties.toUnicode); | ||||||
|       } |       } | ||||||
|       // According to the spec if the font is a simple font we should only map
 |       // According to the spec if the font is a simple font we should only map
 | ||||||
|  | |||||||
| @ -163,6 +163,30 @@ function adjustWidths(properties) { | |||||||
|   properties.defaultWidth *= scale; |   properties.defaultWidth *= scale; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | function adjustToUnicode(properties, builtInEncoding) { | ||||||
|  |   if (properties.hasIncludedToUnicodeMap) { | ||||||
|  |     return; // The font dictionary has a `ToUnicode` entry.
 | ||||||
|  |   } | ||||||
|  |   if (properties.hasEncoding) { | ||||||
|  |     return; // The font dictionary has an `Encoding` entry.
 | ||||||
|  |   } | ||||||
|  |   if (builtInEncoding === properties.defaultEncoding) { | ||||||
|  |     return; // No point in trying to adjust `toUnicode` if the encodings match.
 | ||||||
|  |   } | ||||||
|  |   if (properties.toUnicode instanceof IdentityToUnicodeMap) { | ||||||
|  |     return; | ||||||
|  |   } | ||||||
|  |   var toUnicode = [], glyphsUnicodeMap = getGlyphsUnicode(); | ||||||
|  |   for (var charCode in builtInEncoding) { | ||||||
|  |     var glyphName = builtInEncoding[charCode]; | ||||||
|  |     var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); | ||||||
|  |     if (unicode !== -1) { | ||||||
|  |       toUnicode[charCode] = String.fromCharCode(unicode); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   properties.toUnicode.amend(toUnicode); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| function getFontType(type, subtype) { | function getFontType(type, subtype) { | ||||||
|   switch (type) { |   switch (type) { | ||||||
|     case 'Type1': |     case 'Type1': | ||||||
| @ -261,7 +285,13 @@ var ToUnicodeMap = (function ToUnicodeMapClosure() { | |||||||
| 
 | 
 | ||||||
|     charCodeOf: function(v) { |     charCodeOf: function(v) { | ||||||
|       return this._map.indexOf(v); |       return this._map.indexOf(v); | ||||||
|     } |     }, | ||||||
|  | 
 | ||||||
|  |     amend: function (map) { | ||||||
|  |       for (var charCode in map) { | ||||||
|  |         this._map[charCode] = map[charCode]; | ||||||
|  |       } | ||||||
|  |     }, | ||||||
|   }; |   }; | ||||||
| 
 | 
 | ||||||
|   return ToUnicodeMap; |   return ToUnicodeMap; | ||||||
| @ -297,7 +327,11 @@ var IdentityToUnicodeMap = (function IdentityToUnicodeMapClosure() { | |||||||
| 
 | 
 | ||||||
|     charCodeOf: function (v) { |     charCodeOf: function (v) { | ||||||
|       return (isInt(v) && v >= this.firstChar && v <= this.lastChar) ? v : -1; |       return (isInt(v) && v >= this.firstChar && v <= this.lastChar) ? v : -1; | ||||||
|     } |     }, | ||||||
|  | 
 | ||||||
|  |     amend: function (map) { | ||||||
|  |       error('Should not call amend()'); | ||||||
|  |     }, | ||||||
|   }; |   }; | ||||||
| 
 | 
 | ||||||
|   return IdentityToUnicodeMap; |   return IdentityToUnicodeMap; | ||||||
| @ -765,6 +799,7 @@ var Font = (function FontClosure() { | |||||||
|     this.fontMatrix = properties.fontMatrix; |     this.fontMatrix = properties.fontMatrix; | ||||||
|     this.widths = properties.widths; |     this.widths = properties.widths; | ||||||
|     this.defaultWidth = properties.defaultWidth; |     this.defaultWidth = properties.defaultWidth; | ||||||
|  |     this.toUnicode = properties.toUnicode; | ||||||
|     this.encoding = properties.baseEncoding; |     this.encoding = properties.baseEncoding; | ||||||
|     this.seacMap = properties.seacMap; |     this.seacMap = properties.seacMap; | ||||||
| 
 | 
 | ||||||
| @ -2386,10 +2421,8 @@ var Font = (function FontClosure() { | |||||||
|       } else { |       } else { | ||||||
|         // Most of the following logic in this code branch is based on the
 |         // Most of the following logic in this code branch is based on the
 | ||||||
|         // 9.6.6.4 of the PDF spec.
 |         // 9.6.6.4 of the PDF spec.
 | ||||||
|         var hasEncoding = |         var cmapTable = readCmapTable(tables['cmap'], font, this.isSymbolicFont, | ||||||
|           properties.differences.length > 0 || !!properties.baseEncodingName; |                                       properties.hasEncoding); | ||||||
|         var cmapTable = |  | ||||||
|           readCmapTable(tables['cmap'], font, this.isSymbolicFont, hasEncoding); |  | ||||||
|         var cmapPlatformId = cmapTable.platformId; |         var cmapPlatformId = cmapTable.platformId; | ||||||
|         var cmapEncodingId = cmapTable.encodingId; |         var cmapEncodingId = cmapTable.encodingId; | ||||||
|         var cmapMappings = cmapTable.mappings; |         var cmapMappings = cmapTable.mappings; | ||||||
| @ -2398,7 +2431,7 @@ var Font = (function FontClosure() { | |||||||
|         // The spec seems to imply that if the font is symbolic the encoding
 |         // The spec seems to imply that if the font is symbolic the encoding
 | ||||||
|         // should be ignored, this doesn't appear to work for 'preistabelle.pdf'
 |         // should be ignored, this doesn't appear to work for 'preistabelle.pdf'
 | ||||||
|         // where the font is symbolic and it has an encoding.
 |         // where the font is symbolic and it has an encoding.
 | ||||||
|         if (hasEncoding && |         if (properties.hasEncoding && | ||||||
|             (cmapPlatformId === 3 && cmapEncodingId === 1 || |             (cmapPlatformId === 3 && cmapEncodingId === 1 || | ||||||
|              cmapPlatformId === 1 && cmapEncodingId === 0) || |              cmapPlatformId === 1 && cmapEncodingId === 0) || | ||||||
|             (cmapPlatformId === -1 && cmapEncodingId === -1 && // Temporary hack
 |             (cmapPlatformId === -1 && cmapEncodingId === -1 && // Temporary hack
 | ||||||
| @ -2562,6 +2595,12 @@ var Font = (function FontClosure() { | |||||||
|       // TODO: Check the charstring widths to determine this.
 |       // TODO: Check the charstring widths to determine this.
 | ||||||
|       properties.fixedPitch = false; |       properties.fixedPitch = false; | ||||||
| 
 | 
 | ||||||
|  |       if (properties.builtInEncoding) { | ||||||
|  |         // For Type1 fonts that do not include either `ToUnicode` or `Encoding`
 | ||||||
|  |         // data, attempt to use the `builtInEncoding` to improve text selection.
 | ||||||
|  |         adjustToUnicode(properties, properties.builtInEncoding); | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|       var mapping = font.getGlyphMapping(properties); |       var mapping = font.getGlyphMapping(properties); | ||||||
|       var newMapping = adjustMapping(mapping, properties); |       var newMapping = adjustMapping(mapping, properties); | ||||||
|       this.toFontChar = newMapping.toFontChar; |       this.toFontChar = newMapping.toFontChar; | ||||||
|  | |||||||
							
								
								
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -22,6 +22,7 @@ | |||||||
| !issue5808.pdf | !issue5808.pdf | ||||||
| !issue6204.pdf | !issue6204.pdf | ||||||
| !issue6782.pdf | !issue6782.pdf | ||||||
|  | !issue6901.pdf | ||||||
| !issue6961.pdf | !issue6961.pdf | ||||||
| !issue6962.pdf | !issue6962.pdf | ||||||
| !issue7020.pdf | !issue7020.pdf | ||||||
|  | |||||||
							
								
								
									
										
											BIN
										
									
								
								test/pdfs/issue6901.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								test/pdfs/issue6901.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| @ -1220,6 +1220,20 @@ | |||||||
|        "link": false, |        "link": false, | ||||||
|        "type": "text" |        "type": "text" | ||||||
|     }, |     }, | ||||||
|  |     {  "id": "issue6901-eq", | ||||||
|  |        "file": "pdfs/issue6901.pdf", | ||||||
|  |        "md5": "1a0604b1a7a3aaf2162b425a9a84230b", | ||||||
|  |        "rounds": 1, | ||||||
|  |        "link": false, | ||||||
|  |        "type": "eq" | ||||||
|  |     }, | ||||||
|  |     {  "id": "issue6901-text", | ||||||
|  |        "file": "pdfs/issue6901.pdf", | ||||||
|  |        "md5": "1a0604b1a7a3aaf2162b425a9a84230b", | ||||||
|  |        "rounds": 1, | ||||||
|  |        "link": false, | ||||||
|  |        "type": "text" | ||||||
|  |     }, | ||||||
|     {  "id": "issue6962", |     {  "id": "issue6962", | ||||||
|        "file": "pdfs/issue6962.pdf", |        "file": "pdfs/issue6962.pdf", | ||||||
|        "md5": "d40e871ecca68baf93114bd28c782148", |        "md5": "d40e871ecca68baf93114bd28c782148", | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user