Merge pull request #7550 from Snuffleupagus/Type1-toUnicode-builtInEncoding-fallback
For embedded Type1 fonts without included `ToUnicode`/`Encoding` data, attempt to improve text selection by using the `builtInEncoding` to amend the `toUnicode` map (issue 6901, issue 7182, issue 7217, bug 917796, bug 1242142)
This commit is contained in:
commit
4acd31f51e
@ -1757,6 +1757,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
|
||||
properties.differences = differences;
|
||||
properties.baseEncodingName = baseEncodingName;
|
||||
properties.hasEncoding = !!baseEncodingName || differences.length > 0;
|
||||
properties.dict = dict;
|
||||
return toUnicodePromise.then(function(toUnicode) {
|
||||
properties.toUnicode = toUnicode;
|
||||
@ -1774,8 +1775,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
* {ToUnicodeMap|IdentityToUnicodeMap} object.
|
||||
*/
|
||||
buildToUnicode: function PartialEvaluator_buildToUnicode(properties) {
|
||||
properties.hasIncludedToUnicodeMap =
|
||||
!!properties.toUnicode && properties.toUnicode.length > 0;
|
||||
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
||||
if (properties.toUnicode && properties.toUnicode.length !== 0) {
|
||||
if (properties.hasIncludedToUnicodeMap) {
|
||||
return Promise.resolve(properties.toUnicode);
|
||||
}
|
||||
// According to the spec if the font is a simple font we should only map
|
||||
|
@ -163,6 +163,30 @@ function adjustWidths(properties) {
|
||||
properties.defaultWidth *= scale;
|
||||
}
|
||||
|
||||
/**
 * Amends `properties.toUnicode` with character-code -> Unicode mappings that
 * are derived from the glyph names of a font's built-in encoding. Intended for
 * fonts whose dictionary provides neither `ToUnicode` nor `Encoding` data, to
 * improve text selection.
 *
 * The function returns without touching anything when:
 *  - the font dictionary included a `ToUnicode` map,
 *  - the font dictionary has an `Encoding` entry,
 *  - `builtInEncoding` is the very same object as `properties.defaultEncoding`,
 *  - `properties.toUnicode` is an `IdentityToUnicodeMap`.
 *
 * @param {Object} properties - Font properties. Reads
 *   `hasIncludedToUnicodeMap`, `hasEncoding`, `defaultEncoding` and
 *   `toUnicode`; calls `properties.toUnicode.amend(...)` with the new entries.
 * @param {Object} builtInEncoding - Maps character codes to glyph names.
 */
function adjustToUnicode(properties, builtInEncoding) {
  if (properties.hasIncludedToUnicodeMap) {
    return; // The font dictionary has a `ToUnicode` entry.
  }
  if (properties.hasEncoding) {
    return; // The font dictionary has an `Encoding` entry.
  }
  if (builtInEncoding === properties.defaultEncoding) {
    return; // No point in trying to adjust `toUnicode` if the encodings match.
  }
  if (properties.toUnicode instanceof IdentityToUnicodeMap) {
    return; // Identity maps must not be amended.
  }
  var toUnicode = [], glyphsUnicodeMap = getGlyphsUnicode();
  // `for...in` is deliberate: only the character codes that are actually
  // present in `builtInEncoding` are visited.
  for (var charCode in builtInEncoding) {
    var glyphName = builtInEncoding[charCode];
    var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
    if (unicode === -1) {
      continue; // No Unicode value is known for this glyph name.
    }
    if (unicode > 0xFFFF && unicode <= 0x10FFFF) {
      // `String.fromCharCode` keeps only the low 16 bits of its argument, so
      // supplementary-plane code points must be written as a surrogate pair.
      var offset = unicode - 0x10000;
      toUnicode[charCode] = String.fromCharCode(0xD800 + (offset >> 10),
                                                0xDC00 + (offset & 0x3FF));
    } else {
      toUnicode[charCode] = String.fromCharCode(unicode);
    }
  }
  properties.toUnicode.amend(toUnicode);
}
|
||||
|
||||
function getFontType(type, subtype) {
|
||||
switch (type) {
|
||||
case 'Type1':
|
||||
@ -261,7 +285,13 @@ var ToUnicodeMap = (function ToUnicodeMapClosure() {
|
||||
|
||||
// Reverse lookup: returns the position at which `v` is stored in `this._map`,
// as reported by `indexOf` (-1 when `v` is not found).
// NOTE(review): assumes `this._map` is an array indexed by character code --
// confirm against the constructor, which is not visible here.
charCodeOf: function(v) {
|
||||
return this._map.indexOf(v);
|
||||
}
|
||||
},
|
||||
|
||||
// Copies every enumerable entry of `map` into `this._map`, keyed by the same
// character code; entries that already exist for a code are overwritten.
amend: function (map) {
|
||||
for (var charCode in map) {
|
||||
this._map[charCode] = map[charCode];
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
return ToUnicodeMap;
|
||||
@ -297,7 +327,11 @@ var IdentityToUnicodeMap = (function IdentityToUnicodeMapClosure() {
|
||||
|
||||
// Returns `v` itself when `isInt(v)` holds and `v` lies within
// [this.firstChar, this.lastChar] (both bounds inclusive); otherwise -1.
charCodeOf: function (v) {
|
||||
return (isInt(v) && v >= this.firstChar && v <= this.lastChar) ? v : -1;
|
||||
}
|
||||
},
|
||||
|
||||
// This map type is not meant to be amended: `map` is ignored and every call is
// reported through `error()`.
// NOTE(review): presumably `error` throws -- confirm against its definition.
amend: function (map) {
|
||||
error('Should not call amend()');
|
||||
},
|
||||
};
|
||||
|
||||
return IdentityToUnicodeMap;
|
||||
@ -765,6 +799,7 @@ var Font = (function FontClosure() {
|
||||
this.fontMatrix = properties.fontMatrix;
|
||||
this.widths = properties.widths;
|
||||
this.defaultWidth = properties.defaultWidth;
|
||||
this.toUnicode = properties.toUnicode;
|
||||
this.encoding = properties.baseEncoding;
|
||||
this.seacMap = properties.seacMap;
|
||||
|
||||
@ -2386,10 +2421,8 @@ var Font = (function FontClosure() {
|
||||
} else {
|
||||
// Most of the following logic in this code branch is based on the
|
||||
// 9.6.6.4 of the PDF spec.
|
||||
var hasEncoding =
|
||||
properties.differences.length > 0 || !!properties.baseEncodingName;
|
||||
var cmapTable =
|
||||
readCmapTable(tables['cmap'], font, this.isSymbolicFont, hasEncoding);
|
||||
var cmapTable = readCmapTable(tables['cmap'], font, this.isSymbolicFont,
|
||||
properties.hasEncoding);
|
||||
var cmapPlatformId = cmapTable.platformId;
|
||||
var cmapEncodingId = cmapTable.encodingId;
|
||||
var cmapMappings = cmapTable.mappings;
|
||||
@ -2398,7 +2431,7 @@ var Font = (function FontClosure() {
|
||||
// The spec seems to imply that if the font is symbolic the encoding
|
||||
// should be ignored, this doesn't appear to work for 'preistabelle.pdf'
|
||||
// where the font is symbolic and it has an encoding.
|
||||
if (hasEncoding &&
|
||||
if (properties.hasEncoding &&
|
||||
(cmapPlatformId === 3 && cmapEncodingId === 1 ||
|
||||
cmapPlatformId === 1 && cmapEncodingId === 0) ||
|
||||
(cmapPlatformId === -1 && cmapEncodingId === -1 && // Temporary hack
|
||||
@ -2562,6 +2595,12 @@ var Font = (function FontClosure() {
|
||||
// TODO: Check the charstring widths to determine this.
|
||||
properties.fixedPitch = false;
|
||||
|
||||
if (properties.builtInEncoding) {
|
||||
// For Type1 fonts that do not include either `ToUnicode` or `Encoding`
|
||||
// data, attempt to use the `builtInEncoding` to improve text selection.
|
||||
adjustToUnicode(properties, properties.builtInEncoding);
|
||||
}
|
||||
|
||||
var mapping = font.getGlyphMapping(properties);
|
||||
var newMapping = adjustMapping(mapping, properties);
|
||||
this.toFontChar = newMapping.toFontChar;
|
||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -22,6 +22,7 @@
|
||||
!issue5808.pdf
|
||||
!issue6204.pdf
|
||||
!issue6782.pdf
|
||||
!issue6901.pdf
|
||||
!issue6961.pdf
|
||||
!issue6962.pdf
|
||||
!issue7020.pdf
|
||||
|
BIN
test/pdfs/issue6901.pdf
Normal file
BIN
test/pdfs/issue6901.pdf
Normal file
Binary file not shown.
@ -1220,6 +1220,20 @@
|
||||
"link": false,
|
||||
"type": "text"
|
||||
},
|
||||
{ "id": "issue6901-eq",
|
||||
"file": "pdfs/issue6901.pdf",
|
||||
"md5": "1a0604b1a7a3aaf2162b425a9a84230b",
|
||||
"rounds": 1,
|
||||
"link": false,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue6901-text",
|
||||
"file": "pdfs/issue6901.pdf",
|
||||
"md5": "1a0604b1a7a3aaf2162b425a9a84230b",
|
||||
"rounds": 1,
|
||||
"link": false,
|
||||
"type": "text"
|
||||
},
|
||||
{ "id": "issue6962",
|
||||
"file": "pdfs/issue6962.pdf",
|
||||
"md5": "d40e871ecca68baf93114bd28c782148",
|
||||
|
Loading…
x
Reference in New Issue
Block a user