For embedded Type1 fonts without included `ToUnicode`/`Encoding` data, attempt to improve text selection by using the `builtInEncoding` to amend the `toUnicode` map (issue 6901, issue 7182, issue 7217, bug 917796, bug 1242142)
Note that in order to prevent any possible issues, this patch does *not* try to amend the `toUnicode` data for Type1 fonts that contain either `ToUnicode` or `Encoding` entries in the font dictionary. Fixes, or at least improves, issues/bugs such as 6658, 6901, 7182, 7217, bug 917796, bug 1242142.
This commit is contained in:
parent
bf6f5d1cc9
commit
325f7afcca
@ -1757,6 +1757,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
|
|
||||||
properties.differences = differences;
|
properties.differences = differences;
|
||||||
properties.baseEncodingName = baseEncodingName;
|
properties.baseEncodingName = baseEncodingName;
|
||||||
|
properties.hasEncoding = !!baseEncodingName || differences.length > 0;
|
||||||
properties.dict = dict;
|
properties.dict = dict;
|
||||||
return toUnicodePromise.then(function(toUnicode) {
|
return toUnicodePromise.then(function(toUnicode) {
|
||||||
properties.toUnicode = toUnicode;
|
properties.toUnicode = toUnicode;
|
||||||
@ -1774,8 +1775,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
* {ToUnicodeMap|IdentityToUnicodeMap} object.
|
* {ToUnicodeMap|IdentityToUnicodeMap} object.
|
||||||
*/
|
*/
|
||||||
buildToUnicode: function PartialEvaluator_buildToUnicode(properties) {
|
buildToUnicode: function PartialEvaluator_buildToUnicode(properties) {
|
||||||
|
properties.hasIncludedToUnicodeMap =
|
||||||
|
!!properties.toUnicode && properties.toUnicode.length > 0;
|
||||||
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
||||||
if (properties.toUnicode && properties.toUnicode.length !== 0) {
|
if (properties.hasIncludedToUnicodeMap) {
|
||||||
return Promise.resolve(properties.toUnicode);
|
return Promise.resolve(properties.toUnicode);
|
||||||
}
|
}
|
||||||
// According to the spec if the font is a simple font we should only map
|
// According to the spec if the font is a simple font we should only map
|
||||||
|
@ -163,6 +163,30 @@ function adjustWidths(properties) {
|
|||||||
properties.defaultWidth *= scale;
|
properties.defaultWidth *= scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function adjustToUnicode(properties, builtInEncoding) {
|
||||||
|
if (properties.hasIncludedToUnicodeMap) {
|
||||||
|
return; // The font dictionary has a `ToUnicode` entry.
|
||||||
|
}
|
||||||
|
if (properties.hasEncoding) {
|
||||||
|
return; // The font dictionary has an `Encoding` entry.
|
||||||
|
}
|
||||||
|
if (builtInEncoding === properties.defaultEncoding) {
|
||||||
|
return; // No point in trying to adjust `toUnicode` if the encodings match.
|
||||||
|
}
|
||||||
|
if (properties.toUnicode instanceof IdentityToUnicodeMap) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
var toUnicode = [], glyphsUnicodeMap = getGlyphsUnicode();
|
||||||
|
for (var charCode in builtInEncoding) {
|
||||||
|
var glyphName = builtInEncoding[charCode];
|
||||||
|
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||||
|
if (unicode !== -1) {
|
||||||
|
toUnicode[charCode] = String.fromCharCode(unicode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
properties.toUnicode.amend(toUnicode);
|
||||||
|
}
|
||||||
|
|
||||||
function getFontType(type, subtype) {
|
function getFontType(type, subtype) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case 'Type1':
|
case 'Type1':
|
||||||
@ -261,7 +285,13 @@ var ToUnicodeMap = (function ToUnicodeMapClosure() {
|
|||||||
|
|
||||||
charCodeOf: function(v) {
|
charCodeOf: function(v) {
|
||||||
return this._map.indexOf(v);
|
return this._map.indexOf(v);
|
||||||
}
|
},
|
||||||
|
|
||||||
|
amend: function (map) {
|
||||||
|
for (var charCode in map) {
|
||||||
|
this._map[charCode] = map[charCode];
|
||||||
|
}
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
return ToUnicodeMap;
|
return ToUnicodeMap;
|
||||||
@ -297,7 +327,11 @@ var IdentityToUnicodeMap = (function IdentityToUnicodeMapClosure() {
|
|||||||
|
|
||||||
charCodeOf: function (v) {
|
charCodeOf: function (v) {
|
||||||
return (isInt(v) && v >= this.firstChar && v <= this.lastChar) ? v : -1;
|
return (isInt(v) && v >= this.firstChar && v <= this.lastChar) ? v : -1;
|
||||||
}
|
},
|
||||||
|
|
||||||
|
amend: function (map) {
|
||||||
|
error('Should not call amend()');
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
return IdentityToUnicodeMap;
|
return IdentityToUnicodeMap;
|
||||||
@ -765,6 +799,7 @@ var Font = (function FontClosure() {
|
|||||||
this.fontMatrix = properties.fontMatrix;
|
this.fontMatrix = properties.fontMatrix;
|
||||||
this.widths = properties.widths;
|
this.widths = properties.widths;
|
||||||
this.defaultWidth = properties.defaultWidth;
|
this.defaultWidth = properties.defaultWidth;
|
||||||
|
this.toUnicode = properties.toUnicode;
|
||||||
this.encoding = properties.baseEncoding;
|
this.encoding = properties.baseEncoding;
|
||||||
this.seacMap = properties.seacMap;
|
this.seacMap = properties.seacMap;
|
||||||
|
|
||||||
@ -2386,10 +2421,8 @@ var Font = (function FontClosure() {
|
|||||||
} else {
|
} else {
|
||||||
// Most of the following logic in this code branch is based on the
|
// Most of the following logic in this code branch is based on the
|
||||||
// 9.6.6.4 of the PDF spec.
|
// 9.6.6.4 of the PDF spec.
|
||||||
var hasEncoding =
|
var cmapTable = readCmapTable(tables['cmap'], font, this.isSymbolicFont,
|
||||||
properties.differences.length > 0 || !!properties.baseEncodingName;
|
properties.hasEncoding);
|
||||||
var cmapTable =
|
|
||||||
readCmapTable(tables['cmap'], font, this.isSymbolicFont, hasEncoding);
|
|
||||||
var cmapPlatformId = cmapTable.platformId;
|
var cmapPlatformId = cmapTable.platformId;
|
||||||
var cmapEncodingId = cmapTable.encodingId;
|
var cmapEncodingId = cmapTable.encodingId;
|
||||||
var cmapMappings = cmapTable.mappings;
|
var cmapMappings = cmapTable.mappings;
|
||||||
@ -2398,7 +2431,7 @@ var Font = (function FontClosure() {
|
|||||||
// The spec seems to imply that if the font is symbolic the encoding
|
// The spec seems to imply that if the font is symbolic the encoding
|
||||||
// should be ignored, this doesn't appear to work for 'preistabelle.pdf'
|
// should be ignored, this doesn't appear to work for 'preistabelle.pdf'
|
||||||
// where the font is symbolic and it has an encoding.
|
// where the font is symbolic and it has an encoding.
|
||||||
if (hasEncoding &&
|
if (properties.hasEncoding &&
|
||||||
(cmapPlatformId === 3 && cmapEncodingId === 1 ||
|
(cmapPlatformId === 3 && cmapEncodingId === 1 ||
|
||||||
cmapPlatformId === 1 && cmapEncodingId === 0) ||
|
cmapPlatformId === 1 && cmapEncodingId === 0) ||
|
||||||
(cmapPlatformId === -1 && cmapEncodingId === -1 && // Temporary hack
|
(cmapPlatformId === -1 && cmapEncodingId === -1 && // Temporary hack
|
||||||
@ -2562,6 +2595,12 @@ var Font = (function FontClosure() {
|
|||||||
// TODO: Check the charstring widths to determine this.
|
// TODO: Check the charstring widths to determine this.
|
||||||
properties.fixedPitch = false;
|
properties.fixedPitch = false;
|
||||||
|
|
||||||
|
if (properties.builtInEncoding) {
|
||||||
|
// For Type1 fonts that do not include either `ToUnicode` or `Encoding`
|
||||||
|
// data, attempt to use the `builtInEncoding` to improve text selection.
|
||||||
|
adjustToUnicode(properties, properties.builtInEncoding);
|
||||||
|
}
|
||||||
|
|
||||||
var mapping = font.getGlyphMapping(properties);
|
var mapping = font.getGlyphMapping(properties);
|
||||||
var newMapping = adjustMapping(mapping, properties);
|
var newMapping = adjustMapping(mapping, properties);
|
||||||
this.toFontChar = newMapping.toFontChar;
|
this.toFontChar = newMapping.toFontChar;
|
||||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -22,6 +22,7 @@
|
|||||||
!issue5808.pdf
|
!issue5808.pdf
|
||||||
!issue6204.pdf
|
!issue6204.pdf
|
||||||
!issue6782.pdf
|
!issue6782.pdf
|
||||||
|
!issue6901.pdf
|
||||||
!issue6961.pdf
|
!issue6961.pdf
|
||||||
!issue6962.pdf
|
!issue6962.pdf
|
||||||
!issue7020.pdf
|
!issue7020.pdf
|
||||||
|
BIN
test/pdfs/issue6901.pdf
Normal file
BIN
test/pdfs/issue6901.pdf
Normal file
Binary file not shown.
@ -1220,6 +1220,20 @@
|
|||||||
"link": false,
|
"link": false,
|
||||||
"type": "text"
|
"type": "text"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue6901-eq",
|
||||||
|
"file": "pdfs/issue6901.pdf",
|
||||||
|
"md5": "1a0604b1a7a3aaf2162b425a9a84230b",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
|
{ "id": "issue6901-text",
|
||||||
|
"file": "pdfs/issue6901.pdf",
|
||||||
|
"md5": "1a0604b1a7a3aaf2162b425a9a84230b",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"type": "text"
|
||||||
|
},
|
||||||
{ "id": "issue6962",
|
{ "id": "issue6962",
|
||||||
"file": "pdfs/issue6962.pdf",
|
"file": "pdfs/issue6962.pdf",
|
||||||
"md5": "d40e871ecca68baf93114bd28c782148",
|
"md5": "d40e871ecca68baf93114bd28c782148",
|
||||||
|
Loading…
Reference in New Issue
Block a user