Merge pull request #7069 from Snuffleupagus/TrueType-recover-glyphNames
Convert `uniXXXX` glyph names to proper ones when building the `charCodeToGlyphId` map for TrueType fonts (bug 1132849, issue 6893, issue 6894)
This commit is contained in:
commit
4784863ef7
@ -80,6 +80,7 @@ var getSupplementalGlyphMapForArialBlack =
|
|||||||
coreStandardFonts.getSupplementalGlyphMapForArialBlack;
|
coreStandardFonts.getSupplementalGlyphMapForArialBlack;
|
||||||
var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor;
|
var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor;
|
||||||
var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues;
|
var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues;
|
||||||
|
var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph;
|
||||||
|
|
||||||
// Unicode Private Use Area
|
// Unicode Private Use Area
|
||||||
var PRIVATE_USE_OFFSET_START = 0xE000;
|
var PRIVATE_USE_OFFSET_START = 0xE000;
|
||||||
@ -465,7 +466,7 @@ var ProblematicCharRanges = new Int32Array([
|
|||||||
*/
|
*/
|
||||||
var Font = (function FontClosure() {
|
var Font = (function FontClosure() {
|
||||||
function Font(name, file, properties) {
|
function Font(name, file, properties) {
|
||||||
var charCode, glyphName, fontChar;
|
var charCode, glyphName, unicode, fontChar;
|
||||||
|
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.loadedName = properties.loadedName;
|
this.loadedName = properties.loadedName;
|
||||||
@ -609,21 +610,25 @@ var Font = (function FontClosure() {
|
|||||||
this.toFontChar[charCode] = fontChar;
|
this.toFontChar[charCode] = fontChar;
|
||||||
}
|
}
|
||||||
} else if (isStandardFont) {
|
} else if (isStandardFont) {
|
||||||
this.toFontChar = [];
|
|
||||||
glyphsUnicodeMap = getGlyphsUnicode();
|
glyphsUnicodeMap = getGlyphsUnicode();
|
||||||
for (charCode in properties.defaultEncoding) {
|
for (charCode in properties.defaultEncoding) {
|
||||||
glyphName = (properties.differences[charCode] ||
|
glyphName = (properties.differences[charCode] ||
|
||||||
properties.defaultEncoding[charCode]);
|
properties.defaultEncoding[charCode]);
|
||||||
this.toFontChar[charCode] = glyphsUnicodeMap[glyphName];
|
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||||
|
if (unicode !== -1) {
|
||||||
|
this.toFontChar[charCode] = unicode;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1);
|
|
||||||
glyphsUnicodeMap = getGlyphsUnicode();
|
glyphsUnicodeMap = getGlyphsUnicode();
|
||||||
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
|
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
|
||||||
if (notCidFont) {
|
if (!this.composite) {
|
||||||
glyphName = (properties.differences[charCode] ||
|
glyphName = (properties.differences[charCode] ||
|
||||||
properties.defaultEncoding[charCode]);
|
properties.defaultEncoding[charCode]);
|
||||||
unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode);
|
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||||
|
if (unicode !== -1) {
|
||||||
|
unicodeCharCode = unicode;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
this.toFontChar[charCode] = unicodeCharCode;
|
this.toFontChar[charCode] = unicodeCharCode;
|
||||||
}.bind(this));
|
}.bind(this));
|
||||||
@ -722,7 +727,7 @@ var Font = (function FontClosure() {
|
|||||||
function int16(b0, b1) {
|
function int16(b0, b1) {
|
||||||
return (b0 << 8) + b1;
|
return (b0 << 8) + b1;
|
||||||
}
|
}
|
||||||
|
|
||||||
function signedInt16(b0, b1) {
|
function signedInt16(b0, b1) {
|
||||||
var value = (b0 << 8) + b1;
|
var value = (b0 << 8) + b1;
|
||||||
return value & (1 << 15) ? value - 0x10000 : value;
|
return value & (1 << 15) ? value - 0x10000 : value;
|
||||||
@ -2283,6 +2288,26 @@ var Font = (function FontClosure() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Glyph names written in a 'uniXXXX' format are emitted by some bad PDF
// generators (e.g. Scribus PDF). Given such a name, try to find the
// standard glyph name that maps to the same Unicode value.
//
// Returns `name` unchanged when it is already a key of `glyphsUnicodeMap`,
// the first key of `glyphsUnicodeMap` whose value equals the Unicode value
// recovered for `name` when there is one, and otherwise `name` itself
// (after emitting a warning).
function recoverGlyphName(name, glyphsUnicodeMap) {
  if (glyphsUnicodeMap[name] === undefined) {
    // Not a known glyph name -- see if a Unicode value can be derived from it.
    var recoveredUnicode = getUnicodeForGlyph(name, glyphsUnicodeMap);
    if (recoveredUnicode !== -1) {
      // Reverse lookup: find a known glyph name with that Unicode value.
      for (var candidate in glyphsUnicodeMap) {
        if (glyphsUnicodeMap[candidate] === recoveredUnicode) {
          return candidate;
        }
      }
    }
    warn('Unable to recover a standard glyph name for: ' + name);
  }
  return name;
}
|
||||||
|
|
||||||
|
|
||||||
if (properties.type === 'CIDFontType2') {
|
if (properties.type === 'CIDFontType2') {
|
||||||
var cidToGidMap = properties.cidToGidMap || [];
|
var cidToGidMap = properties.cidToGidMap || [];
|
||||||
var isCidToGidMapEmpty = cidToGidMap.length === 0;
|
var isCidToGidMapEmpty = cidToGidMap.length === 0;
|
||||||
@ -2337,7 +2362,7 @@ var Font = (function FontClosure() {
|
|||||||
}
|
}
|
||||||
var glyphsUnicodeMap = getGlyphsUnicode();
|
var glyphsUnicodeMap = getGlyphsUnicode();
|
||||||
for (charCode = 0; charCode < 256; charCode++) {
|
for (charCode = 0; charCode < 256; charCode++) {
|
||||||
var glyphName;
|
var glyphName, standardGlyphName;
|
||||||
if (this.differences && charCode in this.differences) {
|
if (this.differences && charCode in this.differences) {
|
||||||
glyphName = this.differences[charCode];
|
glyphName = this.differences[charCode];
|
||||||
} else if (charCode in baseEncoding &&
|
} else if (charCode in baseEncoding &&
|
||||||
@ -2349,13 +2374,16 @@ var Font = (function FontClosure() {
|
|||||||
if (!glyphName) {
|
if (!glyphName) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
// Ensure that non-standard glyph names are resolved to valid ones.
|
||||||
|
standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap);
|
||||||
|
|
||||||
var unicodeOrCharCode, isUnicode = false;
|
var unicodeOrCharCode, isUnicode = false;
|
||||||
if (cmapPlatformId === 3 && cmapEncodingId === 1) {
|
if (cmapPlatformId === 3 && cmapEncodingId === 1) {
|
||||||
unicodeOrCharCode = glyphsUnicodeMap[glyphName];
|
unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName];
|
||||||
isUnicode = true;
|
isUnicode = true;
|
||||||
} else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
|
} else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
|
||||||
// TODO: the encoding needs to be updated with mac os table.
|
// TODO: the encoding needs to be updated with mac os table.
|
||||||
unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName);
|
unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
|
||||||
}
|
}
|
||||||
|
|
||||||
var found = false;
|
var found = false;
|
||||||
@ -2373,6 +2401,11 @@ var Font = (function FontClosure() {
|
|||||||
if (!found && properties.glyphNames) {
|
if (!found && properties.glyphNames) {
|
||||||
// Try to map using the post table.
|
// Try to map using the post table.
|
||||||
var glyphId = properties.glyphNames.indexOf(glyphName);
|
var glyphId = properties.glyphNames.indexOf(glyphName);
|
||||||
|
// The post table ought to use the same kind of glyph names as the
|
||||||
|
// `differences` array, but check the standard ones as a fallback.
|
||||||
|
if (glyphId === -1 && standardGlyphName !== glyphName) {
|
||||||
|
glyphId = properties.glyphNames.indexOf(standardGlyphName);
|
||||||
|
}
|
||||||
if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) {
|
if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) {
|
||||||
charCodeToGlyphId[charCode] = glyphId;
|
charCodeToGlyphId[charCode] = glyphId;
|
||||||
found = true;
|
found = true;
|
||||||
@ -2686,6 +2719,12 @@ var Font = (function FontClosure() {
|
|||||||
code = +glyphName.substr(1);
|
code = +glyphName.substr(1);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
// 'uniXXXX'/'uXXXX{XX}' glyphs
|
||||||
|
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||||
|
if (unicode !== -1) {
|
||||||
|
code = unicode;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (code) {
|
if (code) {
|
||||||
// If |baseEncodingName| is one of the predefined encodings,
|
// If |baseEncodingName| is one of the predefined encodings,
|
||||||
|
@ -65,6 +65,36 @@
|
|||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolve a glyph name to a Unicode value.
 *
 * The name is first looked up directly in `glyphsUnicodeMap`. When it is not
 * found there, names of the form 'uniXXXX' (exactly four hex digits) or
 * 'uXXXX{XX}' (four to six hex digits) are decoded from their hexadecimal
 * suffix.
 *
 * @param {string} name - The glyph name to resolve; may be empty/undefined.
 * @param {Object} glyphsUnicodeMap - Lookup from glyph name to Unicode value.
 * @returns {number} The Unicode value, or -1 when none can be determined.
 */
function getUnicodeForGlyph(name, glyphsUnicodeMap) {
  var unicode = glyphsUnicodeMap[name];
  if (unicode !== undefined) {
    return unicode;
  }
  if (!name) {
    return -1;
  }
  // Try to recover valid Unicode values from 'uniXXXX'/'uXXXX{XX}' glyphs.
  if (name[0] === 'u') {
    var nameLen = name.length, hexStr;

    if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX'
      hexStr = name.substring(3);
    } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}'
      hexStr = name.substring(1);
    } else {
      return -1;
    }
    // Require *every* character to be an upper-case hexadecimal digit, to
    // avoid false positives. Comparing against `toUpperCase()` alone is not
    // enough: `parseInt` stops at the first invalid character and also accepts
    // a sign or a '0X' prefix, so e.g. 'u12GH' would otherwise yield 0x12.
    if (/^[0-9A-F]+$/.test(hexStr)) {
      return parseInt(hexStr, 16);
    }
  }
  return -1;
}
|
||||||
|
|
||||||
var UnicodeRanges = [
|
var UnicodeRanges = [
|
||||||
{ 'begin': 0x0000, 'end': 0x007F }, // Basic Latin
|
{ 'begin': 0x0000, 'end': 0x007F }, // Basic Latin
|
||||||
{ 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement
|
{ 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement
|
||||||
@ -1612,4 +1642,5 @@
|
|||||||
exports.reverseIfRtl = reverseIfRtl;
|
exports.reverseIfRtl = reverseIfRtl;
|
||||||
exports.getUnicodeRangeFor = getUnicodeRangeFor;
|
exports.getUnicodeRangeFor = getUnicodeRangeFor;
|
||||||
exports.getNormalizedUnicodes = getNormalizedUnicodes;
|
exports.getNormalizedUnicodes = getNormalizedUnicodes;
|
||||||
|
exports.getUnicodeForGlyph = getUnicodeForGlyph;
|
||||||
}));
|
}));
|
||||||
|
2
test/pdfs/.gitignore
vendored
2
test/pdfs/.gitignore
vendored
@ -32,6 +32,8 @@
|
|||||||
!bug1200096.pdf
|
!bug1200096.pdf
|
||||||
!issue5564_reduced.pdf
|
!issue5564_reduced.pdf
|
||||||
!canvas.pdf
|
!canvas.pdf
|
||||||
|
!bug1132849.pdf
|
||||||
|
!issue6894.pdf
|
||||||
!issue5804.pdf
|
!issue5804.pdf
|
||||||
!ShowText-ShadingPattern.pdf
|
!ShowText-ShadingPattern.pdf
|
||||||
!complex_ttf_font.pdf
|
!complex_ttf_font.pdf
|
||||||
|
BIN
test/pdfs/bug1132849.pdf
Normal file
BIN
test/pdfs/bug1132849.pdf
Normal file
Binary file not shown.
BIN
test/pdfs/issue6894.pdf
Normal file
BIN
test/pdfs/issue6894.pdf
Normal file
Binary file not shown.
@ -728,6 +728,20 @@
|
|||||||
"rounds": 1,
|
"rounds": 1,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "bug1132849",
|
||||||
|
"file": "pdfs/bug1132849.pdf",
|
||||||
|
"md5": "aedfbead1f8feb35cf2e38b279133b47",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
|
{ "id": "issue6894",
|
||||||
|
"file": "pdfs/issue6894.pdf",
|
||||||
|
"md5": "bb84f2025c11f23cf436170049f81215",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
{ "id": "personwithdog",
|
{ "id": "personwithdog",
|
||||||
"file": "pdfs/personwithdog.pdf",
|
"file": "pdfs/personwithdog.pdf",
|
||||||
"md5": "cd68fb2ce00dab97801b3e51495b99e3",
|
"md5": "cd68fb2ce00dab97801b3e51495b99e3",
|
||||||
|
Loading…
Reference in New Issue
Block a user