Convert uniXXXX glyph names to proper ones when building the charCodeToGlyphId map for TrueType fonts (bug 1132849, issue 6893, issue 6894)

This patch adds a `getUnicodeForGlyph` helper function, which is used to recover Unicode values for non-standard glyph names.

Some PDF generators, e.g. Scribus PDF, use improper `uniXXXX` glyph names, which breaks the glyph mapping. We can avoid this by converting them to "standard" glyph names instead.

Fixes https://bugzilla.mozilla.org/show_bug.cgi?id=1132849.
Fixes issue 6893.
Fixes issue 6894.
This commit is contained in:
Jonas Jenwald 2016-03-07 20:56:15 +01:00
parent 147598417c
commit dfe9015a43
6 changed files with 96 additions and 10 deletions

View File

@ -80,6 +80,7 @@ var getSupplementalGlyphMapForArialBlack =
coreStandardFonts.getSupplementalGlyphMapForArialBlack;
var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor;
var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues;
var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph;
// Unicode Private Use Area
var PRIVATE_USE_OFFSET_START = 0xE000;
@ -465,7 +466,7 @@ var ProblematicCharRanges = new Int32Array([
*/
var Font = (function FontClosure() {
function Font(name, file, properties) {
var charCode, glyphName, fontChar;
var charCode, glyphName, unicode, fontChar;
this.name = name;
this.loadedName = properties.loadedName;
@ -609,21 +610,25 @@ var Font = (function FontClosure() {
this.toFontChar[charCode] = fontChar;
}
} else if (isStandardFont) {
this.toFontChar = [];
glyphsUnicodeMap = getGlyphsUnicode();
for (charCode in properties.defaultEncoding) {
glyphName = (properties.differences[charCode] ||
properties.defaultEncoding[charCode]);
this.toFontChar[charCode] = glyphsUnicodeMap[glyphName];
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
if (unicode !== -1) {
this.toFontChar[charCode] = unicode;
}
}
} else {
var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1);
glyphsUnicodeMap = getGlyphsUnicode();
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
if (notCidFont) {
if (!this.composite) {
glyphName = (properties.differences[charCode] ||
properties.defaultEncoding[charCode]);
unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode);
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
if (unicode !== -1) {
unicodeCharCode = unicode;
}
}
this.toFontChar[charCode] = unicodeCharCode;
}.bind(this));
@ -722,7 +727,7 @@ var Font = (function FontClosure() {
// Combines two byte values into one number, with `b0` as the high-order
// byte (shifted left by eight bits) and `b1` added as the low-order byte.
function int16(b0, b1) {
  var highPart = b0 << 8;
  return highPart + b1;
}
function signedInt16(b0, b1) {
var value = (b0 << 8) + b1;
return value & (1 << 15) ? value - 0x10000 : value;
@ -2283,6 +2288,26 @@ var Font = (function FontClosure() {
return false;
}
// Certain PDF generators (Scribus PDF, for instance) emit glyph names in a
// 'uniXXXX' form. Given such a name, try to find the equivalent key in
// `glyphsUnicodeMap`; names that are already keys of the map are returned
// as-is. When no equivalent can be found, a warning is logged and the
// original name is handed back unchanged.
function recoverGlyphName(name, glyphsUnicodeMap) {
  var isKnownName = (glyphsUnicodeMap[name] !== undefined);
  if (!isKnownName) {
    // Non-standard name: resolve it to a Unicode value first, then search
    // the map for a glyph name carrying that same value.
    var codePoint = getUnicodeForGlyph(name, glyphsUnicodeMap);
    if (codePoint !== -1) {
      for (var candidate in glyphsUnicodeMap) {
        if (glyphsUnicodeMap[candidate] === codePoint) {
          return candidate;
        }
      }
    }
    warn('Unable to recover a standard glyph name for: ' + name);
  }
  return name;
}
if (properties.type === 'CIDFontType2') {
var cidToGidMap = properties.cidToGidMap || [];
var isCidToGidMapEmpty = cidToGidMap.length === 0;
@ -2337,7 +2362,7 @@ var Font = (function FontClosure() {
}
var glyphsUnicodeMap = getGlyphsUnicode();
for (charCode = 0; charCode < 256; charCode++) {
var glyphName;
var glyphName, standardGlyphName;
if (this.differences && charCode in this.differences) {
glyphName = this.differences[charCode];
} else if (charCode in baseEncoding &&
@ -2349,13 +2374,16 @@ var Font = (function FontClosure() {
if (!glyphName) {
continue;
}
// Ensure that non-standard glyph names are resolved to valid ones.
standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap);
var unicodeOrCharCode, isUnicode = false;
if (cmapPlatformId === 3 && cmapEncodingId === 1) {
unicodeOrCharCode = glyphsUnicodeMap[glyphName];
unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName];
isUnicode = true;
} else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
// TODO: the encoding needs to be updated with mac os table.
unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName);
unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
}
var found = false;
@ -2373,6 +2401,11 @@ var Font = (function FontClosure() {
if (!found && properties.glyphNames) {
// Try to map using the post table.
var glyphId = properties.glyphNames.indexOf(glyphName);
// The post table ought to use the same kind of glyph names as the
// `differences` array, but check the standard ones as a fallback.
if (glyphId === -1 && standardGlyphName !== glyphName) {
glyphId = properties.glyphNames.indexOf(standardGlyphName);
}
if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) {
charCodeToGlyphId[charCode] = glyphId;
found = true;
@ -2686,6 +2719,12 @@ var Font = (function FontClosure() {
code = +glyphName.substr(1);
}
break;
default:
// 'uniXXXX'/'uXXXX{XX}' glyphs
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
if (unicode !== -1) {
code = unicode;
}
}
if (code) {
// If |baseEncodingName| is one of the predefined encodings,

View File

@ -65,6 +65,36 @@
return code;
}
/**
 * Resolve a glyph name to a Unicode value.
 *
 * The name is first looked up in `glyphsUnicodeMap`. If it is not present,
 * names of the form 'uniXXXX' (exactly four hex digits) or 'uXXXX{XX}'
 * (four to six hex digits) are decoded directly from their hexadecimal
 * suffix.
 *
 * @param {string} name - The glyph name to resolve.
 * @param {Object} glyphsUnicodeMap - Lookup from glyph names to Unicode
 *   values.
 * @returns {number} The Unicode value, or -1 when the name is empty, is not
 *   in the map, and does not match one of the recoverable formats.
 */
function getUnicodeForGlyph(name, glyphsUnicodeMap) {
  var unicode = glyphsUnicodeMap[name];
  if (unicode !== undefined) {
    return unicode;
  }
  if (!name) {
    return -1;
  }
  // Try to recover valid Unicode values from 'uniXXXX'/'uXXXX{XX}' glyphs.
  if (name[0] === 'u') {
    var nameLen = name.length, hexStr;

    if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX'
      hexStr = name.substr(3);
    } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}'
      hexStr = name.substr(1);
    } else {
      return -1;
    }
    // Require the *entire* suffix to consist of upper-case hexadecimal
    // digits. `parseInt` on its own is too lenient: it skips leading
    // whitespace, accepts a sign or a '0X' prefix, and silently stops at
    // the first invalid character, so names such as 'u12G4' or 'u+123'
    // would otherwise be decoded into bogus Unicode values.
    if (/^[0-9A-F]+$/.test(hexStr)) {
      return parseInt(hexStr, 16);
    }
  }
  return -1;
}
var UnicodeRanges = [
{ 'begin': 0x0000, 'end': 0x007F }, // Basic Latin
{ 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement
@ -1612,4 +1642,5 @@
exports.reverseIfRtl = reverseIfRtl;
exports.getUnicodeRangeFor = getUnicodeRangeFor;
exports.getNormalizedUnicodes = getNormalizedUnicodes;
exports.getUnicodeForGlyph = getUnicodeForGlyph;
}));

View File

@ -32,6 +32,8 @@
!bug1200096.pdf
!issue5564_reduced.pdf
!canvas.pdf
!bug1132849.pdf
!issue6894.pdf
!issue5804.pdf
!ShowText-ShadingPattern.pdf
!complex_ttf_font.pdf

BIN
test/pdfs/bug1132849.pdf Normal file

Binary file not shown.

BIN
test/pdfs/issue6894.pdf Normal file

Binary file not shown.

View File

@ -728,6 +728,20 @@
"rounds": 1,
"type": "eq"
},
{ "id": "bug1132849",
"file": "pdfs/bug1132849.pdf",
"md5": "aedfbead1f8feb35cf2e38b279133b47",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue6894",
"file": "pdfs/issue6894.pdf",
"md5": "bb84f2025c11f23cf436170049f81215",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "personwithdog",
"file": "pdfs/personwithdog.pdf",
"md5": "cd68fb2ce00dab97801b3e51495b99e3",