Convert uniXXXX
glyph names to proper ones when building the charCodeToGlyphId
map for TrueType fonts (bug 1132849, issue 6893, issue 6894)
This patch adds a `getUnicodeForGlyph` helper function, which is used to recover Unicode values for non-standard glyph names. Some PDF generators, e.g. Scribus PDF, use improper `uniXXXX` glyph names, which breaks the glyph mapping. We can avoid this by converting them to "standard" glyph names instead. Fixes https://bugzilla.mozilla.org/show_bug.cgi?id=1132849. Fixes #6893. Fixes #6894.
This commit is contained in:
parent
147598417c
commit
dfe9015a43
@ -80,6 +80,7 @@ var getSupplementalGlyphMapForArialBlack =
|
||||
coreStandardFonts.getSupplementalGlyphMapForArialBlack;
|
||||
var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor;
|
||||
var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues;
|
||||
var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph;
|
||||
|
||||
// Unicode Private Use Area
|
||||
var PRIVATE_USE_OFFSET_START = 0xE000;
|
||||
@ -465,7 +466,7 @@ var ProblematicCharRanges = new Int32Array([
|
||||
*/
|
||||
var Font = (function FontClosure() {
|
||||
function Font(name, file, properties) {
|
||||
var charCode, glyphName, fontChar;
|
||||
var charCode, glyphName, unicode, fontChar;
|
||||
|
||||
this.name = name;
|
||||
this.loadedName = properties.loadedName;
|
||||
@ -609,21 +610,25 @@ var Font = (function FontClosure() {
|
||||
this.toFontChar[charCode] = fontChar;
|
||||
}
|
||||
} else if (isStandardFont) {
|
||||
this.toFontChar = [];
|
||||
glyphsUnicodeMap = getGlyphsUnicode();
|
||||
for (charCode in properties.defaultEncoding) {
|
||||
glyphName = (properties.differences[charCode] ||
|
||||
properties.defaultEncoding[charCode]);
|
||||
this.toFontChar[charCode] = glyphsUnicodeMap[glyphName];
|
||||
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
this.toFontChar[charCode] = unicode;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1);
|
||||
glyphsUnicodeMap = getGlyphsUnicode();
|
||||
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
|
||||
if (notCidFont) {
|
||||
if (!this.composite) {
|
||||
glyphName = (properties.differences[charCode] ||
|
||||
properties.defaultEncoding[charCode]);
|
||||
unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode);
|
||||
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
unicodeCharCode = unicode;
|
||||
}
|
||||
}
|
||||
this.toFontChar[charCode] = unicodeCharCode;
|
||||
}.bind(this));
|
||||
@ -722,7 +727,7 @@ var Font = (function FontClosure() {
|
||||
function int16(b0, b1) {
|
||||
return (b0 << 8) + b1;
|
||||
}
|
||||
|
||||
|
||||
function signedInt16(b0, b1) {
|
||||
var value = (b0 << 8) + b1;
|
||||
return value & (1 << 15) ? value - 0x10000 : value;
|
||||
@ -2283,6 +2288,26 @@ var Font = (function FontClosure() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Maps a possibly non-standard glyph name onto a key of `glyphsUnicodeMap`.
 *
 * Some bad PDF generators, e.g. Scribus PDF, include glyph names in a
 * 'uniXXXX' format; this attempts to recover proper ones.
 *
 * @param {string} name - The glyph name to resolve.
 * @param {Object} glyphsUnicodeMap - Lookup from glyph name to Unicode value.
 * @returns {string} `name` itself when it is already a key of the map;
 *   otherwise the first key whose value equals the Unicode value that
 *   `getUnicodeForGlyph` reports for `name`; otherwise `name` unchanged
 *   (after emitting a warning).
 */
function recoverGlyphName(name, glyphsUnicodeMap) {
  var isStandardName = glyphsUnicodeMap[name] !== undefined;

  if (!isStandardName) {
    // The glyph name is non-standard, trying to recover.
    var codePoint = getUnicodeForGlyph(name, glyphsUnicodeMap);

    if (codePoint !== -1) {
      // Reverse lookup: find a known glyph name carrying the same value.
      for (var candidate in glyphsUnicodeMap) {
        if (glyphsUnicodeMap[candidate] === codePoint) {
          return candidate;
        }
      }
    }
    warn('Unable to recover a standard glyph name for: ' + name);
  }
  return name;
}
|
||||
|
||||
|
||||
if (properties.type === 'CIDFontType2') {
|
||||
var cidToGidMap = properties.cidToGidMap || [];
|
||||
var isCidToGidMapEmpty = cidToGidMap.length === 0;
|
||||
@ -2337,7 +2362,7 @@ var Font = (function FontClosure() {
|
||||
}
|
||||
var glyphsUnicodeMap = getGlyphsUnicode();
|
||||
for (charCode = 0; charCode < 256; charCode++) {
|
||||
var glyphName;
|
||||
var glyphName, standardGlyphName;
|
||||
if (this.differences && charCode in this.differences) {
|
||||
glyphName = this.differences[charCode];
|
||||
} else if (charCode in baseEncoding &&
|
||||
@ -2349,13 +2374,16 @@ var Font = (function FontClosure() {
|
||||
if (!glyphName) {
|
||||
continue;
|
||||
}
|
||||
// Ensure that non-standard glyph names are resolved to valid ones.
|
||||
standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap);
|
||||
|
||||
var unicodeOrCharCode, isUnicode = false;
|
||||
if (cmapPlatformId === 3 && cmapEncodingId === 1) {
|
||||
unicodeOrCharCode = glyphsUnicodeMap[glyphName];
|
||||
unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName];
|
||||
isUnicode = true;
|
||||
} else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
|
||||
// TODO: the encoding needs to be updated with mac os table.
|
||||
unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName);
|
||||
unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
|
||||
}
|
||||
|
||||
var found = false;
|
||||
@ -2373,6 +2401,11 @@ var Font = (function FontClosure() {
|
||||
if (!found && properties.glyphNames) {
|
||||
// Try to map using the post table.
|
||||
var glyphId = properties.glyphNames.indexOf(glyphName);
|
||||
// The post table ought to use the same kind of glyph names as the
|
||||
// `differences` array, but check the standard ones as a fallback.
|
||||
if (glyphId === -1 && standardGlyphName !== glyphName) {
|
||||
glyphId = properties.glyphNames.indexOf(standardGlyphName);
|
||||
}
|
||||
if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) {
|
||||
charCodeToGlyphId[charCode] = glyphId;
|
||||
found = true;
|
||||
@ -2686,6 +2719,12 @@ var Font = (function FontClosure() {
|
||||
code = +glyphName.substr(1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// 'uniXXXX'/'uXXXX{XX}' glyphs
|
||||
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
code = unicode;
|
||||
}
|
||||
}
|
||||
if (code) {
|
||||
// If |baseEncodingName| is one of the predefined encodings,
|
||||
|
@ -65,6 +65,36 @@
|
||||
return code;
|
||||
}
|
||||
|
||||
/**
 * Resolves a glyph name to a Unicode value.
 *
 * The name is first looked up in `glyphsUnicodeMap`. If it is not present,
 * the function tries to decode names of the form 'uniXXXX' (exactly four
 * hexadecimal digits) or 'uXXXX{XX}' (four to six hexadecimal digits).
 *
 * @param {string} name - The glyph name to resolve.
 * @param {Object} glyphsUnicodeMap - Lookup from glyph name to Unicode value.
 * @returns {number} The value stored in the map, the decoded hexadecimal
 *   value, or -1 when the name is empty or cannot be decoded.
 */
function getUnicodeForGlyph(name, glyphsUnicodeMap) {
  var unicode = glyphsUnicodeMap[name];
  if (unicode !== undefined) {
    return unicode;
  }
  if (!name) {
    return -1;
  }
  // Try to recover valid Unicode values from 'uniXXXX'/'uXXXX{XX}' glyphs.
  if (name[0] === 'u') {
    var nameLen = name.length, hexStr;

    if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX'
      hexStr = name.substring(3);
    } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}'
      hexStr = name.substring(1);
    } else {
      return -1;
    }
    // Only accept strings made up entirely of upper-case hexadecimal digits.
    // `parseInt` on its own is too lenient: it stops silently at the first
    // invalid character and also accepts signs, leading whitespace and a
    // '0X' prefix, which would turn names such as 'u12GH' or 'u+1234' into
    // bogus Unicode values.
    if (/^[0-9A-F]+$/.test(hexStr)) {
      return parseInt(hexStr, 16);
    }
  }
  return -1;
}
|
||||
|
||||
var UnicodeRanges = [
|
||||
{ 'begin': 0x0000, 'end': 0x007F }, // Basic Latin
|
||||
{ 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement
|
||||
@ -1612,4 +1642,5 @@
|
||||
exports.reverseIfRtl = reverseIfRtl;
|
||||
exports.getUnicodeRangeFor = getUnicodeRangeFor;
|
||||
exports.getNormalizedUnicodes = getNormalizedUnicodes;
|
||||
exports.getUnicodeForGlyph = getUnicodeForGlyph;
|
||||
}));
|
||||
|
2
test/pdfs/.gitignore
vendored
2
test/pdfs/.gitignore
vendored
@ -32,6 +32,8 @@
|
||||
!bug1200096.pdf
|
||||
!issue5564_reduced.pdf
|
||||
!canvas.pdf
|
||||
!bug1132849.pdf
|
||||
!issue6894.pdf
|
||||
!issue5804.pdf
|
||||
!ShowText-ShadingPattern.pdf
|
||||
!complex_ttf_font.pdf
|
||||
|
BIN
test/pdfs/bug1132849.pdf
Normal file
BIN
test/pdfs/bug1132849.pdf
Normal file
Binary file not shown.
BIN
test/pdfs/issue6894.pdf
Normal file
BIN
test/pdfs/issue6894.pdf
Normal file
Binary file not shown.
@ -728,6 +728,20 @@
|
||||
"rounds": 1,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "bug1132849",
|
||||
"file": "pdfs/bug1132849.pdf",
|
||||
"md5": "aedfbead1f8feb35cf2e38b279133b47",
|
||||
"rounds": 1,
|
||||
"link": false,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue6894",
|
||||
"file": "pdfs/issue6894.pdf",
|
||||
"md5": "bb84f2025c11f23cf436170049f81215",
|
||||
"rounds": 1,
|
||||
"link": false,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "personwithdog",
|
||||
"file": "pdfs/personwithdog.pdf",
|
||||
"md5": "cd68fb2ce00dab97801b3e51495b99e3",
|
||||
|
Loading…
Reference in New Issue
Block a user