diff --git a/src/core/fonts.js b/src/core/fonts.js index 78ff4b16b..594432505 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -80,6 +80,7 @@ var getSupplementalGlyphMapForArialBlack = coreStandardFonts.getSupplementalGlyphMapForArialBlack; var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor; var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues; +var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph; // Unicode Private Use Area var PRIVATE_USE_OFFSET_START = 0xE000; @@ -465,7 +466,7 @@ var ProblematicCharRanges = new Int32Array([ */ var Font = (function FontClosure() { function Font(name, file, properties) { - var charCode, glyphName, fontChar; + var charCode, glyphName, unicode, fontChar; this.name = name; this.loadedName = properties.loadedName; @@ -609,21 +610,25 @@ var Font = (function FontClosure() { this.toFontChar[charCode] = fontChar; } } else if (isStandardFont) { - this.toFontChar = []; glyphsUnicodeMap = getGlyphsUnicode(); for (charCode in properties.defaultEncoding) { glyphName = (properties.differences[charCode] || properties.defaultEncoding[charCode]); - this.toFontChar[charCode] = glyphsUnicodeMap[glyphName]; + unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + this.toFontChar[charCode] = unicode; + } } } else { - var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1); glyphsUnicodeMap = getGlyphsUnicode(); this.toUnicode.forEach(function(charCode, unicodeCharCode) { - if (notCidFont) { + if (!this.composite) { glyphName = (properties.differences[charCode] || properties.defaultEncoding[charCode]); - unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode); + unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + unicodeCharCode = unicode; + } } this.toFontChar[charCode] = unicodeCharCode; }.bind(this)); @@ -722,7 +727,7 @@ var Font = (function FontClosure() { function int16(b0, b1) { return (b0 << 8) + b1; } - + function signedInt16(b0, b1) { var value = (b0 << 8) + b1; return value & (1 << 15) ? value - 0x10000 : value; @@ -2283,6 +2288,26 @@ var Font = (function FontClosure() { return false; } + // Some bad PDF generators, e.g. Scribus PDF, include glyph names + // in a 'uniXXXX' format -- attempting to recover proper ones. + function recoverGlyphName(name, glyphsUnicodeMap) { + if (glyphsUnicodeMap[name] !== undefined) { + return name; + } + // The glyph name is non-standard, trying to recover. + var unicode = getUnicodeForGlyph(name, glyphsUnicodeMap); + if (unicode !== -1) { + for (var key in glyphsUnicodeMap) { + if (glyphsUnicodeMap[key] === unicode) { + return key; + } + } + } + warn('Unable to recover a standard glyph name for: ' + name); + return name; + } + + if (properties.type === 'CIDFontType2') { var cidToGidMap = properties.cidToGidMap || []; var isCidToGidMapEmpty = cidToGidMap.length === 0; @@ -2337,7 +2362,7 @@ var Font = (function FontClosure() { } var glyphsUnicodeMap = getGlyphsUnicode(); for (charCode = 0; charCode < 256; charCode++) { - var glyphName; + var glyphName, standardGlyphName; if (this.differences && charCode in this.differences) { glyphName = this.differences[charCode]; } else if (charCode in baseEncoding && @@ -2349,13 +2374,16 @@ var Font = (function FontClosure() { if (!glyphName) { continue; } + // Ensure that non-standard glyph names are resolved to valid ones. + standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap); + var unicodeOrCharCode, isUnicode = false; if (cmapPlatformId === 3 && cmapEncodingId === 1) { - unicodeOrCharCode = glyphsUnicodeMap[glyphName]; + unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName]; isUnicode = true; } else if (cmapPlatformId === 1 && cmapEncodingId === 0) { // TODO: the encoding needs to be updated with mac os table. - unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName); + unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName); } var found = false; @@ -2373,6 +2401,11 @@ var Font = (function FontClosure() { if (!found && properties.glyphNames) { // Try to map using the post table. var glyphId = properties.glyphNames.indexOf(glyphName); + // The post table ought to use the same kind of glyph names as the + // `differences` array, but check the standard ones as a fallback. + if (glyphId === -1 && standardGlyphName !== glyphName) { + glyphId = properties.glyphNames.indexOf(standardGlyphName); + } if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) { charCodeToGlyphId[charCode] = glyphId; found = true; @@ -2686,6 +2719,12 @@ var Font = (function FontClosure() { code = +glyphName.substr(1); } break; + default: + // 'uniXXXX'/'uXXXX{XX}' glyphs + var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + code = unicode; + } } if (code) { // If |baseEncodingName| is one the predefined encodings, diff --git a/src/core/unicode.js b/src/core/unicode.js index dca93ba40..dcfe92517 100644 --- a/src/core/unicode.js +++ b/src/core/unicode.js @@ -65,6 +65,36 @@ return code; } + function getUnicodeForGlyph(name, glyphsUnicodeMap) { + var unicode = glyphsUnicodeMap[name]; + if (unicode !== undefined) { + return unicode; + } + if (!name) { + return -1; + } + // Try to recover valid Unicode values from 'uniXXXX'/'uXXXX{XX}' glyphs. + if (name[0] === 'u') { + var nameLen = name.length, hexStr; + + if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX' + hexStr = name.substr(3); + } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}' + hexStr = name.substr(1); + } else { + return -1; + } + // Check for upper-case hexadecimal characters, to avoid false positives. + if (hexStr === hexStr.toUpperCase()) { + unicode = parseInt(hexStr, 16); + if (unicode >= 0) { + return unicode; + } + } + } + return -1; + } + var UnicodeRanges = [ { 'begin': 0x0000, 'end': 0x007F }, // Basic Latin { 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement @@ -1612,4 +1642,5 @@ exports.reverseIfRtl = reverseIfRtl; exports.getUnicodeRangeFor = getUnicodeRangeFor; exports.getNormalizedUnicodes = getNormalizedUnicodes; + exports.getUnicodeForGlyph = getUnicodeForGlyph; })); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 5fad85a37..8806c0339 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -32,6 +32,8 @@ !bug1200096.pdf !issue5564_reduced.pdf !canvas.pdf +!bug1132849.pdf +!issue6894.pdf !issue5804.pdf !ShowText-ShadingPattern.pdf !complex_ttf_font.pdf diff --git a/test/pdfs/bug1132849.pdf b/test/pdfs/bug1132849.pdf new file mode 100644 index 000000000..1754b5026 Binary files /dev/null and b/test/pdfs/bug1132849.pdf differ diff --git a/test/pdfs/issue6894.pdf b/test/pdfs/issue6894.pdf new file mode 100644 index 000000000..7220f7c0e Binary files /dev/null and b/test/pdfs/issue6894.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 51f03f358..ae7532c67 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -728,6 +728,20 @@ "rounds": 1, "type": "eq" }, + { "id": "bug1132849", + "file": "pdfs/bug1132849.pdf", + "md5": "aedfbead1f8feb35cf2e38b279133b47", + "rounds": 1, + "link": false, + "type": "eq" + }, + { "id": "issue6894", + "file": "pdfs/issue6894.pdf", + "md5": "bb84f2025c11f23cf436170049f81215", + "rounds": 1, + "link": false, + "type": "eq" + }, { "id": "personwithdog", "file": "pdfs/personwithdog.pdf", "md5": "cd68fb2ce00dab97801b3e51495b99e3",