Merge pull request #4550 from yurydelendik/macenc
Heuristics to recognize the unknown glyphs for toUnicode
This commit is contained in:
commit
938547276e
@ -4193,7 +4193,7 @@ var Font = (function FontClosure() {
|
|||||||
toUnicode: null
|
toUnicode: null
|
||||||
};
|
};
|
||||||
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
||||||
if (properties.toUnicode) {
|
if (properties.toUnicode && properties.toUnicode.length !== 0) {
|
||||||
map.toUnicode = properties.toUnicode;
|
map.toUnicode = properties.toUnicode;
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
@ -4217,6 +4217,21 @@ var Font = (function FontClosure() {
|
|||||||
// b) Look up the character name in the Adobe Glyph List (see the
|
// b) Look up the character name in the Adobe Glyph List (see the
|
||||||
// Bibliography) to obtain the corresponding Unicode value.
|
// Bibliography) to obtain the corresponding Unicode value.
|
||||||
if (glyphName === '' || !(glyphName in GlyphsUnicode)) {
|
if (glyphName === '' || !(glyphName in GlyphsUnicode)) {
|
||||||
|
// (undocumented) c) Few heuristics to recognize unknown glyphs
|
||||||
|
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
||||||
|
var code;
|
||||||
|
// Gxx glyph
|
||||||
|
if (glyphName.length === 3 &&
|
||||||
|
glyphName[0] === 'G' &&
|
||||||
|
(code = parseInt(glyphName.substr(1), 16))) {
|
||||||
|
toUnicode[charcode] = String.fromCharCode(code);
|
||||||
|
}
|
||||||
|
// Cddd glyph
|
||||||
|
if (glyphName.length >= 3 &&
|
||||||
|
glyphName[0] === 'C' &&
|
||||||
|
(code = +glyphName.substr(1))) {
|
||||||
|
toUnicode[charcode] = String.fromCharCode(code);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]);
|
toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]);
|
||||||
@ -5467,17 +5482,19 @@ var CFFFont = (function CFFFontClosure() {
|
|||||||
},
|
},
|
||||||
getGlyphMapping: function CFFFont_getGlyphMapping() {
|
getGlyphMapping: function CFFFont_getGlyphMapping() {
|
||||||
var cff = this.cff;
|
var cff = this.cff;
|
||||||
|
var properties = this.properties;
|
||||||
var charsets = cff.charset.charset;
|
var charsets = cff.charset.charset;
|
||||||
var charCodeToGlyphId = Object.create(null);
|
var charCodeToGlyphId;
|
||||||
var glyphId;
|
var glyphId;
|
||||||
|
|
||||||
if (this.properties.composite) {
|
if (properties.composite) {
|
||||||
if (this.cff.isCIDFont) {
|
charCodeToGlyphId = Object.create(null);
|
||||||
|
if (cff.isCIDFont) {
|
||||||
// If the font is actually a CID font then we should use the charset
|
// If the font is actually a CID font then we should use the charset
|
||||||
// to map CIDs to GIDs.
|
// to map CIDs to GIDs.
|
||||||
for (glyphId = 0; glyphId < charsets.length; glyphId++) {
|
for (glyphId = 0; glyphId < charsets.length; glyphId++) {
|
||||||
var cidString = String.fromCharCode(charsets[glyphId]);
|
var cidString = String.fromCharCode(charsets[glyphId]);
|
||||||
var charCode = this.properties.cMap.map.indexOf(cidString);
|
var charCode = properties.cMap.map.indexOf(cidString);
|
||||||
charCodeToGlyphId[charCode] = glyphId;
|
charCodeToGlyphId[charCode] = glyphId;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -5491,7 +5508,8 @@ var CFFFont = (function CFFFontClosure() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var encoding = cff.encoding ? cff.encoding.encoding : null;
|
var encoding = cff.encoding ? cff.encoding.encoding : null;
|
||||||
return type1FontGlyphMapping(this.properties, encoding, charsets);
|
charCodeToGlyphId = type1FontGlyphMapping(properties, encoding, charsets);
|
||||||
|
return charCodeToGlyphId;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -33,6 +33,7 @@
|
|||||||
!issue918.pdf
|
!issue918.pdf
|
||||||
!issue1905.pdf
|
!issue1905.pdf
|
||||||
!issue2833.pdf
|
!issue2833.pdf
|
||||||
|
!issue4550.pdf
|
||||||
!rotated.pdf
|
!rotated.pdf
|
||||||
!issue1249.pdf
|
!issue1249.pdf
|
||||||
!issue1171.pdf
|
!issue1171.pdf
|
||||||
|
BIN
test/pdfs/issue4550.pdf
Normal file
BIN
test/pdfs/issue4550.pdf
Normal file
Binary file not shown.
@ -352,6 +352,12 @@
|
|||||||
"lastPage": 1,
|
"lastPage": 1,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue4550-text",
|
||||||
|
"file": "pdfs/issue4550.pdf",
|
||||||
|
"md5": "d64cfc4b50e225f596130d9938e8d5cc",
|
||||||
|
"rounds": 1,
|
||||||
|
"type": "text"
|
||||||
|
},
|
||||||
{ "id": "jai-pdf",
|
{ "id": "jai-pdf",
|
||||||
"file": "pdfs/jai.pdf",
|
"file": "pdfs/jai.pdf",
|
||||||
"md5": "1f5dd128c3757420a881a155f2f8ace3",
|
"md5": "1f5dd128c3757420a881a155f2f8ace3",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user