Merge pull request #15572 from Snuffleupagus/simpleFontToUnicode-refactor
Slightly re-factor `PartialEvaluator._simpleFontToUnicode`
This commit is contained in:
commit
2508792f29
@ -3526,77 +3526,77 @@ class PartialEvaluator {
|
||||
for (const charcode in encoding) {
|
||||
// a) Map the character code to a character name.
|
||||
let glyphName = encoding[charcode];
|
||||
// b) Look up the character name in the Adobe Glyph List (see the
|
||||
// Bibliography) to obtain the corresponding Unicode value.
|
||||
if (glyphName === "") {
|
||||
continue;
|
||||
} else if (glyphsUnicodeMap[glyphName] === undefined) {
|
||||
// (undocumented) c) A few heuristics to recognize unknown glyphs
|
||||
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
||||
let code = 0;
|
||||
switch (glyphName[0]) {
|
||||
case "G": // Gxx glyph
|
||||
if (glyphName.length === 3) {
|
||||
code = parseInt(glyphName.substring(1), 16);
|
||||
}
|
||||
break;
|
||||
case "g": // g00xx glyph
|
||||
if (glyphName.length === 5) {
|
||||
code = parseInt(glyphName.substring(1), 16);
|
||||
}
|
||||
break;
|
||||
case "C": // Cdd{d} glyph
|
||||
case "c": // cdd{d} glyph
|
||||
if (glyphName.length >= 3 && glyphName.length <= 4) {
|
||||
const codeStr = glyphName.substring(1);
|
||||
|
||||
if (forceGlyphs) {
|
||||
code = parseInt(codeStr, 16);
|
||||
break;
|
||||
}
|
||||
// Normally the Cdd{d}/cdd{d} glyphName format will contain
|
||||
// regular, i.e. base 10, charCodes (see issue4550.pdf)...
|
||||
code = +codeStr;
|
||||
|
||||
// ... however some PDF generators violate that assumption by
|
||||
// containing glyph, i.e. base 16, codes instead.
|
||||
// In that case we need to re-parse the *entire* encoding to
|
||||
// prevent broken text-selection (fixes issue9655_reduced.pdf).
|
||||
if (
|
||||
Number.isNaN(code) &&
|
||||
Number.isInteger(parseInt(codeStr, 16))
|
||||
) {
|
||||
return this._simpleFontToUnicode(
|
||||
properties,
|
||||
/* forceGlyphs */ true
|
||||
);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default: // 'uniXXXX'/'uXXXX{XX}' glyphs
|
||||
const unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
code = unicode;
|
||||
}
|
||||
}
|
||||
if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) {
|
||||
// If `baseEncodingName` is one of the predefined encodings, and `code`
|
||||
// equals `charcode`, using the glyph defined in the baseEncoding
|
||||
// seems to yield a better `toUnicode` mapping (fixes issue 5070).
|
||||
if (baseEncodingName && code === +charcode) {
|
||||
const baseEncoding = getEncoding(baseEncodingName);
|
||||
if (baseEncoding && (glyphName = baseEncoding[charcode])) {
|
||||
toUnicode[charcode] = String.fromCharCode(
|
||||
glyphsUnicodeMap[glyphName]
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
toUnicode[charcode] = String.fromCodePoint(code);
|
||||
}
|
||||
}
|
||||
// b) Look up the character name in the Adobe Glyph List (see the
|
||||
// Bibliography) to obtain the corresponding Unicode value.
|
||||
let unicode = glyphsUnicodeMap[glyphName];
|
||||
if (unicode !== undefined) {
|
||||
toUnicode[charcode] = String.fromCharCode(unicode);
|
||||
continue;
|
||||
}
|
||||
toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
||||
// (undocumented) c) A few heuristics to recognize unknown glyphs
|
||||
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
||||
let code = 0;
|
||||
switch (glyphName[0]) {
|
||||
case "G": // Gxx glyph
|
||||
if (glyphName.length === 3) {
|
||||
code = parseInt(glyphName.substring(1), 16);
|
||||
}
|
||||
break;
|
||||
case "g": // g00xx glyph
|
||||
if (glyphName.length === 5) {
|
||||
code = parseInt(glyphName.substring(1), 16);
|
||||
}
|
||||
break;
|
||||
case "C": // Cdd{d} glyph
|
||||
case "c": // cdd{d} glyph
|
||||
if (glyphName.length >= 3 && glyphName.length <= 4) {
|
||||
const codeStr = glyphName.substring(1);
|
||||
|
||||
if (forceGlyphs) {
|
||||
code = parseInt(codeStr, 16);
|
||||
break;
|
||||
}
|
||||
// Normally the Cdd{d}/cdd{d} glyphName format will contain
|
||||
// regular, i.e. base 10, charCodes (see issue4550.pdf)...
|
||||
code = +codeStr;
|
||||
|
||||
// ... however some PDF generators violate that assumption by
|
||||
// containing glyph, i.e. base 16, codes instead.
|
||||
// In that case we need to re-parse the *entire* encoding to
|
||||
// prevent broken text-selection (fixes issue9655_reduced.pdf).
|
||||
if (Number.isNaN(code) && Number.isInteger(parseInt(codeStr, 16))) {
|
||||
return this._simpleFontToUnicode(
|
||||
properties,
|
||||
/* forceGlyphs */ true
|
||||
);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case "u": // 'uniXXXX'/'uXXXX{XX}' glyphs
|
||||
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||
if (unicode !== -1) {
|
||||
code = unicode;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) {
|
||||
// If `baseEncodingName` is one of the predefined encodings, and `code`
|
||||
// equals `charcode`, using the glyph defined in the baseEncoding
|
||||
// seems to yield a better `toUnicode` mapping (fixes issue 5070).
|
||||
if (baseEncodingName && code === +charcode) {
|
||||
const baseEncoding = getEncoding(baseEncodingName);
|
||||
if (baseEncoding && (glyphName = baseEncoding[charcode])) {
|
||||
toUnicode[charcode] = String.fromCharCode(
|
||||
glyphsUnicodeMap[glyphName]
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
toUnicode[charcode] = String.fromCodePoint(code);
|
||||
}
|
||||
}
|
||||
return toUnicode;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user