Slightly re-factor PartialEvaluator._simpleFontToUnicode
Given the sheer number of heuristics added to this method over the years, moving the case where a *valid* Unicode value is found to the top should improve the readability of the code.
This commit is contained in:
parent
c6cc7c6e6a
commit
fa47d4b9b1
@@ -3526,77 +3526,77 @@ class PartialEvaluator {
|
|||||||
for (const charcode in encoding) {
|
for (const charcode in encoding) {
|
||||||
// a) Map the character code to a character name.
|
// a) Map the character code to a character name.
|
||||||
let glyphName = encoding[charcode];
|
let glyphName = encoding[charcode];
|
||||||
// b) Look up the character name in the Adobe Glyph List (see the
|
|
||||||
// Bibliography) to obtain the corresponding Unicode value.
|
|
||||||
if (glyphName === "") {
|
if (glyphName === "") {
|
||||||
continue;
|
continue;
|
||||||
} else if (glyphsUnicodeMap[glyphName] === undefined) {
|
}
|
||||||
// (undocumented) c) Few heuristics to recognize unknown glyphs
|
// b) Look up the character name in the Adobe Glyph List (see the
|
||||||
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
// Bibliography) to obtain the corresponding Unicode value.
|
||||||
let code = 0;
|
let unicode = glyphsUnicodeMap[glyphName];
|
||||||
switch (glyphName[0]) {
|
if (unicode !== undefined) {
|
||||||
case "G": // Gxx glyph
|
toUnicode[charcode] = String.fromCharCode(unicode);
|
||||||
if (glyphName.length === 3) {
|
|
||||||
code = parseInt(glyphName.substring(1), 16);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case "g": // g00xx glyph
|
|
||||||
if (glyphName.length === 5) {
|
|
||||||
code = parseInt(glyphName.substring(1), 16);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case "C": // Cdd{d} glyph
|
|
||||||
case "c": // cdd{d} glyph
|
|
||||||
if (glyphName.length >= 3 && glyphName.length <= 4) {
|
|
||||||
const codeStr = glyphName.substring(1);
|
|
||||||
|
|
||||||
if (forceGlyphs) {
|
|
||||||
code = parseInt(codeStr, 16);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Normally the Cdd{d}/cdd{d} glyphName format will contain
|
|
||||||
// regular, i.e. base 10, charCodes (see issue4550.pdf)...
|
|
||||||
code = +codeStr;
|
|
||||||
|
|
||||||
// ... however some PDF generators violate that assumption by
|
|
||||||
// containing glyph, i.e. base 16, codes instead.
|
|
||||||
// In that case we need to re-parse the *entire* encoding to
|
|
||||||
// prevent broken text-selection (fixes issue9655_reduced.pdf).
|
|
||||||
if (
|
|
||||||
Number.isNaN(code) &&
|
|
||||||
Number.isInteger(parseInt(codeStr, 16))
|
|
||||||
) {
|
|
||||||
return this._simpleFontToUnicode(
|
|
||||||
properties,
|
|
||||||
/* forceGlyphs */ true
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default: // 'uniXXXX'/'uXXXX{XX}' glyphs
|
|
||||||
const unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
|
||||||
if (unicode !== -1) {
|
|
||||||
code = unicode;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) {
|
|
||||||
// If `baseEncodingName` is one the predefined encodings, and `code`
|
|
||||||
// equals `charcode`, using the glyph defined in the baseEncoding
|
|
||||||
// seems to yield a better `toUnicode` mapping (fixes issue 5070).
|
|
||||||
if (baseEncodingName && code === +charcode) {
|
|
||||||
const baseEncoding = getEncoding(baseEncodingName);
|
|
||||||
if (baseEncoding && (glyphName = baseEncoding[charcode])) {
|
|
||||||
toUnicode[charcode] = String.fromCharCode(
|
|
||||||
glyphsUnicodeMap[glyphName]
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
toUnicode[charcode] = String.fromCodePoint(code);
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
// (undocumented) c) Few heuristics to recognize unknown glyphs
|
||||||
|
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
||||||
|
let code = 0;
|
||||||
|
switch (glyphName[0]) {
|
||||||
|
case "G": // Gxx glyph
|
||||||
|
if (glyphName.length === 3) {
|
||||||
|
code = parseInt(glyphName.substring(1), 16);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case "g": // g00xx glyph
|
||||||
|
if (glyphName.length === 5) {
|
||||||
|
code = parseInt(glyphName.substring(1), 16);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case "C": // Cdd{d} glyph
|
||||||
|
case "c": // cdd{d} glyph
|
||||||
|
if (glyphName.length >= 3 && glyphName.length <= 4) {
|
||||||
|
const codeStr = glyphName.substring(1);
|
||||||
|
|
||||||
|
if (forceGlyphs) {
|
||||||
|
code = parseInt(codeStr, 16);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Normally the Cdd{d}/cdd{d} glyphName format will contain
|
||||||
|
// regular, i.e. base 10, charCodes (see issue4550.pdf)...
|
||||||
|
code = +codeStr;
|
||||||
|
|
||||||
|
// ... however some PDF generators violate that assumption by
|
||||||
|
// containing glyph, i.e. base 16, codes instead.
|
||||||
|
// In that case we need to re-parse the *entire* encoding to
|
||||||
|
// prevent broken text-selection (fixes issue9655_reduced.pdf).
|
||||||
|
if (Number.isNaN(code) && Number.isInteger(parseInt(codeStr, 16))) {
|
||||||
|
return this._simpleFontToUnicode(
|
||||||
|
properties,
|
||||||
|
/* forceGlyphs */ true
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case "u": // 'uniXXXX'/'uXXXX{XX}' glyphs
|
||||||
|
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||||
|
if (unicode !== -1) {
|
||||||
|
code = unicode;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) {
|
||||||
|
// If `baseEncodingName` is one the predefined encodings, and `code`
|
||||||
|
// equals `charcode`, using the glyph defined in the baseEncoding
|
||||||
|
// seems to yield a better `toUnicode` mapping (fixes issue 5070).
|
||||||
|
if (baseEncodingName && code === +charcode) {
|
||||||
|
const baseEncoding = getEncoding(baseEncodingName);
|
||||||
|
if (baseEncoding && (glyphName = baseEncoding[charcode])) {
|
||||||
|
toUnicode[charcode] = String.fromCharCode(
|
||||||
|
glyphsUnicodeMap[glyphName]
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
toUnicode[charcode] = String.fromCodePoint(code);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return toUnicode;
|
return toUnicode;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user