Re-factor the `fallbackToUnicode` functionality (PR 9192 follow-up)
Rather than having to create and check a *separate* `ToUnicodeMap` to handle these cases, we can simply use the `fallbackToUnicode`-data (when it exists) to directly supplement *missing* /ToUnicode entries in the regular `ToUnicodeMap` instead.
This commit is contained in:
parent
7190bc23a8
commit
229a49b9b9
@ -3178,10 +3178,10 @@ class PartialEvaluator {
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns {ToUnicodeMap}
|
||||
* @returns {Array}
|
||||
* @private
|
||||
*/
|
||||
_buildSimpleFontToUnicode(properties, forceGlyphs = false) {
|
||||
_simpleFontToUnicode(properties, forceGlyphs = false) {
|
||||
assert(!properties.composite, "Must be a simple font.");
|
||||
|
||||
const toUnicode = [];
|
||||
@ -3242,7 +3242,7 @@ class PartialEvaluator {
|
||||
Number.isNaN(code) &&
|
||||
Number.isInteger(parseInt(codeStr, 16))
|
||||
) {
|
||||
return this._buildSimpleFontToUnicode(
|
||||
return this._simpleFontToUnicode(
|
||||
properties,
|
||||
/* forceGlyphs */ true
|
||||
);
|
||||
@ -3275,7 +3275,7 @@ class PartialEvaluator {
|
||||
}
|
||||
toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
||||
}
|
||||
return new ToUnicodeMap(toUnicode);
|
||||
return toUnicode;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3294,8 +3294,7 @@ class PartialEvaluator {
|
||||
// text-extraction. For simple fonts, containing encoding information,
|
||||
// use a fallback ToUnicode map to improve this (fixes issue8229.pdf).
|
||||
if (!properties.composite && properties.hasEncoding) {
|
||||
properties.fallbackToUnicode =
|
||||
this._buildSimpleFontToUnicode(properties);
|
||||
properties.fallbackToUnicode = this._simpleFontToUnicode(properties);
|
||||
}
|
||||
return properties.toUnicode;
|
||||
}
|
||||
@ -3306,7 +3305,7 @@ class PartialEvaluator {
|
||||
// in practice it seems better to always try to create a toUnicode map
|
||||
// based on the default encoding.
|
||||
if (!properties.composite /* is simple font */) {
|
||||
return this._buildSimpleFontToUnicode(properties);
|
||||
return new ToUnicodeMap(this._simpleFontToUnicode(properties));
|
||||
}
|
||||
|
||||
// If the font is a composite font that uses one of the predefined CMaps
|
||||
|
@ -167,6 +167,29 @@ function adjustToUnicode(properties, builtInEncoding) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
* Supplements `properties.toUnicode` with entries taken from
* `properties.fallbackToUnicode`, but only for char codes that
* `properties.toUnicode` does not already contain; existing entries are
* never overwritten here.
*
* Does nothing when `properties.fallbackToUnicode` is missing, or when
* `properties.toUnicode` is an `IdentityToUnicodeMap`.
*
|
||||
* NOTE: This function should only be called at the *end* of font-parsing,
|
||||
* after e.g. `adjustToUnicode` has run, to prevent any issues.
*
* @param {Object} properties - Font properties; this function reads
*   `fallbackToUnicode` and `toUnicode`, and updates `toUnicode` in place
*   through its `amend` method.
|
||||
*/
|
||||
function amendFallbackToUnicode(properties) {
|
||||
// No fallback data was built for this font, so there is nothing to merge.
if (!properties.fallbackToUnicode) {
|
||||
return;
|
||||
}
|
||||
// Identity maps are left untouched.
// NOTE(review): presumably because an identity map already resolves every
// char code in its range - confirm against `IdentityToUnicodeMap`.
if (properties.toUnicode instanceof IdentityToUnicodeMap) {
|
||||
return;
|
||||
}
|
||||
// Collects only the fallback entries that are missing from `toUnicode`.
const toUnicode = [];
|
||||
// `for...in` visits only the indices that are actually present, as string
// keys. NOTE(review): assumes `fallbackToUnicode` is the plain (sparse)
// array returned by `_simpleFontToUnicode` - confirm for all callers.
for (const charCode in properties.fallbackToUnicode) {
|
||||
if (properties.toUnicode.has(charCode)) {
|
||||
continue; // The font dictionary has a `ToUnicode` entry.
|
||||
}
|
||||
toUnicode[charCode] = properties.fallbackToUnicode[charCode];
|
||||
}
|
||||
// Only call `amend` when at least one missing entry was collected.
if (toUnicode.length > 0) {
|
||||
properties.toUnicode.amend(toUnicode);
|
||||
}
|
||||
}
|
||||
|
||||
class Glyph {
|
||||
constructor(
|
||||
originalCharCode,
|
||||
@ -854,8 +877,6 @@ class Font {
|
||||
this.defaultEncoding = properties.defaultEncoding;
|
||||
|
||||
this.toUnicode = properties.toUnicode;
|
||||
this.fallbackToUnicode = properties.fallbackToUnicode || new ToUnicodeMap();
|
||||
|
||||
this.toFontChar = [];
|
||||
|
||||
if (properties.type === "Type3") {
|
||||
@ -941,6 +962,7 @@ class Font {
|
||||
return;
|
||||
}
|
||||
|
||||
amendFallbackToUnicode(properties);
|
||||
this.data = data;
|
||||
this.fontType = getFontType(type, subtype, properties.isStandardFont);
|
||||
|
||||
@ -1099,6 +1121,8 @@ class Font {
|
||||
}
|
||||
this.toFontChar = map;
|
||||
}
|
||||
|
||||
amendFallbackToUnicode(properties);
|
||||
this.loadedName = fontName.split("-")[0];
|
||||
this.fontType = getFontType(type, subtype, properties.isStandardFont);
|
||||
}
|
||||
@ -2957,15 +2981,12 @@ class Font {
|
||||
width = isNum(width) ? width : this.defaultWidth;
|
||||
const vmetric = this.vmetrics && this.vmetrics[widthCode];
|
||||
|
||||
let unicode =
|
||||
this.toUnicode.get(charcode) ||
|
||||
this.fallbackToUnicode.get(charcode) ||
|
||||
charcode;
|
||||
let unicode = this.toUnicode.get(charcode) || charcode;
|
||||
if (typeof unicode === "number") {
|
||||
unicode = String.fromCharCode(unicode);
|
||||
}
|
||||
|
||||
let isInFont = charcode in this.toFontChar;
|
||||
let isInFont = this.toFontChar[charcode] !== undefined;
|
||||
// First try the toFontChar map, if it's not there then try falling
|
||||
// back to the char code.
|
||||
fontCharCode = this.toFontChar[charcode] || charcode;
|
||||
|
Loading…
Reference in New Issue
Block a user