Merge pull request #13393 from Snuffleupagus/adjustToUnicode-hasIncludedToUnicodeMap
Tweak `adjustToUnicode` to allow extending a built-in /ToUnicode map
This commit is contained in:
commit
7fa61c062c
@ -3178,10 +3178,10 @@ class PartialEvaluator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @returns {ToUnicodeMap}
|
* @returns {Array}
|
||||||
* @private
|
* @private
|
||||||
*/
|
*/
|
||||||
_buildSimpleFontToUnicode(properties, forceGlyphs = false) {
|
_simpleFontToUnicode(properties, forceGlyphs = false) {
|
||||||
assert(!properties.composite, "Must be a simple font.");
|
assert(!properties.composite, "Must be a simple font.");
|
||||||
|
|
||||||
const toUnicode = [];
|
const toUnicode = [];
|
||||||
@ -3242,7 +3242,7 @@ class PartialEvaluator {
|
|||||||
Number.isNaN(code) &&
|
Number.isNaN(code) &&
|
||||||
Number.isInteger(parseInt(codeStr, 16))
|
Number.isInteger(parseInt(codeStr, 16))
|
||||||
) {
|
) {
|
||||||
return this._buildSimpleFontToUnicode(
|
return this._simpleFontToUnicode(
|
||||||
properties,
|
properties,
|
||||||
/* forceGlyphs */ true
|
/* forceGlyphs */ true
|
||||||
);
|
);
|
||||||
@ -3275,7 +3275,7 @@ class PartialEvaluator {
|
|||||||
}
|
}
|
||||||
toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
||||||
}
|
}
|
||||||
return new ToUnicodeMap(toUnicode);
|
return toUnicode;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -3284,7 +3284,7 @@ class PartialEvaluator {
|
|||||||
* @returns {Promise} A Promise that is resolved with a
|
* @returns {Promise} A Promise that is resolved with a
|
||||||
* {ToUnicodeMap|IdentityToUnicodeMap} object.
|
* {ToUnicodeMap|IdentityToUnicodeMap} object.
|
||||||
*/
|
*/
|
||||||
buildToUnicode(properties) {
|
async buildToUnicode(properties) {
|
||||||
properties.hasIncludedToUnicodeMap =
|
properties.hasIncludedToUnicodeMap =
|
||||||
!!properties.toUnicode && properties.toUnicode.length > 0;
|
!!properties.toUnicode && properties.toUnicode.length > 0;
|
||||||
|
|
||||||
@ -3294,11 +3294,9 @@ class PartialEvaluator {
|
|||||||
// text-extraction. For simple fonts, containing encoding information,
|
// text-extraction. For simple fonts, containing encoding information,
|
||||||
// use a fallback ToUnicode map to improve this (fixes issue8229.pdf).
|
// use a fallback ToUnicode map to improve this (fixes issue8229.pdf).
|
||||||
if (!properties.composite && properties.hasEncoding) {
|
if (!properties.composite && properties.hasEncoding) {
|
||||||
properties.fallbackToUnicode =
|
properties.fallbackToUnicode = this._simpleFontToUnicode(properties);
|
||||||
this._buildSimpleFontToUnicode(properties);
|
|
||||||
}
|
}
|
||||||
|
return properties.toUnicode;
|
||||||
return Promise.resolve(properties.toUnicode);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// According to the spec if the font is a simple font we should only map
|
// According to the spec if the font is a simple font we should only map
|
||||||
@ -3307,7 +3305,7 @@ class PartialEvaluator {
|
|||||||
// in practice it seems better to always try to create a toUnicode map
|
// in practice it seems better to always try to create a toUnicode map
|
||||||
// based on the default encoding.
|
// based on the default encoding.
|
||||||
if (!properties.composite /* is simple font */) {
|
if (!properties.composite /* is simple font */) {
|
||||||
return Promise.resolve(this._buildSimpleFontToUnicode(properties));
|
return new ToUnicodeMap(this._simpleFontToUnicode(properties));
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the font is a composite font that uses one of the predefined CMaps
|
// If the font is a composite font that uses one of the predefined CMaps
|
||||||
@ -3330,22 +3328,20 @@ class PartialEvaluator {
|
|||||||
// b) Obtain the registry and ordering of the character collection used
|
// b) Obtain the registry and ordering of the character collection used
|
||||||
// by the font’s CMap (for example, Adobe and Japan1) from its
|
// by the font’s CMap (for example, Adobe and Japan1) from its
|
||||||
// CIDSystemInfo dictionary.
|
// CIDSystemInfo dictionary.
|
||||||
const registry = properties.cidSystemInfo.registry;
|
const { registry, ordering } = properties.cidSystemInfo;
|
||||||
const ordering = properties.cidSystemInfo.ordering;
|
|
||||||
// c) Construct a second CMap name by concatenating the registry and
|
// c) Construct a second CMap name by concatenating the registry and
|
||||||
// ordering obtained in step (b) in the format registry–ordering–UCS2
|
// ordering obtained in step (b) in the format registry–ordering–UCS2
|
||||||
// (for example, Adobe–Japan1–UCS2).
|
// (for example, Adobe–Japan1–UCS2).
|
||||||
const ucs2CMapName = Name.get(registry + "-" + ordering + "-UCS2");
|
const ucs2CMapName = Name.get(`${registry}-${ordering}-UCS2`);
|
||||||
// d) Obtain the CMap with the name constructed in step (c) (available
|
// d) Obtain the CMap with the name constructed in step (c) (available
|
||||||
// from the ASN Web site; see the Bibliography).
|
// from the ASN Web site; see the Bibliography).
|
||||||
return CMapFactory.create({
|
const ucs2CMap = await CMapFactory.create({
|
||||||
encoding: ucs2CMapName,
|
encoding: ucs2CMapName,
|
||||||
fetchBuiltInCMap: this._fetchBuiltInCMapBound,
|
fetchBuiltInCMap: this._fetchBuiltInCMapBound,
|
||||||
useCMap: null,
|
useCMap: null,
|
||||||
}).then(function (ucs2CMap) {
|
});
|
||||||
const cMap = properties.cMap;
|
|
||||||
const toUnicode = [];
|
const toUnicode = [];
|
||||||
cMap.forEach(function (charcode, cid) {
|
properties.cMap.forEach(function (charcode, cid) {
|
||||||
if (cid > 0xffff) {
|
if (cid > 0xffff) {
|
||||||
throw new FormatError("Max size of CID is 65,535");
|
throw new FormatError("Max size of CID is 65,535");
|
||||||
}
|
}
|
||||||
@ -3359,13 +3355,10 @@ class PartialEvaluator {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
return new ToUnicodeMap(toUnicode);
|
return new ToUnicodeMap(toUnicode);
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The viewer's choice, just use an identity map.
|
// The viewer's choice, just use an identity map.
|
||||||
return Promise.resolve(
|
return new IdentityToUnicodeMap(properties.firstChar, properties.lastChar);
|
||||||
new IdentityToUnicodeMap(properties.firstChar, properties.lastChar)
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
readToUnicode(cmapObj) {
|
readToUnicode(cmapObj) {
|
||||||
|
@ -135,9 +135,6 @@ function adjustToUnicode(properties, builtInEncoding) {
|
|||||||
if (properties.isInternalFont) {
|
if (properties.isInternalFont) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (properties.hasIncludedToUnicodeMap) {
|
|
||||||
return; // The font dictionary has a `ToUnicode` entry.
|
|
||||||
}
|
|
||||||
if (builtInEncoding === properties.defaultEncoding) {
|
if (builtInEncoding === properties.defaultEncoding) {
|
||||||
return; // No point in trying to adjust `toUnicode` if the encodings match.
|
return; // No point in trying to adjust `toUnicode` if the encodings match.
|
||||||
}
|
}
|
||||||
@ -147,20 +144,51 @@ function adjustToUnicode(properties, builtInEncoding) {
|
|||||||
const toUnicode = [],
|
const toUnicode = [],
|
||||||
glyphsUnicodeMap = getGlyphsUnicode();
|
glyphsUnicodeMap = getGlyphsUnicode();
|
||||||
for (const charCode in builtInEncoding) {
|
for (const charCode in builtInEncoding) {
|
||||||
|
if (properties.hasIncludedToUnicodeMap) {
|
||||||
|
if (properties.toUnicode.has(charCode)) {
|
||||||
|
continue; // The font dictionary has a `ToUnicode` entry.
|
||||||
|
}
|
||||||
|
} else {
|
||||||
if (
|
if (
|
||||||
properties.hasEncoding &&
|
properties.hasEncoding &&
|
||||||
properties.differences[charCode] !== undefined
|
properties.differences[charCode] !== undefined
|
||||||
) {
|
) {
|
||||||
continue; // The font dictionary has an `Encoding`/`Differences` entry.
|
continue; // The font dictionary has an `Encoding`/`Differences` entry.
|
||||||
}
|
}
|
||||||
|
}
|
||||||
const glyphName = builtInEncoding[charCode];
|
const glyphName = builtInEncoding[charCode];
|
||||||
const unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
const unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||||
if (unicode !== -1) {
|
if (unicode !== -1) {
|
||||||
toUnicode[charCode] = String.fromCharCode(unicode);
|
toUnicode[charCode] = String.fromCharCode(unicode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (toUnicode.length > 0) {
|
||||||
properties.toUnicode.amend(toUnicode);
|
properties.toUnicode.amend(toUnicode);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NOTE: This function should only be called at the *end* of font-parsing,
|
||||||
|
* after e.g. `adjustToUnicode` has run, to prevent any issues.
|
||||||
|
*/
|
||||||
|
function amendFallbackToUnicode(properties) {
|
||||||
|
if (!properties.fallbackToUnicode) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (properties.toUnicode instanceof IdentityToUnicodeMap) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const toUnicode = [];
|
||||||
|
for (const charCode in properties.fallbackToUnicode) {
|
||||||
|
if (properties.toUnicode.has(charCode)) {
|
||||||
|
continue; // The font dictionary has a `ToUnicode` entry.
|
||||||
|
}
|
||||||
|
toUnicode[charCode] = properties.fallbackToUnicode[charCode];
|
||||||
|
}
|
||||||
|
if (toUnicode.length > 0) {
|
||||||
|
properties.toUnicode.amend(toUnicode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class Glyph {
|
class Glyph {
|
||||||
constructor(
|
constructor(
|
||||||
@ -849,8 +877,6 @@ class Font {
|
|||||||
this.defaultEncoding = properties.defaultEncoding;
|
this.defaultEncoding = properties.defaultEncoding;
|
||||||
|
|
||||||
this.toUnicode = properties.toUnicode;
|
this.toUnicode = properties.toUnicode;
|
||||||
this.fallbackToUnicode = properties.fallbackToUnicode || new ToUnicodeMap();
|
|
||||||
|
|
||||||
this.toFontChar = [];
|
this.toFontChar = [];
|
||||||
|
|
||||||
if (properties.type === "Type3") {
|
if (properties.type === "Type3") {
|
||||||
@ -936,6 +962,7 @@ class Font {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
amendFallbackToUnicode(properties);
|
||||||
this.data = data;
|
this.data = data;
|
||||||
this.fontType = getFontType(type, subtype, properties.isStandardFont);
|
this.fontType = getFontType(type, subtype, properties.isStandardFont);
|
||||||
|
|
||||||
@ -1094,6 +1121,8 @@ class Font {
|
|||||||
}
|
}
|
||||||
this.toFontChar = map;
|
this.toFontChar = map;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
amendFallbackToUnicode(properties);
|
||||||
this.loadedName = fontName.split("-")[0];
|
this.loadedName = fontName.split("-")[0];
|
||||||
this.fontType = getFontType(type, subtype, properties.isStandardFont);
|
this.fontType = getFontType(type, subtype, properties.isStandardFont);
|
||||||
}
|
}
|
||||||
@ -2545,12 +2574,9 @@ class Font {
|
|||||||
const glyphsUnicodeMap = getGlyphsUnicode();
|
const glyphsUnicodeMap = getGlyphsUnicode();
|
||||||
for (let charCode = 0; charCode < 256; charCode++) {
|
for (let charCode = 0; charCode < 256; charCode++) {
|
||||||
let glyphName;
|
let glyphName;
|
||||||
if (this.differences && charCode in this.differences) {
|
if (this.differences[charCode] !== undefined) {
|
||||||
glyphName = this.differences[charCode];
|
glyphName = this.differences[charCode];
|
||||||
} else if (
|
} else if (baseEncoding[charCode] !== "") {
|
||||||
charCode in baseEncoding &&
|
|
||||||
baseEncoding[charCode] !== ""
|
|
||||||
) {
|
|
||||||
glyphName = baseEncoding[charCode];
|
glyphName = baseEncoding[charCode];
|
||||||
} else {
|
} else {
|
||||||
glyphName = StandardEncoding[charCode];
|
glyphName = StandardEncoding[charCode];
|
||||||
@ -2955,15 +2981,12 @@ class Font {
|
|||||||
width = isNum(width) ? width : this.defaultWidth;
|
width = isNum(width) ? width : this.defaultWidth;
|
||||||
const vmetric = this.vmetrics && this.vmetrics[widthCode];
|
const vmetric = this.vmetrics && this.vmetrics[widthCode];
|
||||||
|
|
||||||
let unicode =
|
let unicode = this.toUnicode.get(charcode) || charcode;
|
||||||
this.toUnicode.get(charcode) ||
|
|
||||||
this.fallbackToUnicode.get(charcode) ||
|
|
||||||
charcode;
|
|
||||||
if (typeof unicode === "number") {
|
if (typeof unicode === "number") {
|
||||||
unicode = String.fromCharCode(unicode);
|
unicode = String.fromCharCode(unicode);
|
||||||
}
|
}
|
||||||
|
|
||||||
let isInFont = charcode in this.toFontChar;
|
let isInFont = this.toFontChar[charcode] !== undefined;
|
||||||
// First try the toFontChar map, if it's not there then try falling
|
// First try the toFontChar map, if it's not there then try falling
|
||||||
// back to the char code.
|
// back to the char code.
|
||||||
fontCharCode = this.toFontChar[charcode] || charcode;
|
fontCharCode = this.toFontChar[charcode] || charcode;
|
||||||
|
Loading…
Reference in New Issue
Block a user