Refactor the building of ToUnicode
maps for simple fonts into a helper method
This commit is contained in:
parent
ada47fe373
commit
ffbfc3c2a7
@ -1932,30 +1932,17 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds a char code to unicode map based on section 9.10 of the spec.
|
* @returns {ToUnicodeMap}
|
||||||
* @param {Object} properties Font properties object.
|
* @private
|
||||||
* @return {Promise} A Promise that is resolved with a
|
|
||||||
* {ToUnicodeMap|IdentityToUnicodeMap} object.
|
|
||||||
*/
|
*/
|
||||||
buildToUnicode: function PartialEvaluator_buildToUnicode(properties) {
|
_buildSimpleFontToUnicode(properties) {
|
||||||
properties.hasIncludedToUnicodeMap =
|
assert(!properties.composite, 'Must be a simple font.');
|
||||||
!!properties.toUnicode && properties.toUnicode.length > 0;
|
|
||||||
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
let toUnicode = [], charcode, glyphName;
|
||||||
if (properties.hasIncludedToUnicodeMap) {
|
let encoding = properties.defaultEncoding.slice();
|
||||||
return Promise.resolve(properties.toUnicode);
|
let baseEncodingName = properties.baseEncodingName;
|
||||||
}
|
|
||||||
// According to the spec if the font is a simple font we should only map
|
|
||||||
// to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
|
|
||||||
// the differences array only contains adobe standard or symbol set names,
|
|
||||||
// in practice it seems better to always try to create a toUnicode
|
|
||||||
// map based on the default encoding.
|
|
||||||
var toUnicode, charcode, glyphName;
|
|
||||||
if (!properties.composite /* is simple font */) {
|
|
||||||
toUnicode = [];
|
|
||||||
var encoding = properties.defaultEncoding.slice();
|
|
||||||
var baseEncodingName = properties.baseEncodingName;
|
|
||||||
// Merge in the differences array.
|
// Merge in the differences array.
|
||||||
var differences = properties.differences;
|
let differences = properties.differences;
|
||||||
for (charcode in differences) {
|
for (charcode in differences) {
|
||||||
glyphName = differences[charcode];
|
glyphName = differences[charcode];
|
||||||
if (glyphName === '.notdef') {
|
if (glyphName === '.notdef') {
|
||||||
@ -1965,7 +1952,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
}
|
}
|
||||||
encoding[charcode] = glyphName;
|
encoding[charcode] = glyphName;
|
||||||
}
|
}
|
||||||
var glyphsUnicodeMap = getGlyphsUnicode();
|
let glyphsUnicodeMap = getGlyphsUnicode();
|
||||||
for (charcode in encoding) {
|
for (charcode in encoding) {
|
||||||
// a) Map the character code to a character name.
|
// a) Map the character code to a character name.
|
||||||
glyphName = encoding[charcode];
|
glyphName = encoding[charcode];
|
||||||
@ -1976,7 +1963,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
} else if (glyphsUnicodeMap[glyphName] === undefined) {
|
} else if (glyphsUnicodeMap[glyphName] === undefined) {
|
||||||
// (undocumented) c) Few heuristics to recognize unknown glyphs
|
// (undocumented) c) Few heuristics to recognize unknown glyphs
|
||||||
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
||||||
var code = 0;
|
let code = 0;
|
||||||
switch (glyphName[0]) {
|
switch (glyphName[0]) {
|
||||||
case 'G': // Gxx glyph
|
case 'G': // Gxx glyph
|
||||||
if (glyphName.length === 3) {
|
if (glyphName.length === 3) {
|
||||||
@ -1996,18 +1983,17 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
// 'uniXXXX'/'uXXXX{XX}' glyphs
|
// 'uniXXXX'/'uXXXX{XX}' glyphs
|
||||||
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
let unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||||||
if (unicode !== -1) {
|
if (unicode !== -1) {
|
||||||
code = unicode;
|
code = unicode;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (code) {
|
if (code) {
|
||||||
// If |baseEncodingName| is one of the predefined encodings,
|
// If `baseEncodingName` is one of the predefined encodings, and `code`
|
||||||
// and |code| equals |charcode|, using the glyph defined in the
|
// equals `charcode`, using the glyph defined in the baseEncoding
|
||||||
// baseEncoding seems to yield a better |toUnicode| mapping
|
// seems to yield a better `toUnicode` mapping (fixes issue 5070).
|
||||||
// (fixes issue 5070).
|
|
||||||
if (baseEncodingName && code === +charcode) {
|
if (baseEncodingName && code === +charcode) {
|
||||||
var baseEncoding = getEncoding(baseEncodingName);
|
let baseEncoding = getEncoding(baseEncodingName);
|
||||||
if (baseEncoding && (glyphName = baseEncoding[charcode])) {
|
if (baseEncoding && (glyphName = baseEncoding[charcode])) {
|
||||||
toUnicode[charcode] =
|
toUnicode[charcode] =
|
||||||
String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
||||||
@ -2018,11 +2004,35 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
toUnicode[charcode] =
|
toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
||||||
String.fromCharCode(glyphsUnicodeMap[glyphName]);
|
|
||||||
}
|
}
|
||||||
return Promise.resolve(new ToUnicodeMap(toUnicode));
|
return new ToUnicodeMap(toUnicode);
|
||||||
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds a char code to unicode map based on section 9.10 of the spec.
|
||||||
|
* @param {Object} properties Font properties object.
|
||||||
|
* @return {Promise} A Promise that is resolved with a
|
||||||
|
* {ToUnicodeMap|IdentityToUnicodeMap} object.
|
||||||
|
*/
|
||||||
|
buildToUnicode(properties) {
|
||||||
|
properties.hasIncludedToUnicodeMap =
|
||||||
|
!!properties.toUnicode && properties.toUnicode.length > 0;
|
||||||
|
|
||||||
|
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
||||||
|
if (properties.hasIncludedToUnicodeMap) {
|
||||||
|
return Promise.resolve(properties.toUnicode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// According to the spec if the font is a simple font we should only map
|
||||||
|
// to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
|
||||||
|
// the differences array only contains adobe standard or symbol set names,
|
||||||
|
// in practice it seems better to always try to create a toUnicode map
|
||||||
|
// based on the default encoding.
|
||||||
|
if (!properties.composite /* is simple font */) {
|
||||||
|
return Promise.resolve(this._buildSimpleFontToUnicode(properties));
|
||||||
|
}
|
||||||
|
|
||||||
// If the font is a composite font that uses one of the predefined CMaps
|
// If the font is a composite font that uses one of the predefined CMaps
|
||||||
// listed in Table 118 (except Identity–H and Identity–V) or whose
|
// listed in Table 118 (except Identity–H and Identity–V) or whose
|
||||||
// descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
|
// descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
|
||||||
@ -2041,12 +2051,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
// b) Obtain the registry and ordering of the character collection used
|
// b) Obtain the registry and ordering of the character collection used
|
||||||
// by the font’s CMap (for example, Adobe and Japan1) from its
|
// by the font’s CMap (for example, Adobe and Japan1) from its
|
||||||
// CIDSystemInfo dictionary.
|
// CIDSystemInfo dictionary.
|
||||||
var registry = properties.cidSystemInfo.registry;
|
let registry = properties.cidSystemInfo.registry;
|
||||||
var ordering = properties.cidSystemInfo.ordering;
|
let ordering = properties.cidSystemInfo.ordering;
|
||||||
// c) Construct a second CMap name by concatenating the registry and
|
// c) Construct a second CMap name by concatenating the registry and
|
||||||
// ordering obtained in step (b) in the format registry–ordering–UCS2
|
// ordering obtained in step (b) in the format registry–ordering–UCS2
|
||||||
// (for example, Adobe–Japan1–UCS2).
|
// (for example, Adobe–Japan1–UCS2).
|
||||||
var ucs2CMapName = Name.get(registry + '-' + ordering + '-UCS2');
|
let ucs2CMapName = Name.get(registry + '-' + ordering + '-UCS2');
|
||||||
// d) Obtain the CMap with the name constructed in step (c) (available
|
// d) Obtain the CMap with the name constructed in step (c) (available
|
||||||
// from the ASN Web site; see the Bibliography).
|
// from the ASN Web site; see the Bibliography).
|
||||||
return CMapFactory.create({
|
return CMapFactory.create({
|
||||||
@ -2054,15 +2064,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
fetchBuiltInCMap: this.fetchBuiltInCMap,
|
fetchBuiltInCMap: this.fetchBuiltInCMap,
|
||||||
useCMap: null,
|
useCMap: null,
|
||||||
}).then(function (ucs2CMap) {
|
}).then(function (ucs2CMap) {
|
||||||
var cMap = properties.cMap;
|
let cMap = properties.cMap;
|
||||||
toUnicode = [];
|
let toUnicode = [];
|
||||||
cMap.forEach(function(charcode, cid) {
|
cMap.forEach(function(charcode, cid) {
|
||||||
if (cid > 0xffff) {
|
if (cid > 0xffff) {
|
||||||
throw new FormatError('Max size of CID is 65,535');
|
throw new FormatError('Max size of CID is 65,535');
|
||||||
}
|
}
|
||||||
// e) Map the CID obtained in step (a) according to the CMap
|
// e) Map the CID obtained in step (a) according to the CMap
|
||||||
// obtained in step (d), producing a Unicode value.
|
// obtained in step (d), producing a Unicode value.
|
||||||
var ucs2 = ucs2CMap.lookup(cid);
|
let ucs2 = ucs2CMap.lookup(cid);
|
||||||
if (ucs2) {
|
if (ucs2) {
|
||||||
toUnicode[charcode] =
|
toUnicode[charcode] =
|
||||||
String.fromCharCode((ucs2.charCodeAt(0) << 8) +
|
String.fromCharCode((ucs2.charCodeAt(0) << 8) +
|
||||||
|
Loading…
Reference in New Issue
Block a user