Removing adaptUnicode; making cmap equal to ToUnicode tables

This commit is contained in:
notmasteryet 2011-11-27 20:43:23 -06:00
parent 709dc1a0c9
commit 08e3fd88ad
2 changed files with 172 additions and 123 deletions

@ -555,9 +555,21 @@ var PartialEvaluator = (function partialEvaluator() {
var startRange = tokens[j]; var startRange = tokens[j];
var endRange = tokens[j + 1]; var endRange = tokens[j + 1];
var code = tokens[j + 2]; var code = tokens[j + 2];
while (startRange <= endRange) { if (code == 0xFFFF) {
charToUnicode[startRange] = code++; // CMap is broken, assuming code == startRange
++startRange; code = startRange;
}
if (isArray(code)) {
var codeindex = 0;
while (startRange <= endRange) {
charToUnicode[startRange] = code[codeindex++];
++startRange;
}
} else {
while (startRange <= endRange) {
charToUnicode[startRange] = code++;
++startRange;
}
} }
} }
break; break;

@ -719,20 +719,10 @@ function getUnicodeRangeFor(value) {
return -1; return -1;
} }
function adaptUnicode(unicode) {
return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ?
unicode + kCmapGlyphOffset : unicode;
}
function isAdaptedUnicode(unicode) {
return unicode >= kCmapGlyphOffset &&
unicode < kCmapGlyphOffset + kSizeOfGlyphArea;
}
function isSpecialUnicode(unicode) { function isSpecialUnicode(unicode) {
return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) || return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ||
unicode >= kCmapGlyphOffset && (unicode >= kCmapGlyphOffset &&
unicode < kCmapGlyphOffset + kSizeOfGlyphArea; unicode < kCmapGlyphOffset + kSizeOfGlyphArea);
} }
/** /**
@ -965,15 +955,15 @@ var Font = (function Font() {
var ranges = []; var ranges = [];
for (var n = 0; n < length; ) { for (var n = 0; n < length; ) {
var start = codes[n].unicode; var start = codes[n].unicode;
var startCode = codes[n].code; var codeIndices = [codes[n].code];
++n; ++n;
var end = start; var end = start;
while (n < length && end + 1 == codes[n].unicode) { while (n < length && end + 1 == codes[n].unicode) {
codeIndices.push(codes[n].code);
++end; ++end;
++n; ++n;
} }
var endCode = codes[n - 1].code; ranges.push([start, end, codeIndices]);
ranges.push([start, end, startCode, endCode]);
} }
return ranges; return ranges;
@ -1016,17 +1006,16 @@ var Font = (function Font() {
idDeltas += string16(0); idDeltas += string16(0);
idRangeOffsets += string16(offset); idRangeOffsets += string16(offset);
var startCode = range[2]; var codes = range[2];
var endCode = range[3]; for (var j = 0, jj = codes.length; j < jj; ++j)
for (var j = startCode; j <= endCode; ++j) glyphsIds += string16(deltas[codes[j]]);
glyphsIds += string16(deltas[j]);
} }
} else { } else {
for (var i = 0; i < segCount - 1; i++) { for (var i = 0; i < segCount - 1; i++) {
var range = ranges[i]; var range = ranges[i];
var start = range[0]; var start = range[0];
var end = range[1]; var end = range[1];
var startCode = range[2]; var startCode = range[2][0];
startCount += string16(start); startCount += string16(start);
endCount += string16(end); endCount += string16(end);
@ -1303,7 +1292,7 @@ var Font = (function Font() {
properties.baseEncoding = encoding; properties.baseEncoding = encoding;
} }
function replaceCMapTable(cmap, font, properties) { function readCMapTable(cmap, font) {
var start = (font.start ? font.start : 0) + cmap.offset; var start = (font.start ? font.start : 0) + cmap.offset;
font.pos = start; font.pos = start;
@ -1320,7 +1309,7 @@ var Font = (function Font() {
} }
// Check that table are sorted by platformID then encodingID, // Check that table are sorted by platformID then encodingID,
records.sort(function fontReplaceCMapTableSort(a, b) { records.sort(function fontReadCMapTableSort(a, b) {
return ((a.platformID << 16) + a.encodingID) - return ((a.platformID << 16) + a.encodingID) -
((b.platformID << 16) + b.encodingID); ((b.platformID << 16) + b.encodingID);
}); });
@ -1375,16 +1364,15 @@ var Font = (function Font() {
for (var j = 0; j < 256; j++) { for (var j = 0; j < 256; j++) {
var index = font.getByte(); var index = font.getByte();
if (index) { if (index) {
var unicode = adaptUnicode(j); glyphs.push({ unicode: j, code: j });
glyphs.push({ unicode: unicode, code: j });
ids.push(index); ids.push(index);
} }
} }
return {
properties.hasShortCmap = true; glyphs: glyphs,
ids: ids,
createGlyphNameMap(glyphs, ids, properties); hasShortCmap: true
return cmap.data = createCMapTable(glyphs, ids); };
} else if (format == 4) { } else if (format == 4) {
// re-creating the table in format 4 since the encoding // re-creating the table in format 4 since the encoding
// might be changed // might be changed
@ -1436,17 +1424,18 @@ var Font = (function Font() {
var glyphCode = offsetIndex < 0 ? j : var glyphCode = offsetIndex < 0 ? j :
offsets[offsetIndex + j - start]; offsets[offsetIndex + j - start];
glyphCode = (glyphCode + delta) & 0xFFFF; glyphCode = (glyphCode + delta) & 0xFFFF;
if (glyphCode == 0 || isAdaptedUnicode(j)) if (glyphCode == 0)
continue; continue;
var unicode = adaptUnicode(j); glyphs.push({ unicode: j, code: j });
glyphs.push({ unicode: unicode, code: j });
ids.push(glyphCode); ids.push(glyphCode);
} }
} }
createGlyphNameMap(glyphs, ids, properties); return {
return cmap.data = createCMapTable(glyphs, ids); glyphs: glyphs,
ids: ids
};
} else if (format == 6) { } else if (format == 6) {
// Format 6 is a 2-bytes dense mapping, which means the font data // Format 6 is a 2-bytes dense mapping, which means the font data
// lives glue together even if they are pretty far in the unicode // lives glue together even if they are pretty far in the unicode
@ -1461,19 +1450,18 @@ var Font = (function Font() {
for (var j = 0; j < entryCount; j++) { for (var j = 0; j < entryCount; j++) {
var glyphCode = int16(font.getBytes(2)); var glyphCode = int16(font.getBytes(2));
var code = firstCode + j; var code = firstCode + j;
if (isAdaptedUnicode(glyphCode))
continue;
var unicode = adaptUnicode(code); glyphs.push({ unicode: code, code: code });
glyphs.push({ unicode: unicode, code: code });
ids.push(glyphCode); ids.push(glyphCode);
} }
createGlyphNameMap(glyphs, ids, properties); return {
return cmap.data = createCMapTable(glyphs, ids); glyphs: glyphs,
ids: ids
};
} }
} }
return cmap.data; error('Unsupported cmap table format');
}; };
function sanitizeMetrics(font, header, metrics, numGlyphs) { function sanitizeMetrics(font, header, metrics, numGlyphs) {
@ -1712,17 +1700,60 @@ var Font = (function Font() {
tables.push(cmap); tables.push(cmap);
} }
var glyphs = []; var glyphs = [], ids = [];
var usedUnicodes = [], unusedUnicode = kCmapGlyphOffset;
var cidToGidMap = properties.cidToGidMap;
for (i = 1; i < numGlyphs; i++) { for (i = 1; i < numGlyphs; i++) {
if (isAdaptedUnicode(i)) var cid = cidToGidMap ? cidToGidMap.indexOf(i) : i;
continue; var unicode = this.toUnicode[cid];
if (!unicode || isSpecialUnicode(unicode) ||
glyphs.push({ unicode: adaptUnicode(i) }); unicode in usedUnicodes) {
// overriding the special symbols mapping
while (unusedUnicode in usedUnicodes)
unusedUnicode++;
this.toUnicode[cid] = unicode = unusedUnicode++;
if (unusedUnicode >= kCmapGlyphOffset + kSizeOfGlyphArea) {
// overflow of the user-defined symbols range;
// using symbols that are a little bit lower than this range
unusedUnicode = kCmapGlyphOffset - numGlyphs;
}
}
usedUnicodes[unicode] = true;
glyphs.push({ unicode: unicode, code: cid });
ids.push(i);
} }
cmap.data = createCMapTable(glyphs); cmap.data = createCMapTable(glyphs, ids);
} else { } else {
replaceCMapTable(cmap, font, properties); var cmapTable = readCMapTable(cmap, font);
var glyphs = cmapTable.glyphs;
var ids = cmapTable.ids;
var hasShortCmap = !!cmapTable.hasShortCmap;
var toUnicode = this.toUnicode;
if (hasShortCmap && toUnicode) {
// checking if cmap is just identity map
var isIdentity = true;
for (var i = 0, ii = glyphs.length; i < ii; i++) {
if (glyphs[i].unicode != i + 1) {
isIdentity = false;
break;
}
}
// if it is, replacing with meaningful toUnicode values
if (isIdentity) {
for (var i = 0, ii = glyphs.length; i < ii; i++) {
var unicode = toUnicode[i + 1] || i + 1;
glyphs[i].unicode = unicode;
}
this.useToUnicode = true;
}
}
properties.hasShortCmap = hasShortCmap;
createGlyphNameMap(glyphs, ids, properties);
this.glyphNameMap = properties.glyphNameMap; this.glyphNameMap = properties.glyphNameMap;
cmap.data = createCMapTable(glyphs, ids);
} }
// Rewrite the 'post' table if needed // Rewrite the 'post' table if needed
@ -1812,6 +1843,14 @@ var Font = (function Font() {
} }
properties.baseEncoding = encoding; properties.baseEncoding = encoding;
} }
if (properties.subtype == 'CIDFontType0C') {
var toUnicode = [];
for (var i = 0; i < charstrings.length; ++i) {
var charstring = charstrings[i];
toUnicode[charstring.code] = charstring.unicode;
}
this.toUnicode = toUnicode;
}
var fields = { var fields = {
// PostScript Font Program // PostScript Font Program
@ -1872,8 +1911,11 @@ var Font = (function Font() {
// Horizontal metrics // Horizontal metrics
'hmtx': (function fontFieldsHmtx() { 'hmtx': (function fontFieldsHmtx() {
var hmtx = '\x00\x00\x00\x00'; // Fake .notdef var hmtx = '\x00\x00\x00\x00'; // Fake .notdef
for (var i = 0, ii = charstrings.length; i < ii; i++) for (var i = 0, ii = charstrings.length; i < ii; i++) {
hmtx += string16(charstrings[i].width) + string16(0); var charstring = charstrings[i];
var width = 'width' in charstring ? charstring.width : 0;
hmtx += string16(width) + string16(0);
}
return stringToArray(hmtx); return stringToArray(hmtx);
})(), })(),
@ -1903,20 +1945,22 @@ var Font = (function Font() {
}, },
rebuildToUnicode: function font_rebuildToUnicode(properties) { rebuildToUnicode: function font_rebuildToUnicode(properties) {
var firstChar = properties.firstChar, lastChar = properties.lastChar;
var map = []; var map = [];
if (properties.composite) { if (properties.composite) {
for (var i = properties.firstChar, ii = properties.lastChar; i <= ii; i++) { var isIdentityMap = this.cidToUnicode.length == 0;
for (var i = firstChar, ii = lastChar; i <= ii; i++) {
// TODO missing map the character according font's CMap // TODO missing map the character according font's CMap
var cid = i; var cid = i;
map[i] = this.cidToUnicode[cid]; map[i] = isIdentityMap ? cid : this.cidToUnicode[cid];
} }
} else { } else {
for (var i = properties.firstChar, ii = properties.lastChar; i <= ii; i++) { for (var i = firstChar, ii = lastChar; i <= ii; i++) {
var glyph = properties.differences[i]; var glyph = properties.differences[i];
if (!glyph) if (!glyph)
glyph = properties.baseEncoding[i]; glyph = properties.baseEncoding[i];
if (!!glyph && (glyph in GlyphsUnicode)) if (!!glyph && (glyph in GlyphsUnicode))
map[i] = GlyphsUnicode[glyph] map[i] = GlyphsUnicode[glyph];
} }
} }
this.toUnicode = map; this.toUnicode = map;
@ -1926,16 +1970,12 @@ var Font = (function Font() {
}, },
loadCidToUnicode: function font_loadCidToUnicode(properties) { loadCidToUnicode: function font_loadCidToUnicode(properties) {
if (properties.cidToGidMap) {
this.cidToUnicode = properties.cidToGidMap;
return;
}
if (!properties.cidSystemInfo) if (!properties.cidSystemInfo)
return; return;
var cidToUnicodeMap = []; var cidToUnicodeMap = [], unicodeToCIDMap = [];
this.cidToUnicode = cidToUnicodeMap; this.cidToUnicode = cidToUnicodeMap;
this.unicodeToCID = unicodeToCIDMap;
var cidSystemInfo = properties.cidSystemInfo; var cidSystemInfo = properties.cidSystemInfo;
var cidToUnicode; var cidToUnicode;
@ -1947,28 +1987,34 @@ var Font = (function Font() {
if (!cidToUnicode) if (!cidToUnicode)
return; // identity encoding return; // identity encoding
var glyph = 1, i, j, k, ii; var cid = 1, i, j, k, ii;
for (i = 0, ii = cidToUnicode.length; i < ii; ++i) { for (i = 0, ii = cidToUnicode.length; i < ii; ++i) {
var unicode = cidToUnicode[i]; var unicode = cidToUnicode[i];
if (isArray(unicode)) { if (isArray(unicode)) {
var length = unicode.length; var length = unicode.length;
for (j = 0; j < length; j++) for (j = 0; j < length; j++) {
cidToUnicodeMap[unicode[j]] = glyph; cidToUnicodeMap[cid] = unicode[j];
glyph++; unicodeToCIDMap[unicode[j]] = cid;
}
cid++;
} else if (typeof unicode === 'object') { } else if (typeof unicode === 'object') {
var fillLength = unicode.f; var fillLength = unicode.f;
if (fillLength) { if (fillLength) {
k = unicode.c; k = unicode.c;
for (j = 0; j < fillLength; ++j) { for (j = 0; j < fillLength; ++j) {
cidToUnicodeMap[k] = glyph++; cidToUnicodeMap[cid] = k;
unicodeToCIDMap[k] = cid;
cid++;
k++; k++;
} }
} else } else
glyph += unicode.s; cid += unicode.s;
} else if (unicode) { } else if (unicode) {
cidToUnicodeMap[unicode] = glyph++; cidToUnicodeMap[cid] = unicode;
unicodeToCIDMap[unicode] = cid;
cid++;
} else } else
glyph++; cid++;
} }
}, },
@ -2008,19 +2054,19 @@ var Font = (function Font() {
switch (this.type) { switch (this.type) {
case 'CIDFontType0': case 'CIDFontType0':
if (this.noUnicodeAdaptation) { if (this.noUnicodeAdaptation) {
width = this.widths[this.cidToUnicode[charcode]]; width = this.widths[this.unicodeToCID[charcode] || charcode];
unicode = charcode; unicode = charcode;
break; break;
} }
unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode); unicode = this.toUnicode[charcode] || charcode;
break; break;
case 'CIDFontType2': case 'CIDFontType2':
if (this.noUnicodeAdaptation) { if (this.noUnicodeAdaptation) {
width = this.widths[this.cidToUnicode[charcode]]; width = this.widths[this.unicodeToCID[charcode] || charcode];
unicode = charcode; unicode = charcode;
break; break;
} }
unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode); unicode = this.toUnicode[charcode] || charcode;
break; break;
case 'Type1': case 'Type1':
var glyphName = this.differences[charcode] || this.encoding[charcode]; var glyphName = this.differences[charcode] || this.encoding[charcode];
@ -2031,7 +2077,7 @@ var Font = (function Font() {
break; break;
} }
unicode = this.glyphNameMap[glyphName] || unicode = this.glyphNameMap[glyphName] ||
adaptUnicode(GlyphsUnicode[glyphName] || charcode); GlyphsUnicode[glyphName] || charcode;
break; break;
case 'Type3': case 'Type3':
var glyphName = this.differences[charcode] || this.encoding[charcode]; var glyphName = this.differences[charcode] || this.encoding[charcode];
@ -2049,16 +2095,16 @@ var Font = (function Font() {
break; break;
} }
if (!this.hasEncoding) { if (!this.hasEncoding) {
unicode = adaptUnicode(charcode); unicode = this.useToUnicode ? this.toUnicode[charcode] : charcode;
break; break;
} }
if (this.hasShortCmap) { if (this.hasShortCmap && false) {
var j = Encodings.MacRomanEncoding.indexOf(glyphName); var j = Encodings.MacRomanEncoding.indexOf(glyphName);
unicode = j >= 0 && !isSpecialUnicode(j) ? j : unicode = j >= 0 ? j :
this.glyphNameMap[glyphName]; this.glyphNameMap[glyphName];
} else { } else {
unicode = glyphName in GlyphsUnicode ? unicode = glyphName in GlyphsUnicode ?
adaptUnicode(GlyphsUnicode[glyphName]) : GlyphsUnicode[glyphName] :
this.glyphNameMap[glyphName]; this.glyphNameMap[glyphName];
} }
break; break;
@ -2068,12 +2114,8 @@ var Font = (function Font() {
} }
var unicodeChars = this.toUnicode ? this.toUnicode[charcode] : charcode; var unicodeChars = this.toUnicode ? this.toUnicode[charcode] : charcode;
if (typeof unicodeChars === 'number') { if (typeof unicodeChars === 'number')
unicodeChars = (unicodeChars >= 0x10000) ? unicodeChars = String.fromCharCode(unicodeChars);
String.fromCharCode(0xD800 | ((unicodeChars - 0x10000) >> 10),
0xDC00 | (unicodeChars & 0x3FF)) : String.fromCharCode(unicodeChars);
// TODO we probably don't need convert high/low surrogate... keeping for now
}
return { return {
fontChar: String.fromCharCode(unicode), fontChar: String.fromCharCode(unicode),
@ -2790,22 +2832,13 @@ CFF.prototype = {
getOrderedCharStrings: function cff_getOrderedCharStrings(glyphs, getOrderedCharStrings: function cff_getOrderedCharStrings(glyphs,
properties) { properties) {
var charstrings = []; var charstrings = [];
var reverseMapping = {};
var encoding = properties.baseEncoding;
var i, length, glyphName; var i, length, glyphName;
for (i = 0, length = encoding.length; i < length; ++i) {
glyphName = encoding[i];
if (!glyphName || isSpecialUnicode(i))
continue;
reverseMapping[glyphName] = i;
}
reverseMapping['.notdef'] = 0;
var unusedUnicode = kCmapGlyphOffset; var unusedUnicode = kCmapGlyphOffset;
for (i = 0, length = glyphs.length; i < length; i++) { for (i = 0, length = glyphs.length; i < length; i++) {
var item = glyphs[i]; var item = glyphs[i];
var glyphName = item.glyph; var glyphName = item.glyph;
var unicode = glyphName in reverseMapping ? var unicode = glyphName in GlyphsUnicode ?
reverseMapping[glyphName] : unusedUnicode++; GlyphsUnicode[glyphName] : unusedUnicode++;
charstrings.push({ charstrings.push({
glyph: glyphName, glyph: glyphName,
unicode: unicode, unicode: unicode,
@ -3092,16 +3125,14 @@ var Type2CFF = (function type2CFF() {
} }
var charStrings = this.parseIndex(topDict.CharStrings); var charStrings = this.parseIndex(topDict.CharStrings);
var charset = this.parseCharsets(topDict.charset,
charStrings.length, strings);
var encoding = this.parseEncoding(topDict.Encoding, properties,
strings, charset);
var charset, encoding; var charset, encoding;
var isCIDFont = properties.subtype == 'CIDFontType0C'; var isCIDFont = properties.subtype == 'CIDFontType0C';
if (isCIDFont) { if (isCIDFont) {
charset = []; charset = ['.notdef'];
charset.length = charStrings.length; for (var i = 1, ii = charStrings.length; i < ii; ++i)
charset.push('glyph' + i);
encoding = this.parseCidMap(topDict.charset, encoding = this.parseCidMap(topDict.charset,
charStrings.length); charStrings.length);
} else { } else {
@ -3170,38 +3201,44 @@ var Type2CFF = (function type2CFF() {
var charstrings = []; var charstrings = [];
var unicodeUsed = []; var unicodeUsed = [];
var unassignedUnicodeItems = []; var unassignedUnicodeItems = [];
var inverseEncoding = [];
for (var charcode in encoding)
inverseEncoding[encoding[charcode]] = charcode | 0;
for (var i = 0, ii = charsets.length; i < ii; i++) { for (var i = 0, ii = charsets.length; i < ii; i++) {
var glyph = charsets[i]; var glyph = charsets[i];
var encodingFound = false; if (glyph == '.notdef') {
for (var charcode in encoding) { charstrings.push({
if (encoding[charcode] == i) { unicode: 0,
var code = charcode | 0; code: 0,
charstrings.push({ gid: i,
unicode: adaptUnicode(code), glyph: glyph
code: code, });
gid: i, continue;
glyph: glyph
});
unicodeUsed[code] = true;
encodingFound = true;
break;
}
} }
if (!encodingFound) { var code = inverseEncoding[i];
if (!code || isSpecialUnicode(code)) {
unassignedUnicodeItems.push(i); unassignedUnicodeItems.push(i);
continue;
} }
charstrings.push({
unicode: code,
code: code,
gid: i,
glyph: glyph
});
unicodeUsed[code] = true;
} }
var nextUnusedUnicode = 0x21; var nextUnusedUnicode = kCmapGlyphOffset;
for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; ++j) { for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; ++j) {
var i = unassignedUnicodeItems[j]; var i = unassignedUnicodeItems[j];
// giving unicode value anyway // giving unicode value anyway
while (unicodeUsed[nextUnusedUnicode]) while (nextUnusedUnicode in unicodeUsed)
nextUnusedUnicode++; nextUnusedUnicode++;
var code = nextUnusedUnicode++; var unicode = nextUnusedUnicode++;
charstrings.push({ charstrings.push({
unicode: adaptUnicode(code), unicode: unicode,
code: code, code: inverseEncoding[i] || 0,
gid: i, gid: i,
glyph: charsets[i] glyph: charsets[i]
}); });