Clean up the encoding/charset/glyphs dance a bit

This commit is contained in:
Vivien Nicolas 2011-08-30 00:56:02 +02:00
parent 57b3d28f8f
commit 95ccb38283
2 changed files with 71 additions and 122 deletions

104
fonts.js
View File

@ -638,30 +638,28 @@ var Font = (function Font() {
var ulUnicodeRange3 = 0;
var ulUnicodeRange4 = 0;
var charset = properties.charset;
if (charset && charset.length) {
var firstCharIndex = null;
var lastCharIndex = 0;
var firstCharIndex = null;
var lastCharIndex = 0;
for (var i = 0; i < charset.length; i++) {
var code = GlyphsUnicode[charset[i]];
if (firstCharIndex > code || !firstCharIndex)
firstCharIndex = code;
if (lastCharIndex < code)
lastCharIndex = code;
var encoding = properties.encoding;
for (var index in encoding) {
var code = encoding[index];
if (firstCharIndex > code || !firstCharIndex)
firstCharIndex = code;
if (lastCharIndex < code)
lastCharIndex = code;
var position = getUnicodeRangeFor(code);
if (position < 32) {
ulUnicodeRange1 |= 1 << position;
} else if (position < 64) {
ulUnicodeRange2 |= 1 << position - 32;
} else if (position < 96) {
ulUnicodeRange3 |= 1 << position - 64;
} else if (position < 123) {
ulUnicodeRange4 |= 1 << position - 96;
} else {
error('Unicode ranges Bits > 123 are reserved for internal usage');
}
var position = getUnicodeRangeFor(code);
if (position < 32) {
ulUnicodeRange1 |= 1 << position;
} else if (position < 64) {
ulUnicodeRange2 |= 1 << position - 32;
} else if (position < 96) {
ulUnicodeRange3 |= 1 << position - 64;
} else if (position < 123) {
ulUnicodeRange4 |= 1 << position - 96;
} else {
error('Unicode ranges Bits > 123 are reserved for internal usage');
}
}
@ -847,7 +845,6 @@ var Font = (function Font() {
}
var encoding = properties.encoding;
var charset = properties.charset;
for (var i = 0; i < numRecords; i++) {
var table = records[i];
font.pos = start + table.offset;
@ -856,7 +853,9 @@ var Font = (function Font() {
var length = int16(font.getBytes(2));
var language = int16(font.getBytes(2));
if (format == 0) {
if (format == 4) {
return;
} else if (format == 0) {
// Characters below 0x20 are control characters that are hardcoded
// into the platform so if some characters in the font are assigned
// under this limit they will not be displayed so let's rewrite the
@ -871,35 +870,15 @@ var Font = (function Font() {
}
}
var rewrite = false;
for (var code in encoding) {
if (code < 0x20 && encoding[code])
rewrite = true;
if (rewrite)
encoding[code] = parseInt(code) + 0x1F;
}
if (rewrite) {
if (properties.firstChar < 0x20)
var code = 0;
for (var j = 0; j < glyphs.length; j++) {
var glyph = glyphs[j];
glyphs[j].unicode += 0x1F;
}
properties.glyphs[glyph.glyph] = encoding[++code] = glyph.unicode;
}
cmap.data = createCMapTable(glyphs, deltas);
} else if (format == 6 && numRecords == 1 && !encoding.empty) {
// Format 0 alone is not allowed by the sanitizer so let's rewrite
// that to a 3-1-4 Unicode BMP table
TODO('Use an other source of informations than ' +
'charset here, it is not reliable');
var glyphs = [];
for (var j = 0; j < charset.length; j++) {
glyphs.push({
unicode: GlyphsUnicode[charset[j]] || 0
});
}
cmap.data = createCMapTable(glyphs);
} else if (format == 6 && numRecords == 1) {
return cmap.data = createCMapTable(glyphs, deltas);
} else if (format == 6) {
// Format 6 is a 2-byte dense mapping, which means the font data
// is glued together even if the characters are far apart in the Unicode
// table. (This looks weird, so I may have missed something), this
@ -912,6 +891,8 @@ var Font = (function Font() {
var min = 0xffff, max = 0;
for (var j = 0; j < entryCount; j++) {
var charcode = int16(font.getBytes(2));
if (!charcode)
continue;
glyphs.push(charcode);
if (charcode < min)
@ -939,7 +920,7 @@ var Font = (function Font() {
var index = firstCode;
for (var j = start; j <= end; j++)
encoding[index++] = glyphs[j - firstCode - 1].unicode;
cmap.data = createCMapTable(glyphs);
return cmap.data = createCMapTable(glyphs);
}
}
};
@ -1288,10 +1269,6 @@ var Font = (function Font() {
unicode = charcode;
}
// Check if the glyph has already been converted
if (!IsNum(unicode))
unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
// Handle surrogate pairs
if (unicode > 0xFFFF) {
str += String.fromCharCode(unicode & 0xFFFF);
@ -1715,9 +1692,6 @@ var Type1Parser = function() {
properties.textMatrix = matrix;
break;
case '/Encoding':
if (!properties.builtInEncoding)
break;
var size = parseInt(getToken());
getToken(); // read in 'array'
@ -1726,9 +1700,12 @@ var Type1Parser = function() {
if (token == 'dup') {
var index = parseInt(getToken());
var glyph = getToken();
properties.encoding[index] = GlyphsUnicode[glyph];
if (!properties.differences[j]) {
var code = GlyphsUnicode[glyph];
properties.glyphs[glyph] = properties.encoding[index] = code;
}
getToken(); // read in 'put'
j = index;
}
}
break;
@ -1903,7 +1880,7 @@ CFF.prototype = {
missings.push(glyph.glyph);
} else {
charstrings.push({
glyph: glyph,
glyph: glyph.glyph,
unicode: unicode,
charstring: glyph.data,
width: glyph.width,
@ -2079,7 +2056,7 @@ CFF.prototype = {
var count = glyphs.length;
for (var i = 0; i < count; i++) {
var index = CFFStrings.indexOf(charstrings[i].glyph.glyph);
var index = CFFStrings.indexOf(charstrings[i].glyph);
// Some characters like asteriskmath && circlecopyrt are
// missing from the original strings, for the moment let's
// map them to .notdef and see later if it causes any
@ -2176,7 +2153,6 @@ var Type2CFF = (function() {
var stringIndex = this.parseIndex(dictIndex.endPos);
var gsubrIndex = this.parseIndex(stringIndex.endPos);
var strings = this.getStrings(stringIndex);
var baseDict = this.parseDict(dictIndex.get(0));
@ -2219,7 +2195,7 @@ var Type2CFF = (function() {
var charstrings = [];
for (var i = 0, ii = charsets.length; i < ii; ++i) {
var charName = charsets[i];
var charCode = GlyphsUnicode[charName];
var charCode = properties.glyphs[charName];
if (charCode) {
var width = widths[charCode] || defaultWidth;
charstrings.push({unicode: charCode, width: width, gid: i});

89
pdf.js
View File

@ -4199,8 +4199,6 @@ var PartialEvaluator = (function() {
var builtInEncoding = false;
var encodingMap = {};
var glyphMap = {};
var charset = [];
if (compositeFont) {
// Special CIDFont support
// XXX only CIDFontType2 supported for now
@ -4242,69 +4240,61 @@ var PartialEvaluator = (function() {
if (fontDict.has('Encoding')) {
var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
if (IsDict(encoding)) {
// Build a map between codes and glyphs
// Load the base encoding
var baseName = encoding.get('BaseEncoding');
if (baseName) {
if (baseName)
baseEncoding = Encodings[baseName.name].slice();
}
// Load the differences between the base and original
var differences = encoding.get('Differences');
var index = 0;
for (var j = 0; j < differences.length; j++) {
var data = differences[j];
if (IsNum(data)) {
if (IsNum(data))
index = data;
} else {
else
diffEncoding[index++] = data.name;
}
}
} else if (IsName(encoding)) {
baseEncoding = Encodings[encoding.name].slice();
} else {
error("Encoding is not a Name nor a Dict");
}
}
var fontType = subType.name;
if (!baseEncoding) {
var type = subType.name;
if (type == 'TrueType') {
baseEncoding = Encodings.WinAnsiEncoding.slice();
} else if (type == 'Type1') {
baseEncoding = Encodings.StandardEncoding.slice();
if (!diffEncoding.length)
builtInEncoding = true;
} else {
error('Unknown type of font');
switch (fontType) {
case 'TrueType':
baseEncoding = Encodings.WinAnsiEncoding.slice();
break;
case 'Type1':
baseEncoding = Encodings.StandardEncoding.slice();
break;
default:
warn('Unknown type of font: ' + fontType);
break;
}
}
// firstChar and width are required
// (except for 14 standard fonts)
var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')) || 0;
var lastChar = xref.fetchIfRef(fontDict.get('LastChar')) || 0;
var widths = xref.fetchIfRef(fontDict.get('Widths')) || [];
// merge in the differences
var length = baseEncoding.length > diffEncoding.length ?
baseEncoding.length : diffEncoding.length;
for (var i = 0, ii = length; i < ii; ++i) {
var diffGlyph = diffEncoding[i];
var baseGlyph = baseEncoding[i];
if (diffGlyph) {
glyphMap[i] = diffGlyph;
encodingMap[i] = GlyphsUnicode[diffGlyph];
} else if (baseGlyph) {
glyphMap[i] = baseGlyph;
encodingMap[i] = GlyphsUnicode[baseGlyph];
}
var glyphsMap = {};
for (var i = firstChar; i <= lastChar; i++) {
var glyph = diffEncoding[i] || baseEncoding[i];
if (glyph)
glyphsMap[glyph] = encodingMap[i] = GlyphsUnicode[glyph];
}
if (fontDict.has('ToUnicode')) {
encodingMap['empty'] = true;
var glyphsMap = {};
for (var p in glyphMap)
glyphsMap[glyphMap[p]] = encodingMap[p];
if (fontDict.has('ToUnicode') && differences) {
var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode'));
if (IsName(cmapObj)) {
error('ToUnicode file cmap translation not implemented');
} else if (IsStream(cmapObj)) {
var firstChar = xref.fetchIfRef(fontDict.get('FirstChar'));
var tokens = [];
var token = '';
@ -4334,6 +4324,8 @@ var PartialEvaluator = (function() {
var startRange = parseInt('0x' + tokens[j]);
var endRange = parseInt('0x' + tokens[j + 1]);
var code = parseInt('0x' + tokens[j + 2]);
for (var k = startRange; k < endRange; k++)
encodingMap[k] = code++;
}
break;
@ -4360,15 +4352,6 @@ var PartialEvaluator = (function() {
}
}
}
// firstChar and width are required
// (except for 14 standard fonts)
var firstChar = xref.fetchIfRef(fontDict.get('FirstChar'));
var widths = xref.fetchIfRef(fontDict.get('Widths')) || [];
for (var j = 0; j < widths.length; j++) {
if (widths[j])
charset.push(glyphMap[j + firstChar]);
}
}
if (!fd) {
@ -4396,7 +4379,6 @@ var PartialEvaluator = (function() {
}
var descriptor = xref.fetch(fd);
var fontName = fontDict.get('Name');
if (!fontName)
fontName = xref.fetchIfRef(descriptor.get('FontName'));;
@ -4414,14 +4396,6 @@ var PartialEvaluator = (function() {
}
}
if (descriptor.has('CharSet')) {
// Get the font charset if any (meaningful only in Type 1)
charset = descriptor.get('CharSet');
assertWellFormed(IsString(charset), 'invalid charset');
charset = charset.split('/');
charset.shift();
}
var widths = fontDict.get('Widths');
if (widths) {
var glyphWidths = {};
@ -4435,9 +4409,8 @@ var PartialEvaluator = (function() {
subtype: fileType,
widths: glyphWidths,
encoding: encodingMap,
differences: diffEncoding,
glyphs: glyphsMap || GlyphsUnicode,
builtInEncoding: builtInEncoding,
charset: charset,
firstChar: fontDict.get('FirstChar'),
lastChar: fontDict.get('LastChar'),
bbox: descriptor.get('FontBBox'),