Add more glue between glyph mapping and code mapping

This commit is contained in:
Vivien Nicolas 2011-09-08 03:16:33 +02:00
parent 92081af896
commit 567be29720
2 changed files with 71 additions and 56 deletions

View File

@ -711,7 +711,7 @@ var Font = (function Font() {
var encoding = properties.encoding;
for (var index in encoding) {
var code = encoding[index];
var code = encoding[index].unicode;
if (firstCharIndex > code || !firstCharIndex)
firstCharIndex = code;
if (lastCharIndex < code)
@ -970,15 +970,9 @@ var Font = (function Font() {
if (index) {
deltas.push(index);
var code = encoding[index];
for (var glyph in properties.glyphs) {
if (properties.glyphs[glyph] == code)
break;
}
var unicode = j + kCmapGlyphOffset;
properties.glyphs[glyph] = encoding[j] = unicode;
glyphs.push({ glyph: glyph, unicode: unicode });
encoding[j].unicode = unicode;
glyphs.push({ unicode: unicode });
}
}
@ -1023,8 +1017,10 @@ var Font = (function Font() {
var start = denseRange[0];
var end = denseRange[1];
var index = firstCode;
for (var j = start; j <= end; j++)
encoding[index++] = glyphs[j - firstCode - 1].unicode;
for (var j = start; j <= end; j++) {
var code = j - firstCode - 1;
encoding[index++] = { unicode: glyphs[code].unicode };
}
return cmap.data = createCMapTable(glyphs);
}
}
@ -1118,23 +1114,6 @@ var Font = (function Font() {
// U+00AD (soft hyphen) is not drawn.
// So, offset all the glyphs by 0xFF to avoid these cases and use
// the encoding to map incoming characters to the new glyph positions
var glyphs = [];
var encoding = properties.encoding;
for (var i = 1; i < numGlyphs; i++)
glyphs.push({ unicode: i + kCmapGlyphOffset });
if ('undefined' == typeof(encoding[0])) {
// the font is directly characters to glyphs with no encoding
// so create an identity encoding
for (i = 0; i < numGlyphs; i++)
encoding[i] = i + kCmapGlyphOffset;
} else {
for (var code in encoding)
encoding[code] += kCmapGlyphOffset;
}
if (!cmap) {
cmap = {
tag: 'cmap',
@ -1142,6 +1121,21 @@ var Font = (function Font() {
};
tables.push(cmap);
}
var encoding = properties.encoding;
if (!encoding[0]) {
// the font is directly characters to glyphs with no encoding
// so create an identity encoding
for (i = 0; i < numGlyphs; i++)
encoding[i] = { unicode: i + kCmapGlyphOffset };
} else {
for (var code in encoding)
encoding[code].unicode += kCmapGlyphOffset;
}
var glyphs = [];
for (var i = 1; i < numGlyphs; i++)
glyphs.push({ unicode: i + kCmapGlyphOffset });
cmap.data = createCMapTable(glyphs);
} else {
replaceCMapTable(cmap, font, properties);
@ -1361,14 +1355,14 @@ var Font = (function Font() {
// loop should never end on the last byte
for (var i = 0; i < length; i++) {
var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]);
var unicode = encoding[charcode];
var unicode = encoding[charcode].unicode;
str += String.fromCharCode(unicode);
}
}
else {
for (var i = 0; i < chars.length; ++i) {
var charcode = chars.charCodeAt(i);
var unicode = encoding[charcode];
var unicode = encoding[charcode].unicode;
if ('undefined' == typeof(unicode)) {
warn('Unencoded charcode ' + charcode);
unicode = charcode;
@ -1376,7 +1370,7 @@ var Font = (function Font() {
// Check if the glyph has already been converted
if (!IsNum(unicode))
unicode = encoding[charcode] = this.glyphs[unicode];
unicode = encoding[charcode].unicode = this.glyphs[unicode].unicode;
// Handle surrogate pairs
if (unicode > 0xFFFF) {
@ -1830,8 +1824,8 @@ var Type1Parser = function() {
var glyph = getToken();
if ('undefined' == typeof(properties.differences[index])) {
properties.encoding[index] = glyph;
properties.glyphs[glyph] = GlyphsUnicode[glyph] || index;
var mapping = { unicode: GlyphsUnicode[glyph] || j };
properties.glyphs[glyph] = properties.encoding[index] = mapping;
}
getToken(); // read the in 'put'
}
@ -2000,14 +1994,14 @@ CFF.prototype = {
for (var i = 0; i < glyphs.length; i++) {
var glyph = glyphs[i];
var unicode = properties.glyphs[glyph.glyph];
if (!unicode) {
var mapping = properties.glyphs[glyph.glyph];
if (!mapping) {
if (glyph.glyph != '.notdef')
missings.push(glyph.glyph);
} else {
charstrings.push({
glyph: glyph.glyph,
unicode: unicode,
unicode: mapping.unicode,
charstring: glyph.data,
width: glyph.width,
lsb: glyph.lsb
@ -2340,17 +2334,24 @@ var Type2CFF = (function() {
}
}
if (code == -1)
index = code = properties.glyphs[glyph] || index;
if (code == -1) {
var mapping = properties.glyphs[glyph] || {};
index = code = mapping.unicode || index;
}
var width = widths[code] || defaultWidth;
if (code <= 0x1f || (code >= 127 && code <= 255))
code += kCmapGlyphOffset;
properties.encoding[index] = code;
properties.glyphs[glyph] = properties.encoding[index] = {
unicode: code,
width: width
};
charstrings.push({
unicode: code,
width: width, gid: i
width: width,
gid: i
});
index++;
}

46
pdf.js
View File

@ -4194,13 +4194,19 @@ var PartialEvaluator = (function() {
var glyphsData = glyphsStream.getBytes(0);
// Glyph ids are big-endian 2-byte values
// Set this to 0 to verify the font has an encoding.
var encoding = properties.encoding;
encoding[0] = 0;
// Set encoding 0 to later verify the font has an encoding
encoding[0] = { unicode: 0 };
for (var j = 0; j < glyphsData.length; j++) {
var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
if (glyphID != 0)
encoding[j >> 1] = glyphID;
if (glyphID == 0)
continue;
encoding[j >> 1] = {
unicode: glyphID,
width: 0
};
}
} else if (type == 'CIDFontType0') {
var encoding = xref.fetchIfRef(dict.get('Encoding'));
@ -4269,7 +4275,10 @@ var PartialEvaluator = (function() {
var glyph = differences[i] || baseEncoding[i];
if (glyph) {
var index = GlyphsUnicode[glyph] || i;
glyphs[glyph] = map[i] = index;
glyphs[glyph] = map[i] = {
unicode: index,
width: properties.widths[i - firstChar] || properties.defaultWidth
};
// If there is no file, the character mapping can't be modified
// but this is unlikely that there is any standard encoding with
@ -4278,7 +4287,7 @@ var PartialEvaluator = (function() {
continue;
if (index <= 0x1f || (index >= 127 && index <= 255))
glyphs[glyph] = map[i] += kCmapGlyphOffset;
map[i].unicode += kCmapGlyphOffset;
}
}
@ -4316,7 +4325,10 @@ var PartialEvaluator = (function() {
var endRange = tokens[j + 1];
var code = tokens[j + 2];
while (startRange < endRange) {
map[startRange] = code++;
map[startRange] = {
unicode: code++,
width: 0
}
++startRange;
}
}
@ -4327,7 +4339,10 @@ var PartialEvaluator = (function() {
for (var j = 0; j < tokens.length; j += 2) {
var index = tokens[j];
var code = tokens[j + 1];
map[index] = code;
map[index] = {
unicode: code,
width: 0
};
}
break;
@ -4478,19 +4493,18 @@ var PartialEvaluator = (function() {
descent: descriptor.get('Descent'),
xHeight: descriptor.get('XHeight'),
capHeight: descriptor.get('CapHeight'),
defaultWidth: descriptor.get('MissingWidth') || 0,
flags: descriptor.get('Flags'),
italicAngle: descriptor.get('ItalicAngle'),
differences: [],
widths: [],
widths: (function() {
var glyphWidths = {};
for (var i = 0; i <= widths.length; i++)
glyphWidths[firstChar++] = widths[i];
return glyphWidths;
})(),
encoding: {}
};
// XXX Encoding and Glyphs should point to the same object so it will
// be hard to be out of sync. The object could contains the unicode and
// the width of the glyph.
for (var i = 0; i <= widths.length; i++)
properties.widths[firstChar++] = widths[i];
properties.glyphs = this.extractEncoding(dict, xref, properties);
return {