Merge pull request #339 from ironymark/master

Fix Type 2 CID Font Encoding
This commit is contained in:
Chris Jones 2011-08-19 20:29:32 -07:00
commit 5d071fffcd
4 changed files with 618 additions and 39 deletions

View File

@ -789,6 +789,8 @@ var Font = (function Font() {
encoding: null,
checkAndRepair: function font_checkAndRepair(name, font, properties) {
var kCmapGlyphOffset = 0xFF;
function readTableEntry(file) {
// tag
var tag = file.getBytes(4);
@ -1027,26 +1029,27 @@ var Font = (function Font() {
if (properties.type == 'CIDFontType2') {
// Type2 composite fonts map characters directly to glyphs so the cmap
// table must be replaced.
// canvas fillText will reencode some characters even if the font has a
// glyph at that position - e.g. newline is converted to a space and U+00AD
// (soft hyphen) is not drawn.
// So, offset all the glyphs by 0xFF to avoid these cases and use
// the encoding to map incoming characters to the new glyph positions
var glyphs = [];
var charset = properties.charset;
if (!charset.length) {
// Type2 composite fonts map characters directly to glyphs so the cmap
for (var i = 1; i < numGlyphs; i++) {
glyphs.push({
unicode: i
});
}
} else {
for (var i = 1; i < charset.length; i++) {
var index = charset.indexOf(i);
if (index == -1)
break;
var encoding = properties.encoding;
glyphs.push({
unicode: index
});
for (var i = 1; i < numGlyphs; i++) {
glyphs.push({ unicode: i + kCmapGlyphOffset });
}
if ('undefined' == typeof(encoding[0])) {
// the font maps characters directly to glyphs with no encoding
// so create an identity encoding
for (i = 0; i < numGlyphs; i++)
encoding[i] = i + kCmapGlyphOffset;
} else {
for (var i in encoding)
encoding[i] = encoding[i] + kCmapGlyphOffset;
}
if (!cmap) {
@ -1260,31 +1263,26 @@ var Font = (function Font() {
if (!charsCache)
charsCache = this.charsCache = Object.create(null);
if (this.compositeFont) {
// composite fonts have multi-byte strings; convert the string from
// single-byte to multi-byte XXX assuming CIDFonts are two-byte - later
// need to extract the correct byte encoding according to the PDF spec
str = '';
var multiByteStr = '';
var length = chars.length;
for (var i = 0; i < length; i++) {
var byte1 = chars.charCodeAt(i++) & 0xFF;
var byte2;
if (i == length)
byte2 = 0;
else
byte2 = chars.charCodeAt(i) & 0xFF;
multiByteStr += String.fromCharCode((byte1 << 8) | byte2);
}
str = multiByteStr;
}
else {
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
str = '';
if (this.compositeFont) {
// composite fonts have multi-byte strings; convert the string from
// single-byte to multi-byte
// XXX assuming CIDFonts are two-byte - later need to extract the
// correct byte encoding according to the PDF spec
var length = chars.length - 1; // looping over two bytes at a time so
// loop should never end on the last byte
for (var i = 0; i < length; i++) {
var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]);
var unicode = encoding[charcode];
str += String.fromCharCode(unicode);
}
}
else {
for (var i = 0; i < chars.length; ++i) {
var charcode = chars.charCodeAt(i);
var unicode = encoding[charcode];

7
pdf.js
View File

@ -4028,14 +4028,15 @@ var PartialEvaluator = (function() {
if (subType.name == 'CIDFontType2') {
var cidToGidMap = descendant.get('CIDToGIDMap');
if (cidToGidMap && IsRef(cidToGidMap)) {
// Extract the charset from the CIDToGIDMap
// Extract the encoding from the CIDToGIDMap
var glyphsStream = xref.fetchIfRef(cidToGidMap);
var glyphsData = glyphsStream.getBytes(0);
var i = 0;
// Glyph ids are big-endian 2-byte values
encodingMap[0] = 0; //set this to 0 to verify the font has an encoding
for (var j = 0; j < glyphsData.length; j++) {
var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
charset.push(glyphID);
if (glyphID != 0)
encodingMap[j>>1] = glyphID;
}
}
}

File diff suppressed because one or more lines are too long

View File

@ -64,6 +64,11 @@
"rounds": 1,
"type": "load"
},
{ "id": "complexttffont-pdf",
"file": "pdfs/complex_ttf_font.pdf",
"rounds": 1,
"type": "load"
},
{ "id": "i9-pdf",
"file": "pdfs/i9.pdf",
"link": true,