Change the way Type 2 CID fonts are encoded. Move the cmap glyphs above the first 255 unicode values so that fillText does not change them and create an encoding to map characters to the glyphs.

This commit is contained in:
Adil Allawi 2011-08-19 14:04:34 +01:00
parent 170871bbfa
commit 5a528944f0
4 changed files with 616 additions and 39 deletions

View File

@ -1032,26 +1032,27 @@ var Font = (function Font() {
if (properties.type == 'CIDFontType2') {
// Type2 composite fonts map characters directly to glyphs so the cmap
// table must be replaced.
// canvas fillText will reencode some characters even if the font has a
// glyph at that position - e.g. newline is converted to a space and U+00AD
(soft hyphen) is not drawn.
// So, offset all the glyphs by 0xFF to avoid these cases and use
// the encoding to map incoming characters to the new glyph positions
var glyphs = [];
var charset = properties.charset;
if (!charset.length) {
// Type2 composite fonts map characters directly to glyphs so the cmap
for (var i = 1; i < numGlyphs; i++) {
glyphs.push({
unicode: i
});
}
} else {
for (var i = 1; i < charset.length; i++) {
var index = charset.indexOf(i);
if (index == -1)
break;
var encoding = properties.encoding;
glyphs.push({
unicode: index
});
}
for (var i = 1; i < numGlyphs; i++) {
glyphs.push({ unicode: i + 0xFF });
}
if ('undefined' == typeof(encoding[0])) {
// the font maps characters directly to glyphs with no encoding
// so create an identity encoding
for (i = 0; i < numGlyphs; i++)
encoding[i] = i + 0xFF;
} else {
for (var i in encoding)
encoding[i] = encoding[i] + 0xFF;
}
if (!cmap) {
@ -1274,31 +1275,26 @@ var Font = (function Font() {
if (!charsCache)
charsCache = this.charsCache = Object.create(null);
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
str = '';
if (this.compositeFont) {
// composite fonts have multi-byte strings convert the string from
// single-byte to multi-byte XXX assuming CIDFonts are two-byte - later
// need to extract the correct byte encoding according to the PDF spec
str = '';
var multiByteStr = '';
var length = chars.length;
// single-byte to multi-byte
// XXX assuming CIDFonts are two-byte - later need to extract the
// correct byte encoding according to the PDF spec
var length = chars.length - 1; // looping over two bytes at a time so
// loop should never end on the last byte
for (var i = 0; i < length; i++) {
var byte1 = chars.charCodeAt(i++) & 0xFF;
var byte2;
if (i == length)
byte2 = 0;
else
byte2 = chars.charCodeAt(i) & 0xFF;
multiByteStr += String.fromCharCode((byte1 << 8) | byte2);
var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]);
var unicode = encoding[charcode];
str += String.fromCharCode(unicode);
}
str = multiByteStr;
}
else {
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
str = '';
for (var i = 0; i < chars.length; ++i) {
var charcode = chars.charCodeAt(i);
var unicode = encoding[charcode];

7
pdf.js
View File

@ -4028,14 +4028,15 @@ var PartialEvaluator = (function() {
if (subType.name == 'CIDFontType2') {
var cidToGidMap = descendant.get('CIDToGIDMap');
if (cidToGidMap && IsRef(cidToGidMap)) {
// Extract the charset from the CIDToGIDMap
// Extract the encoding from the CIDToGIDMap
var glyphsStream = xref.fetchIfRef(cidToGidMap);
var glyphsData = glyphsStream.getBytes(0);
var i = 0;
// Glyph ids are big-endian 2-byte values
encodingMap[0] = 0; //set this to 0 to verify the font has an encoding
for (var j = 0; j < glyphsData.length; j++) {
var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
charset.push(glyphID);
if (glyphID != 0)
encodingMap[j>>1] = glyphID;
}
}
}

File diff suppressed because one or more lines are too long

View File

@ -64,6 +64,11 @@
"rounds": 1,
"type": "load"
},
{ "id": "complexttffont-pdf",
"file": "pdfs/complex_ttf_font.pdf",
"rounds": 1,
"type": "load"
},
{ "id": "i9-pdf",
"file": "pdfs/i9.pdf",
"link": true,