Change the way Type 2 CID fonts are encoded. Move the cmap glyphs above the first 255 Unicode values so that fillText does not alter them, and create an encoding that maps characters to the relocated glyphs.

This commit is contained in:
Adil Allawi 2011-08-19 14:04:34 +01:00
parent 170871bbfa
commit 5a528944f0
4 changed files with 616 additions and 39 deletions

View File

@ -1032,26 +1032,27 @@ var Font = (function Font() {
if (properties.type == 'CIDFontType2') { if (properties.type == 'CIDFontType2') {
// Type2 composite fonts map characters directly to glyphs so the cmap // Type2 composite fonts map characters directly to glyphs so the cmap
// table must be replaced. // table must be replaced.
// canvas fillText will reencode some characters even if the font has a
// glyph at that position - e.g. newline is converted to a space and U+00AD
// (soft hyphen) is not drawn.
// So, offset all the glyphs by 0xFF to avoid these cases and use
// the encoding to map incoming characters to the new glyph positions
var glyphs = []; var glyphs = [];
var charset = properties.charset; var encoding = properties.encoding;
if (!charset.length) {
// Type2 composite fonts map characters directly to glyphs so the cmap
for (var i = 1; i < numGlyphs; i++) {
glyphs.push({
unicode: i
});
}
} else {
for (var i = 1; i < charset.length; i++) {
var index = charset.indexOf(i);
if (index == -1)
break;
glyphs.push({ for (var i = 1; i < numGlyphs; i++) {
unicode: index glyphs.push({ unicode: i + 0xFF });
}); }
}
if ('undefined' == typeof(encoding[0])) {
// the font maps characters directly to glyphs with no encoding
// so create an identity encoding
for (i = 0; i < numGlyphs; i++)
encoding[i] = i + 0xFF;
} else {
for (var i in encoding)
encoding[i] = encoding[i] + 0xFF;
} }
if (!cmap) { if (!cmap) {
@ -1274,31 +1275,26 @@ var Font = (function Font() {
if (!charsCache) if (!charsCache)
charsCache = this.charsCache = Object.create(null); charsCache = this.charsCache = Object.create(null);
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
str = '';
if (this.compositeFont) { if (this.compositeFont) {
// composite fonts have multi-byte strings convert the string from // composite fonts have multi-byte strings convert the string from
// single-byte to multi-byte XXX assuming CIDFonts are two-byte - later // single-byte to multi-byte
// need to extract the correct byte encoding according to the PDF spec // XXX assuming CIDFonts are two-byte - later need to extract the
str = ''; // correct byte encoding according to the PDF spec
var multiByteStr = ''; var length = chars.length - 1; // looping over two bytes at a time so
var length = chars.length; // loop should never end on the last byte
for (var i = 0; i < length; i++) { for (var i = 0; i < length; i++) {
var byte1 = chars.charCodeAt(i++) & 0xFF; var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]);
var byte2; var unicode = encoding[charcode];
if (i == length) str += String.fromCharCode(unicode);
byte2 = 0;
else
byte2 = chars.charCodeAt(i) & 0xFF;
multiByteStr += String.fromCharCode((byte1 << 8) | byte2);
} }
str = multiByteStr;
} }
else { else {
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
str = '';
for (var i = 0; i < chars.length; ++i) { for (var i = 0; i < chars.length; ++i) {
var charcode = chars.charCodeAt(i); var charcode = chars.charCodeAt(i);
var unicode = encoding[charcode]; var unicode = encoding[charcode];

7
pdf.js
View File

@ -4028,14 +4028,15 @@ var PartialEvaluator = (function() {
if (subType.name == 'CIDFontType2') { if (subType.name == 'CIDFontType2') {
var cidToGidMap = descendant.get('CIDToGIDMap'); var cidToGidMap = descendant.get('CIDToGIDMap');
if (cidToGidMap && IsRef(cidToGidMap)) { if (cidToGidMap && IsRef(cidToGidMap)) {
// Extract the charset from the CIDToGIDMap // Extract the encoding from the CIDToGIDMap
var glyphsStream = xref.fetchIfRef(cidToGidMap); var glyphsStream = xref.fetchIfRef(cidToGidMap);
var glyphsData = glyphsStream.getBytes(0); var glyphsData = glyphsStream.getBytes(0);
var i = 0;
// Glyph ids are big-endian 2-byte values // Glyph ids are big-endian 2-byte values
encodingMap[0] = 0; //set this to 0 to verify the font has an encoding
for (var j = 0; j < glyphsData.length; j++) { for (var j = 0; j < glyphsData.length; j++) {
var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
charset.push(glyphID); if (glyphID != 0)
encodingMap[j>>1] = glyphID;
} }
} }
} }

File diff suppressed because one or more lines are too long

View File

@ -64,6 +64,11 @@
"rounds": 1, "rounds": 1,
"type": "load" "type": "load"
}, },
{ "id": "complexttffont-pdf",
"file": "pdfs/complex_ttf_font.pdf",
"rounds": 1,
"type": "load"
},
{ "id": "i9-pdf", { "id": "i9-pdf",
"file": "pdfs/i9.pdf", "file": "pdfs/i9.pdf",
"link": true, "link": true,