Merge pull request #262 from ironymark/CIDFont

First Pass CIDFont support
This commit is contained in:
Andreas Gal 2011-07-13 15:16:14 -07:00
commit bbc940724f
6 changed files with 244 additions and 99 deletions

253
fonts.js
View File

@ -7,7 +7,7 @@ var isWorker = (typeof window == 'undefined');
/** /**
* Maximum file size of the font. * Maximum file size of the font.
*/ */
var kMaxFontFileSize = 40000; var kMaxFontFileSize = 200000;
/** /**
* Maximum time to wait for a font to be loaded by font-face rules. * Maximum time to wait for a font to be loaded by font-face rules.
@ -389,6 +389,7 @@ var Font = (function() {
var data; var data;
switch (properties.type) { switch (properties.type) {
case 'Type1': case 'Type1':
case 'CIDFontType0':
var cff = new CFF(name, file, properties); var cff = new CFF(name, file, properties);
this.mimetype = 'font/opentype'; this.mimetype = 'font/opentype';
@ -397,6 +398,7 @@ var Font = (function() {
break; break;
case 'TrueType': case 'TrueType':
case 'CIDFontType2':
this.mimetype = 'font/opentype'; this.mimetype = 'font/opentype';
// Repair the TrueType file if it is can be damaged in the point of // Repair the TrueType file if it is can be damaged in the point of
@ -409,9 +411,10 @@ var Font = (function() {
break; break;
} }
this.data = data; this.data = data;
this.type = properties.type;
this.id = Fonts.registerFont(name, data, properties); this.id = Fonts.registerFont(name, data, properties);
this.loadedName = 'pdfFont' + this.id; this.loadedName = 'pdfFont' + this.id;
this.compositeFont = properties.compositeFont;
}; };
function stringToArray(str) { function stringToArray(str) {
@ -679,6 +682,65 @@ var Font = (function() {
'\x00\x00\x00\x00'; // maxMemType1 '\x00\x00\x00\x00'; // maxMemType1
}; };
/**
 * Build the contents of an OpenType 'name' table describing a font.
 *
 * Ten name entries (name IDs 0 through 9) are emitted twice: once for the
 * platform that wants one byte per character and once for the platform that
 * wants two bytes per character.
 *
 * Relies on string16, which is defined elsewhere in this file.
 * NOTE(review): string16 is assumed to turn a 16-bit number into a
 * two-character string - confirm against its definition.
 *
 * @param {string} name Used for the font family (ID 1) and the full font
 *                      name (ID 4) entries.
 * @return {string} The table header, followed by every name record, followed
 *                  by the string storage area.
 */
function createNameTable(name) {
  // The position of each entry in this list is its name ID.
  var narrowNames = [
    'Original licence', // 0.Copyright
    name,               // 1.Font family
    'Unknown',          // 2.Font subfamily (font weight)
    'uniqueID',         // 3.Unique ID
    name,               // 4.Full font name
    'Version 0.11',     // 5.Version
    '',                 // 6.Postscript name
    'Unknown',          // 7.Trademark
    'Unknown',          // 8.Manufacturer
    'Unknown'           // 9.Designer
  ];

  // Mac wants 1-byte per character strings while Windows wants 2-bytes per
  // character, so a widened copy of every string is prepared up front.
  var wideNames = [];
  for (var n = 0; n < narrowNames.length; n++) {
    var source = narrowNames[n];
    var widened = '';
    for (var c = 0; c < source.length; c++)
      widened += string16(source.charCodeAt(c));
    wideNames.push(widened);
  }

  // One descriptor per platform: the constant leading part of its records
  // (platform ID + encoding ID + language ID) and the strings it points at.
  var variants = [
    { prefix: '\x00\x01' + '\x00\x00' + '\x00\x00', names: narrowNames },
    { prefix: '\x00\x03' + '\x00\x01' + '\x04\x09', names: wideNames }
  ];

  var recordCount = narrowNames.length * variants.length;
  var pieces = [
    '\x00\x00' +                     // format
    string16(recordCount) +          // Number of names Record
    string16(recordCount * 12 + 6)   // Storage
  ];

  // Emit the name records; the offset of each string is relative to the
  // start of the storage area and simply accumulates the lengths seen so far.
  var storageOffset = 0;
  for (var v = 0; v < variants.length; v++) {
    var variant = variants[v];
    var entries = variant.names;
    for (var id = 0; id < entries.length; id++) {
      var entry = entries[id];
      pieces.push(
        variant.prefix +
        string16(id) +             // name ID
        string16(entry.length) +
        string16(storageOffset)
      );
      storageOffset += entry.length;
    }
  }

  // The storage area: all narrow strings, then all widened strings.
  pieces.push(narrowNames.join('') + wideNames.join(''));
  return pieces.join('');
}
constructor.prototype = { constructor.prototype = {
name: null, name: null,
font: null, font: null,
@ -811,7 +873,7 @@ var Font = (function() {
// This keep a reference to the CMap and the post tables since they can // This keep a reference to the CMap and the post tables since they can
// be rewritted // be rewritted
var cmap, post; var cmap, post, nameTable, maxp;
var tables = []; var tables = [];
for (var i = 0; i < numTables; i++) { for (var i = 0; i < numTables; i++) {
@ -822,6 +884,10 @@ var Font = (function() {
cmap = table; cmap = table;
else if (table.tag == 'post') else if (table.tag == 'post')
post = table; post = table;
else if (table.tag == 'name')
nameTable = table;
else if (table.tag == 'maxp')
maxp = table;
requiredTables.splice(index, 1); requiredTables.splice(index, 1);
} }
@ -857,7 +923,51 @@ var Font = (function() {
}); });
// Replace the old CMAP table with a shiny new one // Replace the old CMAP table with a shiny new one
replaceCMapTable(cmap, font, properties); if (properties.type == 'CIDFontType2') {
// Type2 composite fonts map characters directly to glyphs so the cmap
// table must be replaced.
var glyphs = [];
var charset = properties.charset;
if (!charset.length) {
// PDF did not contain a GIDMap for the font so create an identity cmap
// First get the number of glyphs from the maxp table
font.pos = (font.start ? font.start : 0) + maxp.length;
var version = int16(font.getBytes(4));
var numGlyphs = int16(font.getBytes(2));
// Now create an identity mapping
for (var i = 1; i < numGlyphs; i++) {
glyphs.push({
unicode: i
});
}
} else {
for (var i = 1; i < charset.length; i++) {
var index = charset.indexOf(i);
if (index != -1) {
glyphs.push({
unicode: index
});
} else {
break;
}
}
}
if (!cmap) {
// Font did not contain a cmap
tables.push({
tag: 'cmap',
data: createCMapTable(glyphs)
})
} else {
cmap.data = createCMapTable(glyphs);
}
} else {
replaceCMapTable(cmap, font, properties);
}
// Rewrite the 'post' table if needed // Rewrite the 'post' table if needed
if (!post) { if (!post) {
@ -867,6 +977,14 @@ var Font = (function() {
}); });
} }
// Rewrite the 'name' table if needed
if (!nameTable) {
tables.push({
tag: 'name',
data: stringToArray(createNameTable(this.name))
});
}
// Tables needs to be written by ascendant alphabetic order // Tables needs to be written by ascendant alphabetic order
tables.sort(function tables_sort(a, b) { tables.sort(function tables_sort(a, b) {
return (a.tag > b.tag) - (a.tag < b.tag); return (a.tag > b.tag) - (a.tag < b.tag);
@ -909,65 +1027,6 @@ var Font = (function() {
}, },
convert: function font_convert(fontName, font, properties) { convert: function font_convert(fontName, font, properties) {
/**
 * Build the contents of an OpenType 'name' table describing a font.
 *
 * Ten name entries (name IDs 0 through 9) are emitted twice: once for the
 * platform that wants one byte per character and once for the platform that
 * wants two bytes per character.
 *
 * Relies on string16, which is defined elsewhere in this file.
 * NOTE(review): string16 is assumed to turn a 16-bit number into a
 * two-character string - confirm against its definition.
 *
 * @param {string} name Used for the font family (ID 1) and the full font
 *                      name (ID 4) entries.
 * @return {string} The table header, followed by every name record, followed
 *                  by the string storage area.
 */
function createNameTable(name) {
  // The position of each entry in this list is its name ID.
  var narrowNames = [
    'Original licence', // 0.Copyright
    name,               // 1.Font family
    'Unknown',          // 2.Font subfamily (font weight)
    'uniqueID',         // 3.Unique ID
    name,               // 4.Full font name
    'Version 0.11',     // 5.Version
    '',                 // 6.Postscript name
    'Unknown',          // 7.Trademark
    'Unknown',          // 8.Manufacturer
    'Unknown'           // 9.Designer
  ];

  // Mac wants 1-byte per character strings while Windows wants 2-bytes per
  // character, so a widened copy of every string is prepared up front.
  var wideNames = [];
  for (var n = 0; n < narrowNames.length; n++) {
    var source = narrowNames[n];
    var widened = '';
    for (var c = 0; c < source.length; c++)
      widened += string16(source.charCodeAt(c));
    wideNames.push(widened);
  }

  // One descriptor per platform: the constant leading part of its records
  // (platform ID + encoding ID + language ID) and the strings it points at.
  var variants = [
    { prefix: '\x00\x01' + '\x00\x00' + '\x00\x00', names: narrowNames },
    { prefix: '\x00\x03' + '\x00\x01' + '\x04\x09', names: wideNames }
  ];

  var recordCount = narrowNames.length * variants.length;
  var pieces = [
    '\x00\x00' +                     // format
    string16(recordCount) +          // Number of names Record
    string16(recordCount * 12 + 6)   // Storage
  ];

  // Emit the name records; the offset of each string is relative to the
  // start of the storage area and simply accumulates the lengths seen so far.
  var storageOffset = 0;
  for (var v = 0; v < variants.length; v++) {
    var variant = variants[v];
    var entries = variant.names;
    for (var id = 0; id < entries.length; id++) {
      var entry = entries[id];
      pieces.push(
        variant.prefix +
        string16(id) +             // name ID
        string16(entry.length) +
        string16(storageOffset)
      );
      storageOffset += entry.length;
    }
  }

  // The storage area: all narrow strings, then all widened strings.
  pieces.push(narrowNames.join('') + wideNames.join(''));
  return pieces.join('');
}
function isFixedPitch(glyphs) { function isFixedPitch(glyphs) {
for (var i = 0; i < glyphs.length - 1; i++) { for (var i = 0; i < glyphs.length - 1; i++) {
if (glyphs[i] != glyphs[i + 1]) if (glyphs[i] != glyphs[i + 1])
@ -1110,44 +1169,66 @@ var Font = (function() {
charsToUnicode: function fonts_chars2Unicode(chars) { charsToUnicode: function fonts_chars2Unicode(chars) {
var charsCache = this.charsCache; var charsCache = this.charsCache;
var str;
// if we translated this string before, just grab it from the cache // if we translated this string before, just grab it from the cache
if (charsCache) { if (charsCache) {
var str = charsCache[chars]; str = charsCache[chars];
if (str) if (str)
return str; return str;
} }
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
// lazily create the translation cache // lazily create the translation cache
if (!charsCache) if (!charsCache)
charsCache = this.charsCache = Object.create(null); charsCache = this.charsCache = Object.create(null);
str = ''; if (this.compositeFont) {
for (var i = 0; i < chars.length; ++i) { // composite fonts have multi-byte strings
var charcode = chars.charCodeAt(i); // convert the string from single-byte to multi-byte
var unicode = encoding[charcode]; // XXX assuming CIDFonts are two-byte - later need to extract the correct byte encoding
if ('undefined' == typeof(unicode)) { // according to the PDF spec
// FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf str = '';
// at the moment, for unknown reasons. var multiByteStr = "";
warn('Unencoded charcode '+ charcode); var length = chars.length;
unicode = charcode; for (var i = 0; i < length; i++) {
var byte1 = chars.charCodeAt(i++) & 0xFF;
var byte2;
if (i == length)
byte2 = 0;
else
byte2 = chars.charCodeAt(i) & 0xFF;
multiByteStr += String.fromCharCode((byte1 << 8) | byte2);
} }
str = multiByteStr;
}
else {
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
// Check if the glyph has already been converted str = '';
if (!IsNum(unicode)) for (var i = 0; i < chars.length; ++i) {
unicode = encoding[unicode] = GlyphsUnicode[unicode.name]; var charcode = chars.charCodeAt(i);
var unicode = encoding[charcode];
if ('undefined' == typeof(unicode)) {
// FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
// at the moment, for unknown reasons.
warn('Unencoded charcode '+ charcode);
unicode = charcode;
}
// Handle surrogate pairs // Check if the glyph has already been converted
if (unicode > 0xFFFF) { if (!IsNum(unicode))
str += String.fromCharCode(unicode & 0xFFFF); unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
unicode >>= 16;
// Handle surrogate pairs
if (unicode > 0xFFFF) {
str += String.fromCharCode(unicode & 0xFFFF);
unicode >>= 16;
}
str += String.fromCharCode(unicode);
} }
str += String.fromCharCode(unicode);
} }
// Enter the translated string into the cache // Enter the translated string into the cache

65
pdf.js
View File

@ -3624,11 +3624,31 @@ var PartialEvaluator = (function() {
}, },
translateFont: function(fontDict, xref, resources) { translateFont: function(fontDict, xref, resources) {
var fd = fontDict.get('FontDescriptor'); var fd;
var descendant = [];
var subType = fontDict.get('Subtype');
var compositeFont = false;
assertWellFormed(IsName(subType), 'invalid font Subtype');
// If font is a composite
// - get the descendant font
// - set the type according to the descendant font
// - get the FontDescriptor from the descendant font
if (subType.name == 'Type0') {
var df = fontDict.get('DescendantFonts');
if (!df)
return null;
compositeFont = true;
descendant = xref.fetch(df[0]);
subType = descendant.get('Subtype');
fd = descendant.get('FontDescriptor');
} else {
fd = fontDict.get('FontDescriptor');
}
if (!fd) if (!fd)
// XXX deprecated "special treatment" for standard
// fonts? What do we need to do here?
return null; return null;
var descriptor = xref.fetch(fd); var descriptor = xref.fetch(fd);
var fontName = descriptor.get('FontName'); var fontName = descriptor.get('FontName');
@ -3642,7 +3662,38 @@ var PartialEvaluator = (function() {
var encodingMap = {}; var encodingMap = {};
var charset = []; var charset = [];
if (fontDict.has('Encoding')) { if (compositeFont) {
// Special CIDFont support
// XXX only CIDFontType2 supported for now
if (subType.name == 'CIDFontType2') {
var cidToGidMap = descendant.get('CIDToGIDMap');
if (cidToGidMap && IsRef(cidToGidMap)) {
// Extract the charset from the CIDToGIDMap
var glyphsStream = xref.fetchIfRef(cidToGidMap);
var glyphsData = glyphsStream.getBytes(0);
var i = 0;
// Glyph ids are big-endian 2-byte values
for (var j=0; j<glyphsData.length; j++) {
var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
charset.push(glyphID);
}
}
}
else {
// XXX This is a placeholder for handling of the encoding of CIDFontType0 fonts
var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
if (IsName(encoding)) {
// Encoding is a predefined CMap
if (encoding.name == 'Identity-H') {
TODO ('Need to create an identity cmap')
} else {
TODO ('Need to support predefined CMaps see PDF 32000-1:2008 9.7.5.2 Predefined CMaps')
}
} else {
TODO ('Need to support encoding streams see PDF 32000-1:2008 9.7.5.3');
}
}
} else if (fontDict.has('Encoding')) {
var encoding = xref.fetchIfRef(fontDict.get('Encoding')); var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
if (IsDict(encoding)) { if (IsDict(encoding)) {
// Build a map of between codes and glyphs // Build a map of between codes and glyphs
@ -3758,9 +3809,6 @@ var PartialEvaluator = (function() {
} }
} }
var subType = fontDict.get('Subtype');
assertWellFormed(IsName(subType), 'invalid font Subtype');
var properties = { var properties = {
type: subType.name, type: subType.name,
encoding: encodingMap, encoding: encodingMap,
@ -3775,7 +3823,8 @@ var PartialEvaluator = (function() {
flags: descriptor.get('Flags'), flags: descriptor.get('Flags'),
italicAngle: descriptor.get('ItalicAngle'), italicAngle: descriptor.get('ItalicAngle'),
fixedPitch: false, fixedPitch: false,
textMatrix: IDENTITY_MATRIX textMatrix: IDENTITY_MATRIX,
compositeFont: compositeFont
}; };
return { return {

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -48,5 +48,20 @@
"link": true, "link": true,
"rounds": 1, "rounds": 1,
"type": "load" "type": "load"
},
{ "id": "openofficecidtruetype-pdf",
"file": "pdfs/arial_unicode_en_cidfont.pdf",
"rounds": 1,
"type": "load"
},
{ "id": "openofficearabiccidtruetype-pdf",
"file": "pdfs/arial_unicode_ab_cidfont.pdf",
"rounds": 1,
"type": "load"
},
{ "id": "arabiccidtruetype-pdf",
"file": "pdfs/ArabicCIDTrueType.pdf",
"rounds": 1,
"type": "load"
} }
] ]