Second pass at CIDFont support - make Arial Unicode work in OpenOffice PDFs

- supports PDF fonts with CIDtoGIDMap and no cmap
This commit is contained in:
Adil Allawi 2011-07-11 17:41:47 +01:00
parent ea2d651709
commit 7b8542c6a7
2 changed files with 125 additions and 43 deletions

View File

@ -404,12 +404,21 @@ var Font = (function() {
data = this.checkAndRepair(name, file, properties); data = this.checkAndRepair(name, file, properties);
break; break;
case 'Type0':
//this is a Truetype font
this.mimetype = 'font/opentype';
// Repair the TrueType file, as it may be considered damaged from the
// point of view of the sanitizer
data = this.checkAndRepair(name, file, properties);
break;
default: default:
warn('Font ' + properties.type + ' is not supported'); warn('Font ' + properties.type + ' is not supported');
break; break;
} }
this.data = data; this.data = data;
this.type = properties.type; //use the type to test if the string is single or multi-byte
this.id = Fonts.registerFont(name, data, properties); this.id = Fonts.registerFont(name, data, properties);
this.loadedName = 'pdfFont' + this.id; this.loadedName = 'pdfFont' + this.id;
}; };
@ -856,8 +865,26 @@ var Font = (function() {
data: stringToArray(createOS2Table(properties)) data: stringToArray(createOS2Table(properties))
}); });
// Replace the old CMAP table with a shiny new one if (!cmap) {
replaceCMapTable(cmap, font, properties); var glyphs = [];
var charset = properties.charset;
for (var i=1; i < charset.length; i++) {
if (charset.indexOf(i) != -1) {
glyphs.push({
unicode: charset.indexOf(i)
});
} else {
break;
}
}
tables.push({
tag: 'cmap',
data: createCMapTable(glyphs)
})
} else {
// Replace the old CMAP table with a shiny new one
replaceCMapTable(cmap, font, properties);
}
// Rewrite the 'post' table if needed // Rewrite the 'post' table if needed
if (!post) { if (!post) {
@ -1110,44 +1137,63 @@ var Font = (function() {
charsToUnicode: function fonts_chars2Unicode(chars) { charsToUnicode: function fonts_chars2Unicode(chars) {
var charsCache = this.charsCache; var charsCache = this.charsCache;
var str;
// if we translated this string before, just grab it from the cache // if we translated this string before, just grab it from the cache
if (charsCache) { if (charsCache) {
var str = charsCache[chars]; str = charsCache[chars];
if (str) if (str)
return str; return str;
} }
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
// lazily create the translation cache // lazily create the translation cache
if (!charsCache) if (!charsCache)
charsCache = this.charsCache = Object.create(null); charsCache = this.charsCache = Object.create(null);
str = ''; if (this.type == "Type0") {
for (var i = 0; i < chars.length; ++i) { //string needs to be converted from byte to multi-byte assume for now two-byte
var charcode = chars.charCodeAt(i); str = '';
var unicode = encoding[charcode]; var multiByteStr = "";
if ('undefined' == typeof(unicode)) { var length = chars.length;
// FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf for (var i = 0; i < length; i++) {
// at the moment, for unknown reasons. var byte1 = chars.charCodeAt(i++) & 0xFF;
warn('Unencoded charcode '+ charcode); var byte2;
unicode = charcode; if (i == length)
byte2 = 0;
else
byte2 = chars.charCodeAt(i) & 0xFF;
multiByteStr += String.fromCharCode((byte1<<8) | byte2);
} }
str = multiByteStr;
}
else {
// translate the string using the font's encoding
var encoding = this.encoding;
if (!encoding)
return chars;
// Check if the glyph has already been converted str = '';
if (!IsNum(unicode)) for (var i = 0; i < chars.length; ++i) {
unicode = encoding[unicode] = GlyphsUnicode[unicode.name]; var charcode = chars.charCodeAt(i);
var unicode = encoding[charcode];
if ('undefined' == typeof(unicode)) {
// FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf
// at the moment, for unknown reasons.
warn('Unencoded charcode '+ charcode);
unicode = charcode;
}
// Handle surrogate pairs // Check if the glyph has already been converted
if (unicode > 0xFFFF) { if (!IsNum(unicode))
str += String.fromCharCode(unicode & 0xFFFF); unicode = encoding[unicode] = GlyphsUnicode[unicode.name];
unicode >>= 16;
// Handle surrogate pairs
if (unicode > 0xFFFF) {
str += String.fromCharCode(unicode & 0xFFFF);
unicode >>= 16;
}
str += String.fromCharCode(unicode);
} }
str += String.fromCharCode(unicode);
} }
// Enter the translated string into the cache // Enter the translated string into the cache

60
pdf.js
View File

@ -64,6 +64,14 @@ function stringToBytes(str) {
return bytes; return bytes;
} }
/**
 * Packs a string of single-byte characters into a string of two-byte
 * character codes. Input bytes are consumed in pairs, high byte first:
 * each pair becomes one output character with code (high << 8) | low.
 *
 * @param {string} str - Source string; only the low 8 bits of each char
 *   code are used.
 * @returns {string} One character per input byte pair. An odd-length
 *   input is treated as if it were padded with a trailing zero byte.
 */
function singleByteToMultiByteString (str) {
  var multiByteStr = "";
  var length = str.length;
  for (var j = 0; j < length; j += 2) {
    var high = str.charCodeAt(j) & 0xFF;
    // The final pair of an odd-length input has no low byte; use 0.
    var low = (j + 1 < length) ? (str.charCodeAt(j + 1) & 0xFF) : 0;
    // Shift by 8, not 16: String.fromCharCode keeps only the low 16 bits,
    // so a 16-bit shift would silently drop the high byte.
    multiByteStr += String.fromCharCode((high << 8) | low);
  }
  return multiByteStr;
}
var Stream = (function() { var Stream = (function() {
function constructor(arrayBuffer, start, length, dict) { function constructor(arrayBuffer, start, length, dict) {
this.bytes = new Uint8Array(arrayBuffer); this.bytes = new Uint8Array(arrayBuffer);
@ -3624,19 +3632,26 @@ var PartialEvaluator = (function() {
}, },
translateFont: function(fontDict, xref, resources) { translateFont: function(fontDict, xref, resources) {
var fd = fontDict.get('FontDescriptor'); var fd;
if (!fd) var descendant = [];
var subType = fontDict.get('Subtype');
assertWellFormed(IsName(subType), 'invalid font Subtype');
//If font is a composite get the FontDescriptor from the descendant font
if (subType.name == "Type0")
{ {
//If font is a composite get the FontDescriptor from the descendant
var df = fontDict.get("DescendantFonts"); var df = fontDict.get("DescendantFonts");
if (!df) if (!df)
return null; return null;
var descendant = xref.fetch(df[0]); descendant = xref.fetch(df[0]);
fd = descendant.get("FontDescriptor"); fd = descendant.get("FontDescriptor");
if (!fd) } else {
return null; fd = fontDict.get('FontDescriptor');
fontDict.set("FontDescriptor", fd);
} }
if (!fd)
return null;
var descriptor = xref.fetch(fd); var descriptor = xref.fetch(fd);
var fontName = descriptor.get('FontName'); var fontName = descriptor.get('FontName');
@ -3650,7 +3665,32 @@ var PartialEvaluator = (function() {
var encodingMap = {}; var encodingMap = {};
var charset = []; var charset = [];
if (fontDict.has('Encoding')) { if (subType.name == 'Type0') {
//XXX CIDFont support - only identity CID Encoding for now
var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
if (IsName(encoding)) {
//Encoding is a predefined CMap
if (encoding.name == 'Identity-H') {
if (descendant.get('Subtype').name == 'CIDFontType2')
{
//Extract an encoding from the CIDToGIDMap
var glyphsStream = xref.fetchIfRef(descendant.get('CIDToGIDMap'));
var glyphsData = glyphsStream.getBytes(0);
var i = 0;
for (var j=0; j<glyphsData.length; j++) {
var glyphID = (glyphsData[j++]*0x100)+glyphsData[j];
//encodingMap[glyphID] = i++;
charset.push(glyphID);
}
encoding[0] = 0;
}
} else {
TODO ('Need to support predefined CMaps see PDF 32000-1:2008 9.7.5.2 Predefined CMaps')
}
} else {
TODO ('Need to support encoding streams see PDF 32000-1:2008 9.7.5.3');
}
} else if (fontDict.has('Encoding')) {
var encoding = xref.fetchIfRef(fontDict.get('Encoding')); var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
if (IsDict(encoding)) { if (IsDict(encoding)) {
// Build a map of between codes and glyphs // Build a map of between codes and glyphs
@ -3682,7 +3722,6 @@ var PartialEvaluator = (function() {
} }
} else if (IsName(encoding)) { } else if (IsName(encoding)) {
var encoding = Encodings[encoding.name]; var encoding = Encodings[encoding.name];
//XXX CIDFont support - get the CID Encoding especially support japan1 and identity
if (!encoding) if (!encoding)
error('Unknown font encoding'); error('Unknown font encoding');
@ -3767,9 +3806,6 @@ var PartialEvaluator = (function() {
} }
} }
var subType = fontDict.get('Subtype');
assertWellFormed(IsName(subType), 'invalid font Subtype');
var properties = { var properties = {
type: subType.name, type: subType.name,
encoding: encodingMap, encoding: encodingMap,