Merge pull request #454 from vingtetun/refactor

Refactor the translateFont() method to be more readable and simpler
This commit is contained in:
notmasteryet 2011-09-07 17:30:56 -07:00
commit 768554afd8
2 changed files with 242 additions and 230 deletions

View File

@ -510,7 +510,7 @@ var Font = (function Font() {
this.type = properties.type;
this.textMatrix = properties.textMatrix;
this.loadedName = getUniqueName();
this.compositeFont = properties.compositeFont;
this.composite = properties.composite;
this.loading = true;
};
@ -1131,8 +1131,8 @@ var Font = (function Font() {
for (i = 0; i < numGlyphs; i++)
encoding[i] = i + kCmapGlyphOffset;
} else {
for (var i in encoding)
encoding[i] = encoding[i] + kCmapGlyphOffset;
for (var code in encoding)
encoding[code] += kCmapGlyphOffset;
}
if (!cmap) {
@ -1352,7 +1352,7 @@ var Font = (function Font() {
return chars;
str = '';
if (this.compositeFont) {
if (this.composite) {
// composite fonts have multi-byte strings; convert the string from
// single-byte to multi-byte
// XXX assuming CIDFonts are two-byte - later need to extract the

464
pdf.js
View File

@ -3339,7 +3339,7 @@ var Page = (function() {
});
for (var i = 0, ii = fonts.length; i < ii; ++i)
fonts[i].fontDict.fontObj = fontObjs[i];
fonts[i].dict.fontObj = fontObjs[i];
},
@ -4180,59 +4180,30 @@ var PartialEvaluator = (function() {
};
},
translateFont: function(fontDict, xref, resources) {
var fd;
var descendant = [];
var subType = fontDict.get('Subtype');
var compositeFont = false;
assertWellFormed(IsName(subType), 'invalid font Subtype');
// If font is a composite
// - get the descendant font
// - set the type according to the descendant font
// - get the FontDescriptor from the descendant font
if (subType.name == 'Type0') {
var df = fontDict.get('DescendantFonts');
if (!df)
return null;
compositeFont = true;
if (IsRef(df)) {
df = xref.fetch(df);
}
descendant = xref.fetch(IsRef(df) ? df : df[0]);
subType = descendant.get('Subtype');
fd = descendant.get('FontDescriptor');
} else {
fd = fontDict.get('FontDescriptor');
}
var builtInEncoding = false;
var encodingMap = {};
if (compositeFont) {
// Special CIDFont support
extractEncoding: function(dict, xref, properties) {
var type = properties.type;
if (properties.composite) {
// XXX only CIDFontType2 supported for now
if (subType.name == 'CIDFontType2') {
var cidToGidMap = descendant.get('CIDToGIDMap');
if (cidToGidMap && IsRef(cidToGidMap)) {
// Extract the encoding from the CIDToGIDMap
var glyphsStream = xref.fetchIfRef(cidToGidMap);
var glyphsData = glyphsStream.getBytes(0);
// Glyph ids are big-endian 2-byte values
// Set this to 0 to verify the font has an encoding.
encodingMap[0] = 0;
for (var j = 0; j < glyphsData.length; j++) {
var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
if (glyphID != 0)
encodingMap[j >> 1] = glyphID;
}
if (type == 'CIDFontType2') {
var cidToGidMap = dict.get('CIDToGIDMap');
if (!cidToGidMap || !IsRef(cidToGidMap))
return GlyphsUnicode;
// Extract the encoding from the CIDToGIDMap
var glyphsStream = xref.fetchIfRef(cidToGidMap);
var glyphsData = glyphsStream.getBytes(0);
// Glyph ids are big-endian 2-byte values
// Set this to 0 to verify the font has an encoding.
var encoding = properties.encoding;
encoding[0] = 0;
for (var j = 0; j < glyphsData.length; j++) {
var glyphID = (glyphsData[j++] << 8) | glyphsData[j];
if (glyphID != 0)
encoding[j >> 1] = glyphID;
}
}
else {
// XXX This is a placeholder for handling of the encoding of
// CIDFontType0 fonts
var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
} else if (type == 'CIDFontType0') {
var encoding = xref.fetchIfRef(dict.get('Encoding'));
if (IsName(encoding)) {
// Encoding is a predefined CMap
if (encoding.name == 'Identity-H') {
@ -4246,227 +4217,262 @@ var PartialEvaluator = (function() {
'9.7.5.3');
}
}
} else {
var baseEncoding = null, diffEncoding = [];
if (fontDict.has('Encoding')) {
var encoding = xref.fetchIfRef(fontDict.get('Encoding'));
if (IsDict(encoding)) {
var baseName = encoding.get('BaseEncoding');
if (baseName)
baseEncoding = Encodings[baseName.name].slice();
return GlyphsUnicode;
}
// Load the differences between the base and original
var differences = encoding.get('Differences');
var index = 0;
for (var j = 0; j < differences.length; j++) {
var data = differences[j];
if (IsNum(data))
index = data;
else
diffEncoding[index++] = data.name;
}
} else if (IsName(encoding)) {
baseEncoding = Encodings[encoding.name].slice();
} else {
error('Encoding is not a Name nor a Dict');
var differences = properties.differences;
var map = properties.encoding;
var baseEncoding = null;
if (dict.has('Encoding')) {
var encoding = xref.fetchIfRef(dict.get('Encoding'));
if (IsDict(encoding)) {
var baseName = encoding.get('BaseEncoding');
if (baseName)
baseEncoding = Encodings[baseName.name].slice();
// Load the differences between the base and original
var diffEncoding = encoding.get('Differences');
var index = 0;
for (var j = 0; j < diffEncoding.length; j++) {
var data = diffEncoding[j];
if (IsNum(data))
index = data;
else
differences[index++] = data.name;
}
} else if (IsName(encoding)) {
baseEncoding = Encodings[encoding.name].slice();
} else {
error('Encoding is not a Name nor a Dict');
}
}
var fontType = subType.name;
if (!baseEncoding) {
switch (fontType) {
case 'TrueType':
baseEncoding = Encodings.WinAnsiEncoding.slice();
break;
case 'Type1':
baseEncoding = Encodings.StandardEncoding.slice();
break;
default:
warn('Unknown type of font: ' + fontType);
break;
}
if (!baseEncoding) {
switch (type) {
case 'TrueType':
baseEncoding = Encodings.WinAnsiEncoding.slice();
break;
case 'Type1':
baseEncoding = Encodings.StandardEncoding.slice();
break;
default:
warn('Unknown type of font: ' + type);
break;
}
}
// firstChar and width are required
// (except for 14 standard fonts)
var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')) || 0;
var widths = xref.fetchIfRef(fontDict.get('Widths')) || [];
// merge in the differences
var firstChar = properties.firstChar;
var lastChar = properties.lastChar;
var glyphs = {};
for (var i = firstChar; i <= lastChar; i++) {
var glyph = differences[i] || baseEncoding[i];
if (glyph) {
var index = GlyphsUnicode[glyph] || i;
glyphs[glyph] = map[i] = index;
var lastChar = xref.fetchIfRef(fontDict.get('LastChar'));
if (!lastChar)
lastChar = diffEncoding.length || baseEncoding.length;
// If there is no file, the character mapping can't be modified,
// but it is unlikely that any standard encoding has chars below
// 0x1f, so that's fine.
if (!properties.file)
continue;
// merge in the differences
var glyphsMap = {};
for (var i = firstChar; i <= lastChar; i++) {
var glyph = diffEncoding[i] || baseEncoding[i];
if (glyph) {
var index = GlyphsUnicode[glyph] || i;
glyphsMap[glyph] = encodingMap[i] = index;
if (!fontFile)
continue;
if (index <= 0x1f || (index >= 127 && index <= 255))
glyphsMap[glyph] = encodingMap[i] += kCmapGlyphOffset;
}
if (index <= 0x1f || (index >= 127 && index <= 255))
glyphs[glyph] = map[i] += kCmapGlyphOffset;
}
}
if (fontType == 'TrueType' && fontDict.has('ToUnicode') &&
differences) {
var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode'));
if (IsName(cmapObj)) {
error('ToUnicode file cmap translation not implemented');
} else if (IsStream(cmapObj)) {
var tokens = [];
var token = '';
var beginArrayToken = {};
if (type == 'TrueType' && dict.has('ToUnicode') && differences) {
var cmapObj = xref.fetchIfRef(dict.get('ToUnicode'));
if (IsName(cmapObj)) {
error('ToUnicode file cmap translation not implemented');
} else if (IsStream(cmapObj)) {
var tokens = [];
var token = '';
var beginArrayToken = {};
var cmap = cmapObj.getBytes(cmapObj.length);
for (var i = 0; i < cmap.length; i++) {
var byte = cmap[i];
if (byte == 0x20 || byte == 0x0D || byte == 0x0A ||
byte == 0x3C || byte == 0x5B || byte == 0x5D) {
switch (token) {
case 'usecmap':
error('usecmap is not implemented');
break;
var cmap = cmapObj.getBytes(cmapObj.length);
for (var i = 0; i < cmap.length; i++) {
var byte = cmap[i];
if (byte == 0x20 || byte == 0x0D || byte == 0x0A ||
byte == 0x3C || byte == 0x5B || byte == 0x5D) {
switch (token) {
case 'usecmap':
error('usecmap is not implemented');
break;
case 'beginbfchar':
case 'beginbfrange':
case 'begincidchar':
case 'begincidrange':
token = '';
tokens = [];
break;
case 'beginbfchar':
case 'beginbfrange':
case 'begincidchar':
case 'begincidrange':
token = '';
tokens = [];
break;
case 'endcidrange':
case 'endbfrange':
for (var j = 0; j < tokens.length; j += 3) {
var startRange = tokens[j];
var endRange = tokens[j + 1];
var code = tokens[j + 2];
while (startRange < endRange) {
encodingMap[startRange] = code++;
++startRange;
}
case 'endcidrange':
case 'endbfrange':
for (var j = 0; j < tokens.length; j += 3) {
var startRange = tokens[j];
var endRange = tokens[j + 1];
var code = tokens[j + 2];
while (startRange < endRange) {
map[startRange] = code++;
++startRange;
}
break;
}
break;
case 'endcidchar':
case 'endbfchar':
for (var j = 0; j < tokens.length; j += 2) {
var index = tokens[j];
var code = tokens[j + 1];
encodingMap[index] = code;
}
break;
case 'endcidchar':
case 'endbfchar':
for (var j = 0; j < tokens.length; j += 2) {
var index = tokens[j];
var code = tokens[j + 1];
map[index] = code;
}
break;
case '':
break;
case '':
break;
default:
if (token[0] >= '0' && token[0] <= '9')
token = parseInt(token, 10); // a number
tokens.push(token);
token = '';
break;
}
switch (byte) {
case 0x5B:
// begin list parsing
tokens.push(beginArrayToken);
break;
case 0x5D:
// collect array items
var items = [], item;
while (tokens.length &&
(item = tokens.pop()) != beginArrayToken)
default:
if (token[0] >= '0' && token[0] <= '9')
token = parseInt(token, 10); // a number
tokens.push(token);
token = '';
break;
}
switch (byte) {
case 0x5B:
// begin list parsing
tokens.push(beginArrayToken);
break;
case 0x5D:
// collect array items
var items = [], item;
while (tokens.length &&
(item = tokens.pop()) != beginArrayToken)
items.unshift(item);
tokens.push(items);
break;
}
} else if (byte == 0x3E) {
if (token.length) {
// parsing hex number
tokens.push(parseInt(token, 16));
token = '';
}
} else {
token += String.fromCharCode(byte);
break;
}
} else if (byte == 0x3E) {
if (token.length) {
// parsing hex number
tokens.push(parseInt(token, 16));
token = '';
}
} else {
token += String.fromCharCode(byte);
}
}
}
}
return glyphs;
},
if (!fd) {
var baseFontName = fontDict.get('BaseFont');
translateFont: function(dict, xref, resources) {
var baseDict = dict;
var type = dict.get('Subtype');
assertWellFormed(IsName(type), 'invalid font Subtype');
var composite = false
if (type.name == 'Type0') {
// If font is a composite
// - get the descendant font
// - set the type according to the descendant font
// - get the FontDescriptor from the descendant font
var df = dict.get('DescendantFonts');
if (!df)
return null;
if (IsRef(df))
df = xref.fetch(df);
dict = xref.fetch(IsRef(df) ? df : df[0]);
type = dict.get('Subtype');
assertWellFormed(IsName(type), 'invalid font Subtype');
composite = true;
}
// Before PDF 1.5 if the font was one of the base 14 fonts, having a
// FontDescriptor was not required. This case is here for compatibility.
var descriptor = xref.fetchIfRef(dict.get('FontDescriptor'));
if (!descriptor) {
var baseFontName = dict.get('BaseFont');
if (!IsName(baseFontName))
return null;
// Using base font name as a font name.
baseFontName = baseFontName.name;
var map = {};
if (/^Symbol(-?(Bold|Italic))*$/.test(baseFontName)) {
// special case for symbols
var encoding = Encodings.symbolsEncoding;
for (var i = 0, n = encoding.length, j; i < n; i++) {
if (!(j = encoding[i]))
continue;
encodingMap[i] = GlyphsUnicode[j] || 0;
map[i] = GlyphsUnicode[j] || 0;
}
}
var properties = {
type: type.name,
encoding: map,
differences: [],
firstChar: 0,
lastChar: 256
};
this.extractEncoding(dict, xref, properties);
return {
name: baseFontName,
fontDict: fontDict,
properties: {
encoding: encodingMap
}
dict: baseDict,
properties: properties
};
}
var descriptor = xref.fetch(fd);
// According to the spec, if 'FontDescriptor' is declared, 'FirstChar',
// 'LastChar' and 'Widths' should exist too, but some PDF encoders seem
// to ignore this rule when a variant of a standard font is used.
// TODO Fill the width array depending on which of the base fonts this is
// a variant of.
var firstChar = xref.fetchIfRef(dict.get('FirstChar')) || 0;
var lastChar = xref.fetchIfRef(dict.get('LastChar')) || 256;
var widths = xref.fetchIfRef(dict.get('Widths')) || [];
var fontName = xref.fetchIfRef(descriptor.get('FontName'));
assertWellFormed(IsName(fontName), 'invalid font name');
fontName = fontName.name;
var fontFile = descriptor.get('FontFile', 'FontFile2', 'FontFile3');
var length1, length2;
if (fontFile) {
fontFile = xref.fetchIfRef(fontFile);
if (fontFile.dict) {
var fileType = fontFile.dict.get('Subtype');
if (fileType)
fileType = fileType.name;
var subtype = fontFile.dict.get('Subtype');
if (subtype)
subtype = subtype.name;
var length1 = fontFile.dict.get('Length1');
if (!IsInt(length1))
length1 = xref.fetchIfRef(length1);
var length2 = fontFile.dict.get('Length2');
if (!IsInt(length2))
length2 = xref.fetchIfRef(length2);
}
length1 = fontFile.dict.get('Length1');
if (!IsInt(length1))
length1 = xref.fetchIfRef(length1);
length2 = fontFile.dict.get('Length2');
if (!IsInt(length2))
length2 = xref.fetchIfRef(length2);
}
var widths = fontDict.get('Widths');
if (widths) {
var glyphWidths = {};
var unicode = fontDict.get('FirstChar');
for (var i = 0, ii = widths.length; i < ii; ++i)
glyphWidths[unicode++] = widths[i];
}
var properties = {
type: subType.name,
subtype: fileType,
widths: glyphWidths,
encoding: encodingMap,
differences: diffEncoding,
glyphs: glyphsMap || GlyphsUnicode,
firstChar: fontDict.get('FirstChar'),
lastChar: fontDict.get('LastChar'),
type: type.name,
subtype: subtype,
file: fontFile,
length1: length1,
length2: length2,
composite: composite,
fixedPitch: false,
textMatrix: IDENTITY_MATRIX,
firstChar: firstChar || 0,
lastChar: lastChar || 256,
bbox: descriptor.get('FontBBox'),
ascent: descriptor.get('Ascent'),
descent: descriptor.get('Descent'),
@ -4474,16 +4480,22 @@ var PartialEvaluator = (function() {
capHeight: descriptor.get('CapHeight'),
flags: descriptor.get('Flags'),
italicAngle: descriptor.get('ItalicAngle'),
fixedPitch: false,
textMatrix: IDENTITY_MATRIX,
compositeFont: compositeFont,
length1: length1,
length2: length2
differences: [],
widths: [],
encoding: {}
};
// XXX Encoding and Glyphs should point to the same object so it will
// be hard for them to get out of sync. The object could contain the
// unicode and the width of the glyph.
for (var i = 0; i <= widths.length; i++)
properties.widths[firstChar++] = widths[i];
properties.glyphs = this.extractEncoding(dict, xref, properties);
return {
name: fontName,
fontDict: fontDict,
name: fontName.name,
dict: baseDict,
file: fontFile,
properties: properties
};