Fix several issues with glyph id mappings.

The initial issue with #8255 was that I added a missing-glyphs
check to adjustMapping, but this caused us to skip re-mapping
a glyph if the fontCharCode was a missingGlyph, which in turn
caused us to overwrite a valid glyph id with an invalid one. While
fixing this, I also added a warning if the private use area is full, since
this also accidentally happened when I made a different mistake.

This brought to light a number of issues where we map
missing glyphs to notdef, but often the notdef is actually defined
and then ends up being drawn. Now the glyphs don't get
mapped in toFontChar and so they are not drawn by the canvas.

Fixing the above brought up another issue, though, in bug1050040.pdf.
In this PDF, the browser fails to load the font; before, we were still
drawing the glyphs because it looked like the font had them, but with the fixes
above the glyphs showed up as missing, so we didn't attempt to draw them. To
fix this, I now throw an error when the loca table is in really bad shape and
we fall back to trying to use a system font. We now also use this fallback if
there are any format errors while converting fonts.
This commit is contained in:
Brendan Dahl 2017-07-20 17:30:22 -07:00
parent 4a74cc418c
commit ac33358e1f

View File

@ -481,7 +481,7 @@ var ProblematicCharRanges = new Int32Array([
*/
var Font = (function FontClosure() {
function Font(name, file, properties) {
var charCode, glyphName, unicode;
var charCode;
this.name = name;
this.loadedName = properties.loadedName;
@ -498,6 +498,7 @@ var Font = (function FontClosure() {
var type = properties.type;
var subtype = properties.subtype;
this.type = type;
this.subtype = subtype;
this.fallbackName = (this.isMonospace ? 'monospace' :
(this.isSerifFont ? 'serif' : 'sans-serif'));
@ -512,6 +513,7 @@ var Font = (function FontClosure() {
this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS;
this.fontMatrix = properties.fontMatrix;
this.bbox = properties.bbox;
this.defaultEncoding = properties.defaultEncoding;
this.toUnicode = properties.toUnicode;
@ -532,88 +534,14 @@ var Font = (function FontClosure() {
this.vmetrics = properties.vmetrics;
this.defaultVMetrics = properties.defaultVMetrics;
}
var glyphsUnicodeMap;
if (!file || file.isEmpty) {
if (file) {
// Some bad PDF generators will include empty font files,
// attempting to recover by assuming that no file exists.
warn('Font file is empty in "' + name + '" (' + this.loadedName + ')');
}
this.missingFile = true;
// The file data is not specified. Trying to fix the font name
// to be used with the canvas.font.
var fontName = name.replace(/[,_]/g, '-');
var stdFontMap = getStdFontMap(), nonStdFontMap = getNonStdFontMap();
var isStandardFont = !!stdFontMap[fontName] ||
!!(nonStdFontMap[fontName] && stdFontMap[nonStdFontMap[fontName]]);
fontName = stdFontMap[fontName] || nonStdFontMap[fontName] || fontName;
this.bold = (fontName.search(/bold/gi) !== -1);
this.italic = ((fontName.search(/oblique/gi) !== -1) ||
(fontName.search(/italic/gi) !== -1));
// Use 'name' instead of 'fontName' here because the original
// name ArialBlack for example will be replaced by Helvetica.
this.black = (name.search(/Black/g) !== -1);
// if at least one width is present, remeasure all chars when exists
this.remeasure = Object.keys(this.widths).length > 0;
if (isStandardFont && type === 'CIDFontType2' &&
properties.cidEncoding.indexOf('Identity-') === 0) {
var GlyphMapForStandardFonts = getGlyphMapForStandardFonts();
// Standard fonts might be embedded as CID font without glyph mapping.
// Building one based on GlyphMapForStandardFonts.
var map = [];
for (charCode in GlyphMapForStandardFonts) {
map[+charCode] = GlyphMapForStandardFonts[charCode];
}
if (/Arial-?Black/i.test(name)) {
var SupplementalGlyphMapForArialBlack =
getSupplementalGlyphMapForArialBlack();
for (charCode in SupplementalGlyphMapForArialBlack) {
map[+charCode] = SupplementalGlyphMapForArialBlack[charCode];
}
}
var isIdentityUnicode = this.toUnicode instanceof IdentityToUnicodeMap;
if (!isIdentityUnicode) {
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
map[+charCode] = unicodeCharCode;
});
}
this.toFontChar = map;
this.toUnicode = new ToUnicodeMap(map);
} else if (/Symbol/i.test(fontName)) {
this.toFontChar = buildToFontChar(SymbolSetEncoding, getGlyphsUnicode(),
properties.differences);
} else if (/Dingbats/i.test(fontName)) {
if (/Wingdings/i.test(name)) {
warn('Non-embedded Wingdings font, falling back to ZapfDingbats.');
}
this.toFontChar = buildToFontChar(ZapfDingbatsEncoding,
getDingbatsGlyphsUnicode(),
properties.differences);
} else if (isStandardFont) {
this.toFontChar = buildToFontChar(properties.defaultEncoding,
getGlyphsUnicode(),
properties.differences);
} else {
glyphsUnicodeMap = getGlyphsUnicode();
this.toUnicode.forEach((charCode, unicodeCharCode) => {
if (!this.composite) {
glyphName = (properties.differences[charCode] ||
properties.defaultEncoding[charCode]);
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
if (unicode !== -1) {
unicodeCharCode = unicode;
}
}
this.toFontChar[charCode] = unicodeCharCode;
});
}
this.loadedName = fontName.split('-')[0];
this.loading = false;
this.fontType = getFontType(type, subtype);
this.fallbackToSystemFont();
return;
}
@ -649,41 +577,51 @@ var Font = (function FontClosure() {
type = 'OpenType';
}
var data;
switch (type) {
case 'MMType1':
info('MMType1 font (' + name + '), falling back to Type1.');
/* falls through */
case 'Type1':
case 'CIDFontType0':
this.mimetype = 'font/opentype';
try {
var data;
switch (type) {
case 'MMType1':
info('MMType1 font (' + name + '), falling back to Type1.');
/* falls through */
case 'Type1':
case 'CIDFontType0':
this.mimetype = 'font/opentype';
var cff = (subtype === 'Type1C' || subtype === 'CIDFontType0C') ?
new CFFFont(file, properties) : new Type1Font(name, file, properties);
var cff = (subtype === 'Type1C' || subtype === 'CIDFontType0C') ?
new CFFFont(file, properties) :
new Type1Font(name, file, properties);
adjustWidths(properties);
// Wrap the CFF data inside an OTF font file
data = this.convert(name, cff, properties);
break;
case 'OpenType':
case 'TrueType':
case 'CIDFontType2':
this.mimetype = 'font/opentype';
// Repair the TrueType file. It is can be damaged in the point of
// view of the sanitizer
data = this.checkAndRepair(name, file, properties);
if (this.isOpenType) {
adjustWidths(properties);
type = 'OpenType';
}
break;
// Wrap the CFF data inside an OTF font file
data = this.convert(name, cff, properties);
break;
default:
throw new FormatError(`Font ${type} is not supported`);
case 'OpenType':
case 'TrueType':
case 'CIDFontType2':
this.mimetype = 'font/opentype';
// Repair the TrueType file. It is can be damaged in the point of
// view of the sanitizer
data = this.checkAndRepair(name, file, properties);
if (this.isOpenType) {
adjustWidths(properties);
type = 'OpenType';
}
break;
default:
throw new FormatError(`Font ${type} is not supported`);
}
} catch (e) {
if (!(e instanceof FormatError)) {
throw e;
}
warn(e);
this.fallbackToSystemFont();
return;
}
this.data = data;
@ -812,6 +750,11 @@ var Font = (function FontClosure() {
for (var originalCharCode in charCodeToGlyphId) {
originalCharCode |= 0;
var glyphId = charCodeToGlyphId[originalCharCode];
// For missing glyphs don't create the mappings so the glyph isn't
// drawn.
if (missingGlyphs[glyphId]) {
continue;
}
var fontCharCode = originalCharCode;
// First try to map the value to a unicode position if a non identity map
// was created.
@ -830,13 +773,15 @@ var Font = (function FontClosure() {
// font was symbolic and there is only an identity unicode map since the
// characters probably aren't in the correct position (fixes an issue
// with firefox and thuluthfont).
if (!missingGlyphs[glyphId] &&
(usedFontCharCodes[fontCharCode] !== undefined ||
if ((usedFontCharCodes[fontCharCode] !== undefined ||
isProblematicUnicodeLocation(fontCharCode) ||
(isSymbolic && !hasUnicodeValue)) &&
nextAvailableFontCharCode <= PRIVATE_USE_OFFSET_END) { // Room left.
(isSymbolic && !hasUnicodeValue))) {
// Loop to try and find a free spot in the private use area.
do {
if (nextAvailableFontCharCode > PRIVATE_USE_OFFSET_END) {
warn('Ran out of space in font private use area.');
break;
}
fontCharCode = nextAvailableFontCharCode++;
if (SKIP_PRIVATE_USE_RANGE_F000_TO_F01F && fontCharCode === 0xF000) {
@ -844,8 +789,7 @@ var Font = (function FontClosure() {
nextAvailableFontCharCode = fontCharCode + 1;
}
} while (usedFontCharCodes[fontCharCode] !== undefined &&
nextAvailableFontCharCode <= PRIVATE_USE_OFFSET_END);
} while (usedFontCharCodes[fontCharCode] !== undefined);
}
newMap[fontCharCode] = glyphId;
@ -870,6 +814,11 @@ var Font = (function FontClosure() {
}
codes.push({ fontCharCode: charCode | 0, glyphId: glyphs[charCode], });
}
// Some fonts have zero glyphs and are used only for text selection, but
// there needs to be at least one to build a valid cmap table.
if (codes.length === 0) {
codes.push({ fontCharCode: 0, glyphId: 0, });
}
codes.sort(function fontGetRangesSort(a, b) {
return a.fontCharCode - b.fontCharCode;
});
@ -1248,6 +1197,87 @@ var Font = (function FontClosure() {
return data;
},
fallbackToSystemFont: function Font_fallbackToSystemFont() {
this.missingFile = true;
var charCode, unicode;
// The file data is not specified. Trying to fix the font name
// to be used with the canvas.font.
var name = this.name;
var type = this.type;
var subtype = this.subtype;
var fontName = name.replace(/[,_]/g, '-');
var stdFontMap = getStdFontMap(), nonStdFontMap = getNonStdFontMap();
var isStandardFont = !!stdFontMap[fontName] ||
!!(nonStdFontMap[fontName] && stdFontMap[nonStdFontMap[fontName]]);
fontName = stdFontMap[fontName] || nonStdFontMap[fontName] || fontName;
this.bold = (fontName.search(/bold/gi) !== -1);
this.italic = ((fontName.search(/oblique/gi) !== -1) ||
(fontName.search(/italic/gi) !== -1));
// Use 'name' instead of 'fontName' here because the original
// name ArialBlack for example will be replaced by Helvetica.
this.black = (name.search(/Black/g) !== -1);
// if at least one width is present, remeasure all chars when exists
this.remeasure = Object.keys(this.widths).length > 0;
if (isStandardFont && type === 'CIDFontType2' &&
this.cidEncoding.indexOf('Identity-') === 0) {
var GlyphMapForStandardFonts = getGlyphMapForStandardFonts();
// Standard fonts might be embedded as CID font without glyph mapping.
// Building one based on GlyphMapForStandardFonts.
var map = [];
for (charCode in GlyphMapForStandardFonts) {
map[+charCode] = GlyphMapForStandardFonts[charCode];
}
if (/Arial-?Black/i.test(name)) {
var SupplementalGlyphMapForArialBlack =
getSupplementalGlyphMapForArialBlack();
for (charCode in SupplementalGlyphMapForArialBlack) {
map[+charCode] = SupplementalGlyphMapForArialBlack[charCode];
}
}
var isIdentityUnicode = this.toUnicode instanceof IdentityToUnicodeMap;
if (!isIdentityUnicode) {
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
map[+charCode] = unicodeCharCode;
});
}
this.toFontChar = map;
this.toUnicode = new ToUnicodeMap(map);
} else if (/Symbol/i.test(fontName)) {
this.toFontChar = buildToFontChar(SymbolSetEncoding, getGlyphsUnicode(),
this.differences);
} else if (/Dingbats/i.test(fontName)) {
if (/Wingdings/i.test(name)) {
warn('Non-embedded Wingdings font, falling back to ZapfDingbats.');
}
this.toFontChar = buildToFontChar(ZapfDingbatsEncoding,
getDingbatsGlyphsUnicode(),
this.differences);
} else if (isStandardFont) {
this.toFontChar = buildToFontChar(this.defaultEncoding,
getGlyphsUnicode(),
this.differences);
} else {
var glyphsUnicodeMap = getGlyphsUnicode();
this.toUnicode.forEach((charCode, unicodeCharCode) => {
if (!this.composite) {
var glyphName = (this.differences[charCode] ||
this.defaultEncoding[charCode]);
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
if (unicode !== -1) {
unicodeCharCode = unicode;
}
}
this.toFontChar[charCode] = unicodeCharCode;
});
}
this.loadedName = fontName.split('-')[0];
this.loading = false;
this.fontType = getFontType(type, subtype);
},
checkAndRepair: function Font_checkAndRepair(name, font, properties) {
function readTableEntry(file) {
var tag = bytesToString(file.getBytes(4));
@ -1641,7 +1671,8 @@ var Font = (function FontClosure() {
data[50] = 0;
data[51] = 1;
} else {
warn('Could not fix indexToLocFormat: ' + indexToLocFormat);
throw new FormatError('Could not fix indexToLocFormat: ' +
indexToLocFormat);
}
}
}
@ -1687,9 +1718,6 @@ var Font = (function FontClosure() {
var startOffset = itemDecode(locaData, 0);
var writeOffset = 0;
var missingGlyphData = Object.create(null);
// Glyph zero should be notdef which isn't drawn. Sometimes this is a
// valid glyph but, then it is duplicated.
missingGlyphData[0] = true;
itemEncode(locaData, 0, writeOffset);
var i, j;
// When called with dupFirstEntry the number of glyphs has already been
@ -2382,9 +2410,6 @@ var Font = (function FontClosure() {
found = true;
}
}
if (!found) {
charCodeToGlyphId[charCode] = 0; // notdef
}
}
} else if (cmapPlatformId === 0 && cmapEncodingId === 0) {
// Default Unicode semantics, use the charcodes as is.