Fix handling of symbolic fonts and unicode cmaps.

In issue 12120, the font has a 1,0 cmap and is marked symbolic, which
according to the spec means we should use the cmap directly instead of
following the extra steps that are defined in section 9.6.6.4.

However, just fixing that caused bug 1057544 to break. The font in bug
1057544 has a 0,1 cmap (Unicode 1.1), which we were not using but which
is easy to support. We're also easily able to support some of the other
Unicode cmaps, so I added those as well.

There was also a second issue with bug 1057544: the cmap doesn't have
a mapping for the "quoteright" glyph, but it is defined in the post
table. To handle this, I've moved the post table lookup so that it is a
fallback for any font that has an encoding.
This commit is contained in:
Brendan Dahl 2020-08-21 15:25:07 -07:00
parent 0d5ef5dd0a
commit 45e8a31cc0
5 changed files with 49 additions and 39 deletions

View File

@ -1610,7 +1610,12 @@ var Font = (function FontClosure() {
continue;
}
if (platformId === 0 && encodingId === 0) {
if (
platformId === 0 &&
(encodingId === /* Unicode Default */ 0 ||
encodingId === /* Unicode 1.1 */ 1 ||
encodingId === /* Unicode BMP */ 3)
) {
useTable = true;
// Continue the loop since there still may be a higher priority
// table.
@ -2792,32 +2797,24 @@ var Font = (function FontClosure() {
var cmapEncodingId = cmapTable.encodingId;
var cmapMappings = cmapTable.mappings;
var cmapMappingsLength = cmapMappings.length;
// The spec seems to imply that if the font is symbolic the encoding
// should be ignored, this doesn't appear to work for 'preistabelle.pdf'
// where the the font is symbolic and it has an encoding.
let baseEncoding = [];
if (
(properties.hasEncoding &&
((cmapPlatformId === 3 && cmapEncodingId === 1) ||
(cmapPlatformId === 1 && cmapEncodingId === 0))) ||
(cmapPlatformId === -1 &&
cmapEncodingId === -1 && // Temporary hack
!!getEncoding(properties.baseEncodingName))
properties.hasEncoding &&
(properties.baseEncodingName === "MacRomanEncoding" ||
properties.baseEncodingName === "WinAnsiEncoding")
) {
// Temporary hack
// When no preferred cmap table was found and |baseEncodingName| is
// one of the predefined encodings, we seem to obtain a better
// |charCodeToGlyphId| map from the code below (fixes bug 1057544).
// TODO: Note that this is a hack which should be removed as soon as
// we have proper support for more exotic cmap tables.
baseEncoding = getEncoding(properties.baseEncodingName);
}
var baseEncoding = [];
if (
properties.baseEncodingName === "MacRomanEncoding" ||
properties.baseEncodingName === "WinAnsiEncoding"
) {
baseEncoding = getEncoding(properties.baseEncodingName);
}
// If the font has an encoding and is not symbolic then follow the
// rules in section 9.6.6.4 of the spec on how to map 3,1 and 1,0
// cmaps.
if (
properties.hasEncoding &&
!this.isSymbolicFont &&
((cmapPlatformId === 3 && cmapEncodingId === 1) ||
(cmapPlatformId === 1 && cmapEncodingId === 0))
) {
var glyphsUnicodeMap = getGlyphsUnicode();
for (let charCode = 0; charCode < 256; charCode++) {
var glyphName, standardGlyphName;
@ -2845,29 +2842,15 @@ var Font = (function FontClosure() {
unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
}
var found = false;
for (let i = 0; i < cmapMappingsLength; ++i) {
if (cmapMappings[i].charCode !== unicodeOrCharCode) {
continue;
}
charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
found = true;
break;
}
if (!found && properties.glyphNames) {
// Try to map using the post table.
var glyphId = properties.glyphNames.indexOf(glyphName);
// The post table ought to use the same kind of glyph names as the
// `differences` array, but check the standard ones as a fallback.
if (glyphId === -1 && standardGlyphName !== glyphName) {
glyphId = properties.glyphNames.indexOf(standardGlyphName);
}
if (glyphId > 0 && hasGlyph(glyphId)) {
charCodeToGlyphId[charCode] = glyphId;
}
}
}
} else if (cmapPlatformId === 0 && cmapEncodingId === 0) {
} else if (cmapPlatformId === 0) {
// Default Unicode semantics, use the charcodes as is.
for (let i = 0; i < cmapMappingsLength; ++i) {
charCodeToGlyphId[cmapMappings[i].charCode] =
@ -2897,6 +2880,19 @@ var Font = (function FontClosure() {
charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
}
}
// Last, try to map any missing charcodes using the post table.
if (properties.glyphNames && baseEncoding.length) {
for (let i = 0; i < 256; ++i) {
if (charCodeToGlyphId[i] === undefined && baseEncoding[i]) {
glyphName = baseEncoding[i];
const glyphId = properties.glyphNames.indexOf(glyphName);
if (glyphId > 0 && hasGlyph(glyphId)) {
charCodeToGlyphId[i] = glyphId;
}
}
}
}
}
if (charCodeToGlyphId.length === 0) {

View File

@ -88,6 +88,7 @@
!issue10665_reduced.pdf
!issue11016_reduced.pdf
!issue11045.pdf
!bug1057544.pdf
!issue11150_reduced.pdf
!issue11242_reduced.pdf
!issue11279.pdf
@ -192,6 +193,7 @@
!issue4260_reduced.pdf
!bug1250079.pdf
!bug1473809.pdf
!issue12120_reduced.pdf
!pdfjsbad1586.pdf
!freeculture.pdf
!issue6006.pdf

BIN
test/pdfs/bug1057544.pdf Normal file

Binary file not shown.

Binary file not shown.

View File

@ -1991,6 +1991,12 @@
"type": "eq",
"about": "MediaBox and CropBox with indirect objects."
},
{ "id": "bug1057544",
"file": "pdfs/bug1057544.pdf",
"md5": "49ad71b82ead1ee0fe4ddb41aa9e30b4",
"rounds": 1,
"type": "eq"
},
{ "id": "issue2642",
"file": "pdfs/issue2642.pdf",
"md5": "b6679861fdce3bbab0c1fa51bb7f5077",
@ -4204,6 +4210,12 @@
"lastPage": 2,
"type": "eq"
},
{ "id": "issue12120_reduced",
"file": "pdfs/issue12120_reduced.pdf",
"md5": "b4570dcee26ac3121ad3322e19ed1a6a",
"rounds": 1,
"type": "eq"
},
{ "id": "issue4883",
"file": "pdfs/issue4883.pdf",
"md5": "2fac0d9a189ca5fcef8626153d050be8",