Merge pull request #12259 from brendandahl/cmap-fix

Fix handling of symbolic fonts and unicode cmaps.
This commit is contained in:
Tim van der Meij 2020-08-30 16:01:24 +02:00 committed by GitHub
commit 06b53d770a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 49 additions and 39 deletions

View File

@ -1610,7 +1610,12 @@ var Font = (function FontClosure() {
continue; continue;
} }
if (platformId === 0 && encodingId === 0) { if (
platformId === 0 &&
(encodingId === /* Unicode Default */ 0 ||
encodingId === /* Unicode 1.1 */ 1 ||
encodingId === /* Unicode BMP */ 3)
) {
useTable = true; useTable = true;
// Continue the loop since there still may be a higher priority // Continue the loop since there still may be a higher priority
// table. // table.
@ -2792,32 +2797,24 @@ var Font = (function FontClosure() {
var cmapEncodingId = cmapTable.encodingId; var cmapEncodingId = cmapTable.encodingId;
var cmapMappings = cmapTable.mappings; var cmapMappings = cmapTable.mappings;
var cmapMappingsLength = cmapMappings.length; var cmapMappingsLength = cmapMappings.length;
let baseEncoding = [];
// The spec seems to imply that if the font is symbolic the encoding
// should be ignored, this doesn't appear to work for 'preistabelle.pdf'
// where the font is symbolic and it has an encoding.
if ( if (
(properties.hasEncoding && properties.hasEncoding &&
((cmapPlatformId === 3 && cmapEncodingId === 1) || (properties.baseEncodingName === "MacRomanEncoding" ||
(cmapPlatformId === 1 && cmapEncodingId === 0))) || properties.baseEncodingName === "WinAnsiEncoding")
(cmapPlatformId === -1 &&
cmapEncodingId === -1 && // Temporary hack
!!getEncoding(properties.baseEncodingName))
) { ) {
// Temporary hack baseEncoding = getEncoding(properties.baseEncodingName);
// When no preferred cmap table was found and |baseEncodingName| is }
// one of the predefined encodings, we seem to obtain a better
// |charCodeToGlyphId| map from the code below (fixes bug 1057544).
// TODO: Note that this is a hack which should be removed as soon as
// we have proper support for more exotic cmap tables.
var baseEncoding = []; // If the font has an encoding and is not symbolic then follow the
if ( // rules in section 9.6.6.4 of the spec on how to map 3,1 and 1,0
properties.baseEncodingName === "MacRomanEncoding" || // cmaps.
properties.baseEncodingName === "WinAnsiEncoding" if (
) { properties.hasEncoding &&
baseEncoding = getEncoding(properties.baseEncodingName); !this.isSymbolicFont &&
} ((cmapPlatformId === 3 && cmapEncodingId === 1) ||
(cmapPlatformId === 1 && cmapEncodingId === 0))
) {
var glyphsUnicodeMap = getGlyphsUnicode(); var glyphsUnicodeMap = getGlyphsUnicode();
for (let charCode = 0; charCode < 256; charCode++) { for (let charCode = 0; charCode < 256; charCode++) {
var glyphName, standardGlyphName; var glyphName, standardGlyphName;
@ -2845,29 +2842,15 @@ var Font = (function FontClosure() {
unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName); unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
} }
var found = false;
for (let i = 0; i < cmapMappingsLength; ++i) { for (let i = 0; i < cmapMappingsLength; ++i) {
if (cmapMappings[i].charCode !== unicodeOrCharCode) { if (cmapMappings[i].charCode !== unicodeOrCharCode) {
continue; continue;
} }
charCodeToGlyphId[charCode] = cmapMappings[i].glyphId; charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
found = true;
break; break;
} }
if (!found && properties.glyphNames) {
// Try to map using the post table.
var glyphId = properties.glyphNames.indexOf(glyphName);
// The post table ought to use the same kind of glyph names as the
// `differences` array, but check the standard ones as a fallback.
if (glyphId === -1 && standardGlyphName !== glyphName) {
glyphId = properties.glyphNames.indexOf(standardGlyphName);
}
if (glyphId > 0 && hasGlyph(glyphId)) {
charCodeToGlyphId[charCode] = glyphId;
}
}
} }
} else if (cmapPlatformId === 0 && cmapEncodingId === 0) { } else if (cmapPlatformId === 0) {
// Default Unicode semantics, use the charcodes as is. // Default Unicode semantics, use the charcodes as is.
for (let i = 0; i < cmapMappingsLength; ++i) { for (let i = 0; i < cmapMappingsLength; ++i) {
charCodeToGlyphId[cmapMappings[i].charCode] = charCodeToGlyphId[cmapMappings[i].charCode] =
@ -2897,6 +2880,19 @@ var Font = (function FontClosure() {
charCodeToGlyphId[charCode] = cmapMappings[i].glyphId; charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
} }
} }
// Last, try to map any missing charcodes using the post table.
if (properties.glyphNames && baseEncoding.length) {
for (let i = 0; i < 256; ++i) {
if (charCodeToGlyphId[i] === undefined && baseEncoding[i]) {
glyphName = baseEncoding[i];
const glyphId = properties.glyphNames.indexOf(glyphName);
if (glyphId > 0 && hasGlyph(glyphId)) {
charCodeToGlyphId[i] = glyphId;
}
}
}
}
} }
if (charCodeToGlyphId.length === 0) { if (charCodeToGlyphId.length === 0) {

View File

@ -88,6 +88,7 @@
!issue10665_reduced.pdf !issue10665_reduced.pdf
!issue11016_reduced.pdf !issue11016_reduced.pdf
!issue11045.pdf !issue11045.pdf
!bug1057544.pdf
!issue11150_reduced.pdf !issue11150_reduced.pdf
!issue11242_reduced.pdf !issue11242_reduced.pdf
!issue11279.pdf !issue11279.pdf
@ -192,6 +193,7 @@
!issue4260_reduced.pdf !issue4260_reduced.pdf
!bug1250079.pdf !bug1250079.pdf
!bug1473809.pdf !bug1473809.pdf
!issue12120_reduced.pdf
!pdfjsbad1586.pdf !pdfjsbad1586.pdf
!freeculture.pdf !freeculture.pdf
!issue6006.pdf !issue6006.pdf

BIN
test/pdfs/bug1057544.pdf Normal file

Binary file not shown.

Binary file not shown.

View File

@ -1991,6 +1991,12 @@
"type": "eq", "type": "eq",
"about": "MediaBox and CropBox with indirect objects." "about": "MediaBox and CropBox with indirect objects."
}, },
{ "id": "bug1057544",
"file": "pdfs/bug1057544.pdf",
"md5": "49ad71b82ead1ee0fe4ddb41aa9e30b4",
"rounds": 1,
"type": "eq"
},
{ "id": "issue2642", { "id": "issue2642",
"file": "pdfs/issue2642.pdf", "file": "pdfs/issue2642.pdf",
"md5": "b6679861fdce3bbab0c1fa51bb7f5077", "md5": "b6679861fdce3bbab0c1fa51bb7f5077",
@ -4204,6 +4210,12 @@
"lastPage": 2, "lastPage": 2,
"type": "eq" "type": "eq"
}, },
{ "id": "issue12120_reduced",
"file": "pdfs/issue12120_reduced.pdf",
"md5": "b4570dcee26ac3121ad3322e19ed1a6a",
"rounds": 1,
"type": "eq"
},
{ "id": "issue4883", { "id": "issue4883",
"file": "pdfs/issue4883.pdf", "file": "pdfs/issue4883.pdf",
"md5": "2fac0d9a189ca5fcef8626153d050be8", "md5": "2fac0d9a189ca5fcef8626153d050be8",