Merge pull request #12259 from brendandahl/cmap-fix

Fix handling of symbolic fonts and unicode cmaps.
This commit is contained in:
Tim van der Meij 2020-08-30 16:01:24 +02:00 committed by GitHub
commit 06b53d770a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 49 additions and 39 deletions

View File

@ -1610,7 +1610,12 @@ var Font = (function FontClosure() {
continue;
}
if (platformId === 0 && encodingId === 0) {
if (
platformId === 0 &&
(encodingId === /* Unicode Default */ 0 ||
encodingId === /* Unicode 1.1 */ 1 ||
encodingId === /* Unicode BMP */ 3)
) {
useTable = true;
// Continue the loop since there still may be a higher priority
// table.
@ -2792,32 +2797,24 @@ var Font = (function FontClosure() {
var cmapEncodingId = cmapTable.encodingId;
var cmapMappings = cmapTable.mappings;
var cmapMappingsLength = cmapMappings.length;
// The spec seems to imply that if the font is symbolic the encoding
// should be ignored; this doesn't appear to work for 'preistabelle.pdf',
// where the font is symbolic and it has an encoding.
let baseEncoding = [];
if (
(properties.hasEncoding &&
((cmapPlatformId === 3 && cmapEncodingId === 1) ||
(cmapPlatformId === 1 && cmapEncodingId === 0))) ||
(cmapPlatformId === -1 &&
cmapEncodingId === -1 && // Temporary hack
!!getEncoding(properties.baseEncodingName))
properties.hasEncoding &&
(properties.baseEncodingName === "MacRomanEncoding" ||
properties.baseEncodingName === "WinAnsiEncoding")
) {
// Temporary hack
// When no preferred cmap table was found and |baseEncodingName| is
// one of the predefined encodings, we seem to obtain a better
// |charCodeToGlyphId| map from the code below (fixes bug 1057544).
// TODO: Note that this is a hack which should be removed as soon as
// we have proper support for more exotic cmap tables.
baseEncoding = getEncoding(properties.baseEncodingName);
}
var baseEncoding = [];
if (
properties.baseEncodingName === "MacRomanEncoding" ||
properties.baseEncodingName === "WinAnsiEncoding"
) {
baseEncoding = getEncoding(properties.baseEncodingName);
}
// If the font has an encoding and is not symbolic then follow the
// rules in section 9.6.6.4 of the spec on how to map 3,1 and 1,0
// cmaps.
if (
properties.hasEncoding &&
!this.isSymbolicFont &&
((cmapPlatformId === 3 && cmapEncodingId === 1) ||
(cmapPlatformId === 1 && cmapEncodingId === 0))
) {
var glyphsUnicodeMap = getGlyphsUnicode();
for (let charCode = 0; charCode < 256; charCode++) {
var glyphName, standardGlyphName;
@ -2845,29 +2842,15 @@ var Font = (function FontClosure() {
unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
}
var found = false;
for (let i = 0; i < cmapMappingsLength; ++i) {
if (cmapMappings[i].charCode !== unicodeOrCharCode) {
continue;
}
charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
found = true;
break;
}
if (!found && properties.glyphNames) {
// Try to map using the post table.
var glyphId = properties.glyphNames.indexOf(glyphName);
// The post table ought to use the same kind of glyph names as the
// `differences` array, but check the standard ones as a fallback.
if (glyphId === -1 && standardGlyphName !== glyphName) {
glyphId = properties.glyphNames.indexOf(standardGlyphName);
}
if (glyphId > 0 && hasGlyph(glyphId)) {
charCodeToGlyphId[charCode] = glyphId;
}
}
}
} else if (cmapPlatformId === 0 && cmapEncodingId === 0) {
} else if (cmapPlatformId === 0) {
// Default Unicode semantics, use the charcodes as is.
for (let i = 0; i < cmapMappingsLength; ++i) {
charCodeToGlyphId[cmapMappings[i].charCode] =
@ -2897,6 +2880,19 @@ var Font = (function FontClosure() {
charCodeToGlyphId[charCode] = cmapMappings[i].glyphId;
}
}
// Last, try to map any missing charcodes using the post table.
if (properties.glyphNames && baseEncoding.length) {
for (let i = 0; i < 256; ++i) {
if (charCodeToGlyphId[i] === undefined && baseEncoding[i]) {
glyphName = baseEncoding[i];
const glyphId = properties.glyphNames.indexOf(glyphName);
if (glyphId > 0 && hasGlyph(glyphId)) {
charCodeToGlyphId[i] = glyphId;
}
}
}
}
}
if (charCodeToGlyphId.length === 0) {

View File

@ -88,6 +88,7 @@
!issue10665_reduced.pdf
!issue11016_reduced.pdf
!issue11045.pdf
!bug1057544.pdf
!issue11150_reduced.pdf
!issue11242_reduced.pdf
!issue11279.pdf
@ -192,6 +193,7 @@
!issue4260_reduced.pdf
!bug1250079.pdf
!bug1473809.pdf
!issue12120_reduced.pdf
!pdfjsbad1586.pdf
!freeculture.pdf
!issue6006.pdf

BIN
test/pdfs/bug1057544.pdf Normal file

Binary file not shown.

Binary file not shown.

View File

@ -1991,6 +1991,12 @@
"type": "eq",
"about": "MediaBox and CropBox with indirect objects."
},
{ "id": "bug1057544",
"file": "pdfs/bug1057544.pdf",
"md5": "49ad71b82ead1ee0fe4ddb41aa9e30b4",
"rounds": 1,
"type": "eq"
},
{ "id": "issue2642",
"file": "pdfs/issue2642.pdf",
"md5": "b6679861fdce3bbab0c1fa51bb7f5077",
@ -4204,6 +4210,12 @@
"lastPage": 2,
"type": "eq"
},
{ "id": "issue12120_reduced",
"file": "pdfs/issue12120_reduced.pdf",
"md5": "b4570dcee26ac3121ad3322e19ed1a6a",
"rounds": 1,
"type": "eq"
},
{ "id": "issue4883",
"file": "pdfs/issue4883.pdf",
"md5": "2fac0d9a189ca5fcef8626153d050be8",