Merge pull request #15934 from Snuffleupagus/issue-15910-2

Fallback to using the `name` table to infer the encoding for TrueType fonts missing such data (issue 15910)
This commit is contained in:
Jonas Jenwald 2023-01-17 23:01:23 +01:00 committed by GitHub
commit 42b1cff7b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 87 additions and 13 deletions

View File

@ -45,6 +45,7 @@ import {
MacRomanEncoding,
StandardEncoding,
SymbolSetEncoding,
WinAnsiEncoding,
ZapfDingbatsEncoding,
} from "./encodings.js";
import {
@ -133,7 +134,56 @@ function adjustWidths(properties) {
properties.defaultWidth *= scale;
}
function adjustToUnicode(properties, builtInEncoding) {
function adjustTrueTypeToUnicode(properties, isSymbolicFont, nameRecords) {
if (properties.isInternalFont) {
return;
}
if (properties.hasIncludedToUnicodeMap) {
return; // The font dictionary has a `ToUnicode` entry.
}
if (properties.hasEncoding) {
return; // The font dictionary has an `Encoding` entry.
}
if (properties.toUnicode instanceof IdentityToUnicodeMap) {
return;
}
if (!isSymbolicFont) {
return; // A non-symbolic font should default to `StandardEncoding`.
}
if (nameRecords.length === 0) {
return;
}
// Try to infer if the fallback encoding should really be `WinAnsiEncoding`.
if (properties.defaultEncoding === WinAnsiEncoding) {
return;
}
for (const r of nameRecords) {
if (!isWinNameRecord(r)) {
return; // Not Windows, hence `WinAnsiEncoding` wouldn't make sense.
}
}
const encoding = WinAnsiEncoding;
const toUnicode = [],
glyphsUnicodeMap = getGlyphsUnicode();
for (const charCode in encoding) {
const glyphName = encoding[charCode];
if (glyphName === "") {
continue;
}
const unicode = glyphsUnicodeMap[glyphName];
if (unicode === undefined) {
continue;
}
toUnicode[charCode] = String.fromCharCode(unicode);
}
if (toUnicode.length > 0) {
properties.toUnicode.amend(toUnicode);
}
}
function adjustType1ToUnicode(properties, builtInEncoding) {
if (properties.isInternalFont) {
return;
}
@ -170,7 +220,7 @@ function adjustToUnicode(properties, builtInEncoding) {
/**
* NOTE: This function should only be called at the *end* of font-parsing,
* after e.g. `adjustToUnicode` has run, to prevent any issues.
* after e.g. `adjustType1ToUnicode` has run, to prevent any issues.
*/
function amendFallbackToUnicode(properties) {
if (!properties.fallbackToUnicode) {
@ -409,6 +459,19 @@ function buildToFontChar(encoding, glyphsUnicodeMap, differences) {
return toFontChar;
}
// Please refer to:
// - https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
function isMacNameRecord(r) {
return r.platform === 1 && r.encoding === 0 && r.language === 0;
}
// Please refer to:
// - https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
// - https://learn.microsoft.com/en-us/typography/opentype/spec/name#windows-language-ids
function isWinNameRecord(r) {
return r.platform === 3 && r.encoding === 1 && r.language === 0x409;
}
function convertCidString(charCode, cid, shouldThrow = false) {
switch (cid.length) {
case 1:
@ -1377,7 +1440,7 @@ class Font {
'TrueType Collection font must contain a "name" table.'
);
}
const nameTable = readNameTable(potentialTables.name);
const [nameTable] = readNameTable(potentialTables.name);
for (let j = 0, jj = nameTable.length; j < jj; j++) {
for (let k = 0, kk = nameTable[j].length; k < kk; k++) {
@ -2186,18 +2249,18 @@ class Font {
const start = (font.start || 0) + nameTable.offset;
font.pos = start;
const names = [[], []];
const names = [[], []],
records = [];
const length = nameTable.length,
end = start + length;
const format = font.getUint16();
const FORMAT_0_HEADER_LENGTH = 6;
if (format !== 0 || length < FORMAT_0_HEADER_LENGTH) {
// unsupported name table format or table "too" small
return names;
return [names, records];
}
const numRecords = font.getUint16();
const stringsStart = font.getUint16();
const records = [];
const NAME_RECORD_LENGTH = 12;
let i, ii;
@ -2211,10 +2274,7 @@ class Font {
offset: font.getUint16(),
};
// using only Macintosh and Windows platform/encoding names
if (
(r.platform === 1 && r.encoding === 0 && r.language === 0) ||
(r.platform === 3 && r.encoding === 1 && r.language === 0x409)
) {
if (isMacNameRecord(r) || isWinNameRecord(r)) {
records.push(r);
}
}
@ -2240,7 +2300,7 @@ class Font {
names[0][nameIndex] = font.getString(record.length);
}
}
return names;
return [names, records];
}
// prettier-ignore
@ -2996,9 +3056,16 @@ class Font {
};
} else {
// ... using existing 'name' table as prototype
const namePrototype = readNameTable(tables.name);
const [namePrototype, nameRecords] = readNameTable(tables.name);
tables.name.data = createNameTable(name, namePrototype);
this.psName = namePrototype[0][6] || null;
if (!properties.composite) {
// For TrueType fonts that do not include `ToUnicode` or `Encoding`
// data, attempt to use the name-table to improve text selection.
adjustTrueTypeToUnicode(properties, this.isSymbolicFont, nameRecords);
}
}
const builder = new OpenTypeFileBuilder(header.version);
@ -3015,7 +3082,7 @@ class Font {
if (properties.builtInEncoding) {
// For Type1 fonts that do not include either `ToUnicode` or `Encoding`
// data, attempt to use the `builtInEncoding` to improve text selection.
adjustToUnicode(properties, properties.builtInEncoding);
adjustType1ToUnicode(properties, properties.builtInEncoding);
}
// Type 1 fonts have a notdef inserted at the beginning, so glyph 0

View File

@ -242,6 +242,7 @@
!bug1108301.pdf
!issue10301.pdf
!bug1157493.pdf
!issue15910.pdf
!issue4260_reduced.pdf
!bug1250079.pdf
!bug1473809.pdf

BIN
test/pdfs/issue15910.pdf Normal file

Binary file not shown.

View File

@ -3056,6 +3056,12 @@
"link": true,
"type": "eq"
},
{ "id": "issue15910",
"file": "pdfs/issue15910.pdf",
"md5": "6429d8490e11e226b1cbdf2033c04237",
"rounds": 1,
"type": "text"
},
{ "id": "issue7872",
"file": "pdfs/issue7872.pdf",
"md5": "81781dfecfcb7e9cd9cc7e60f8b747b7",