diff --git a/src/core/fonts.js b/src/core/fonts.js index b0c35cd98..964a80df2 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -45,6 +45,7 @@ import { MacRomanEncoding, StandardEncoding, SymbolSetEncoding, + WinAnsiEncoding, ZapfDingbatsEncoding, } from "./encodings.js"; import { @@ -133,7 +134,56 @@ function adjustWidths(properties) { properties.defaultWidth *= scale; } -function adjustToUnicode(properties, builtInEncoding) { +function adjustTrueTypeToUnicode(properties, isSymbolicFont, nameRecords) { + if (properties.isInternalFont) { + return; + } + if (properties.hasIncludedToUnicodeMap) { + return; // The font dictionary has a `ToUnicode` entry. + } + if (properties.hasEncoding) { + return; // The font dictionary has an `Encoding` entry. + } + if (properties.toUnicode instanceof IdentityToUnicodeMap) { + return; + } + if (!isSymbolicFont) { + return; // A non-symbolic font should default to `StandardEncoding`. + } + if (nameRecords.length === 0) { + return; + } + + // Try to infer if the fallback encoding should really be `WinAnsiEncoding`. + if (properties.defaultEncoding === WinAnsiEncoding) { + return; + } + for (const r of nameRecords) { + if (!isWinNameRecord(r)) { + return; // Not Windows, hence `WinAnsiEncoding` wouldn't make sense. + } + } + const encoding = WinAnsiEncoding; + + const toUnicode = [], + glyphsUnicodeMap = getGlyphsUnicode(); + for (const charCode in encoding) { + const glyphName = encoding[charCode]; + if (glyphName === "") { + continue; + } + const unicode = glyphsUnicodeMap[glyphName]; + if (unicode === undefined) { + continue; + } + toUnicode[charCode] = String.fromCharCode(unicode); + } + if (toUnicode.length > 0) { + properties.toUnicode.amend(toUnicode); + } +} + +function adjustType1ToUnicode(properties, builtInEncoding) { if (properties.isInternalFont) { return; } @@ -170,7 +220,7 @@ function adjustToUnicode(properties, builtInEncoding) { /** * NOTE: This function should only be called at the *end* of font-parsing, - * after e.g. `adjustToUnicode` has run, to prevent any issues. + * after e.g. `adjustType1ToUnicode` has run, to prevent any issues. */ function amendFallbackToUnicode(properties) { if (!properties.fallbackToUnicode) { @@ -409,6 +459,19 @@ function buildToFontChar(encoding, glyphsUnicodeMap, differences) { return toFontChar; } +// Please refer to: +// - https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html +function isMacNameRecord(r) { + return r.platform === 1 && r.encoding === 0 && r.language === 0; +} + +// Please refer to: +// - https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html +// - https://learn.microsoft.com/en-us/typography/opentype/spec/name#windows-language-ids +function isWinNameRecord(r) { + return r.platform === 3 && r.encoding === 1 && r.language === 0x409; +} + function convertCidString(charCode, cid, shouldThrow = false) { switch (cid.length) { case 1: @@ -1377,7 +1440,7 @@ class Font { 'TrueType Collection font must contain a "name" table.' ); } - const nameTable = readNameTable(potentialTables.name); + const [nameTable] = readNameTable(potentialTables.name); for (let j = 0, jj = nameTable.length; j < jj; j++) { for (let k = 0, kk = nameTable[j].length; k < kk; k++) { @@ -2186,18 +2249,18 @@ class Font { const start = (font.start || 0) + nameTable.offset; font.pos = start; - const names = [[], []]; + const names = [[], []], + records = []; const length = nameTable.length, end = start + length; const format = font.getUint16(); const FORMAT_0_HEADER_LENGTH = 6; if (format !== 0 || length < FORMAT_0_HEADER_LENGTH) { // unsupported name table format or table "too" small - return names; + return [names, records]; } const numRecords = font.getUint16(); const stringsStart = font.getUint16(); - const records = []; const NAME_RECORD_LENGTH = 12; let i, ii; @@ -2211,10 +2274,7 @@ class Font { offset: font.getUint16(), }; // using only Macintosh and Windows platform/encoding names - if ( - (r.platform === 1 && r.encoding === 0 && r.language === 0) || - (r.platform === 3 && r.encoding === 1 && r.language === 0x409) - ) { + if (isMacNameRecord(r) || isWinNameRecord(r)) { records.push(r); } } @@ -2240,7 +2300,7 @@ class Font { names[0][nameIndex] = font.getString(record.length); } } - return names; + return [names, records]; } // prettier-ignore @@ -2996,9 +3056,16 @@ class Font { }; } else { // ... using existing 'name' table as prototype - const namePrototype = readNameTable(tables.name); + const [namePrototype, nameRecords] = readNameTable(tables.name); + tables.name.data = createNameTable(name, namePrototype); this.psName = namePrototype[0][6] || null; + + if (!properties.composite) { + // For TrueType fonts that do not include `ToUnicode` or `Encoding` + // data, attempt to use the name-table to improve text selection. + adjustTrueTypeToUnicode(properties, this.isSymbolicFont, nameRecords); + } } const builder = new OpenTypeFileBuilder(header.version); @@ -3015,7 +3082,7 @@ class Font { if (properties.builtInEncoding) { // For Type1 fonts that do not include either `ToUnicode` or `Encoding` // data, attempt to use the `builtInEncoding` to improve text selection. - adjustToUnicode(properties, properties.builtInEncoding); + adjustType1ToUnicode(properties, properties.builtInEncoding); } // Type 1 fonts have a notdef inserted at the beginning, so glyph 0 diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 4f04657ec..a7aed6e8b 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -242,6 +242,7 @@ !bug1108301.pdf !issue10301.pdf !bug1157493.pdf +!issue15910.pdf !issue4260_reduced.pdf !bug1250079.pdf !bug1473809.pdf diff --git a/test/pdfs/issue15910.pdf b/test/pdfs/issue15910.pdf new file mode 100644 index 000000000..86205a62e Binary files /dev/null and b/test/pdfs/issue15910.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 39b297caa..accc73184 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -3056,6 +3056,12 @@ "link": true, "type": "eq" }, + { "id": "issue15910", + "file": "pdfs/issue15910.pdf", + "md5": "6429d8490e11e226b1cbdf2033c04237", + "rounds": 1, + "type": "text" + }, { "id": "issue7872", "file": "pdfs/issue7872.pdf", "md5": "81781dfecfcb7e9cd9cc7e60f8b747b7",