Merge pull request #15934 from Snuffleupagus/issue-15910-2
Fallback to using the `name` table to infer the encoding for TrueType fonts missing such data (issue 15910)
This commit is contained in:
		
						commit
						42b1cff7b0
					
				@ -45,6 +45,7 @@ import {
 | 
			
		||||
  MacRomanEncoding,
 | 
			
		||||
  StandardEncoding,
 | 
			
		||||
  SymbolSetEncoding,
 | 
			
		||||
  WinAnsiEncoding,
 | 
			
		||||
  ZapfDingbatsEncoding,
 | 
			
		||||
} from "./encodings.js";
 | 
			
		||||
import {
 | 
			
		||||
@ -133,7 +134,56 @@ function adjustWidths(properties) {
 | 
			
		||||
  properties.defaultWidth *= scale;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function adjustToUnicode(properties, builtInEncoding) {
 | 
			
		||||
/**
 * For TrueType fonts that include neither `ToUnicode` nor `Encoding` data,
 * use the records of the font's `name` table to infer whether the fallback
 * encoding should really be `WinAnsiEncoding`, and if so amend
 * `properties.toUnicode` accordingly.
 *
 * The function only mutates `properties.toUnicode` (through its `amend`
 * method); in every other case it returns without side effects.
 *
 * @param {Object} properties - The font properties; the fields read here are
 *   `isInternalFont`, `hasIncludedToUnicodeMap`, `hasEncoding`, `toUnicode`
 *   and `defaultEncoding`.
 * @param {boolean} isSymbolicFont - Whether the font is flagged as symbolic.
 * @param {Array<Object>} nameRecords - The records collected while reading
 *   the `name` table; each one is checked with `isWinNameRecord`.
 * @returns {undefined}
 */
function adjustTrueTypeToUnicode(properties, isSymbolicFont, nameRecords) {
  if (properties.isInternalFont) {
    return;
  }
  if (properties.hasIncludedToUnicodeMap) {
    return; // The font dictionary has a `ToUnicode` entry.
  }
  if (properties.hasEncoding) {
    return; // The font dictionary has an `Encoding` entry.
  }
  if (properties.toUnicode instanceof IdentityToUnicodeMap) {
    return;
  }
  if (!isSymbolicFont) {
    return; // A non-symbolic font should default to `StandardEncoding`.
  }
  if (nameRecords.length === 0) {
    return;
  }

  // Try to infer if the fallback encoding should really be `WinAnsiEncoding`.
  if (properties.defaultEncoding === WinAnsiEncoding) {
    return;
  }
  for (const r of nameRecords) {
    if (!isWinNameRecord(r)) {
      return; // Not Windows, hence `WinAnsiEncoding` wouldn't make sense.
    }
  }
  const encoding = WinAnsiEncoding;

  // Build a sparse charCode -> unicode-string map; character codes without
  // a glyph name, or whose glyph name has no known unicode value, are left
  // unset so that the existing `toUnicode` entries are kept for them.
  const toUnicode = [];
  const glyphsUnicodeMap = getGlyphsUnicode();
  for (const charCode in encoding) {
    const glyphName = encoding[charCode];
    if (glyphName === "") {
      continue;
    }
    const unicode = glyphsUnicodeMap[glyphName];
    if (unicode === undefined) {
      continue;
    }
    toUnicode[charCode] = String.fromCharCode(unicode);
  }
  if (toUnicode.length > 0) {
    properties.toUnicode.amend(toUnicode);
  }
}
 | 
			
		||||
 | 
			
		||||
function adjustType1ToUnicode(properties, builtInEncoding) {
 | 
			
		||||
  if (properties.isInternalFont) {
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
@ -170,7 +220,7 @@ function adjustToUnicode(properties, builtInEncoding) {
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * NOTE: This function should only be called at the *end* of font-parsing,
 | 
			
		||||
 *       after e.g. `adjustToUnicode` has run, to prevent any issues.
 | 
			
		||||
 *       after e.g. `adjustType1ToUnicode` has run, to prevent any issues.
 | 
			
		||||
 */
 | 
			
		||||
function amendFallbackToUnicode(properties) {
 | 
			
		||||
  if (!properties.fallbackToUnicode) {
 | 
			
		||||
@ -409,6 +459,19 @@ function buildToFontChar(encoding, glyphsUnicodeMap, differences) {
 | 
			
		||||
  return toFontChar;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Please refer to:
//  - https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
/**
 * Check if a `name` table record uses the Macintosh platform/encoding
 * combination that is supported when reading the table, i.e. platform 1,
 * encoding 0 and language 0.
 *
 * @param {Object} r - A name record with numeric `platform`, `encoding` and
 *   `language` fields.
 * @returns {boolean} `true` only when all three fields match exactly.
 */
function isMacNameRecord(r) {
  return r.platform === 1 && r.encoding === 0 && r.language === 0;
}
 | 
			
		||||
 | 
			
		||||
// Please refer to:
//  - https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
//  - https://learn.microsoft.com/en-us/typography/opentype/spec/name#windows-language-ids
/**
 * Check if a `name` table record uses the Windows platform/encoding
 * combination that is supported when reading the table, i.e. platform 3,
 * encoding 1 and language 0x409 (see the Windows language IDs linked above).
 *
 * @param {Object} r - A name record with numeric `platform`, `encoding` and
 *   `language` fields.
 * @returns {boolean} `true` only when all three fields match exactly.
 */
function isWinNameRecord(r) {
  return r.platform === 3 && r.encoding === 1 && r.language === 0x409;
}
 | 
			
		||||
 | 
			
		||||
function convertCidString(charCode, cid, shouldThrow = false) {
 | 
			
		||||
  switch (cid.length) {
 | 
			
		||||
    case 1:
 | 
			
		||||
@ -1377,7 +1440,7 @@ class Font {
 | 
			
		||||
            'TrueType Collection font must contain a "name" table.'
 | 
			
		||||
          );
 | 
			
		||||
        }
 | 
			
		||||
        const nameTable = readNameTable(potentialTables.name);
 | 
			
		||||
        const [nameTable] = readNameTable(potentialTables.name);
 | 
			
		||||
 | 
			
		||||
        for (let j = 0, jj = nameTable.length; j < jj; j++) {
 | 
			
		||||
          for (let k = 0, kk = nameTable[j].length; k < kk; k++) {
 | 
			
		||||
@ -2186,18 +2249,18 @@ class Font {
 | 
			
		||||
      const start = (font.start || 0) + nameTable.offset;
 | 
			
		||||
      font.pos = start;
 | 
			
		||||
 | 
			
		||||
      const names = [[], []];
 | 
			
		||||
      const names = [[], []],
 | 
			
		||||
        records = [];
 | 
			
		||||
      const length = nameTable.length,
 | 
			
		||||
        end = start + length;
 | 
			
		||||
      const format = font.getUint16();
 | 
			
		||||
      const FORMAT_0_HEADER_LENGTH = 6;
 | 
			
		||||
      if (format !== 0 || length < FORMAT_0_HEADER_LENGTH) {
 | 
			
		||||
        // unsupported name table format or table "too" small
 | 
			
		||||
        return names;
 | 
			
		||||
        return [names, records];
 | 
			
		||||
      }
 | 
			
		||||
      const numRecords = font.getUint16();
 | 
			
		||||
      const stringsStart = font.getUint16();
 | 
			
		||||
      const records = [];
 | 
			
		||||
      const NAME_RECORD_LENGTH = 12;
 | 
			
		||||
      let i, ii;
 | 
			
		||||
 | 
			
		||||
@ -2211,10 +2274,7 @@ class Font {
 | 
			
		||||
          offset: font.getUint16(),
 | 
			
		||||
        };
 | 
			
		||||
        // using only Macintosh and Windows platform/encoding names
 | 
			
		||||
        if (
 | 
			
		||||
          (r.platform === 1 && r.encoding === 0 && r.language === 0) ||
 | 
			
		||||
          (r.platform === 3 && r.encoding === 1 && r.language === 0x409)
 | 
			
		||||
        ) {
 | 
			
		||||
        if (isMacNameRecord(r) || isWinNameRecord(r)) {
 | 
			
		||||
          records.push(r);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
@ -2240,7 +2300,7 @@ class Font {
 | 
			
		||||
          names[0][nameIndex] = font.getString(record.length);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      return names;
 | 
			
		||||
      return [names, records];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // prettier-ignore
 | 
			
		||||
@ -2996,9 +3056,16 @@ class Font {
 | 
			
		||||
      };
 | 
			
		||||
    } else {
 | 
			
		||||
      // ... using existing 'name' table as prototype
 | 
			
		||||
      const namePrototype = readNameTable(tables.name);
 | 
			
		||||
      const [namePrototype, nameRecords] = readNameTable(tables.name);
 | 
			
		||||
 | 
			
		||||
      tables.name.data = createNameTable(name, namePrototype);
 | 
			
		||||
      this.psName = namePrototype[0][6] || null;
 | 
			
		||||
 | 
			
		||||
      if (!properties.composite) {
 | 
			
		||||
        // For TrueType fonts that do not include `ToUnicode` or `Encoding`
 | 
			
		||||
        // data, attempt to use the name-table to improve text selection.
 | 
			
		||||
        adjustTrueTypeToUnicode(properties, this.isSymbolicFont, nameRecords);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const builder = new OpenTypeFileBuilder(header.version);
 | 
			
		||||
@ -3015,7 +3082,7 @@ class Font {
 | 
			
		||||
    if (properties.builtInEncoding) {
 | 
			
		||||
      // For Type1 fonts that do not include either `ToUnicode` or `Encoding`
 | 
			
		||||
      // data, attempt to use the `builtInEncoding` to improve text selection.
 | 
			
		||||
      adjustToUnicode(properties, properties.builtInEncoding);
 | 
			
		||||
      adjustType1ToUnicode(properties, properties.builtInEncoding);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Type 1 fonts have a notdef inserted at the beginning, so glyph 0
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							@ -242,6 +242,7 @@
 | 
			
		||||
!bug1108301.pdf
 | 
			
		||||
!issue10301.pdf
 | 
			
		||||
!bug1157493.pdf
 | 
			
		||||
!issue15910.pdf
 | 
			
		||||
!issue4260_reduced.pdf
 | 
			
		||||
!bug1250079.pdf
 | 
			
		||||
!bug1473809.pdf
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								test/pdfs/issue15910.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								test/pdfs/issue15910.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@ -3056,6 +3056,12 @@
 | 
			
		||||
       "link": true,
 | 
			
		||||
       "type": "eq"
 | 
			
		||||
    },
 | 
			
		||||
    {  "id": "issue15910",
 | 
			
		||||
       "file": "pdfs/issue15910.pdf",
 | 
			
		||||
       "md5": "6429d8490e11e226b1cbdf2033c04237",
 | 
			
		||||
       "rounds": 1,
 | 
			
		||||
       "type": "text"
 | 
			
		||||
    },
 | 
			
		||||
    {  "id": "issue7872",
 | 
			
		||||
       "file": "pdfs/issue7872.pdf",
 | 
			
		||||
       "md5": "81781dfecfcb7e9cd9cc7e60f8b747b7",
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user