From 680c293c3416a7c33274340641abbd9fafb1a66c Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Sun, 10 Jul 2022 19:52:44 +0200 Subject: [PATCH] Add unicode mapping in the font cmap to have correct chars when printing in pdf (bug 1778484) It aims to fix https://bugzilla.mozilla.org/show_bug.cgi?id=1778484. --- src/core/fonts.js | 60 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/src/core/fonts.js b/src/core/fonts.js index 759a7acad..4c05af460 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -433,11 +433,14 @@ function convertCidString(charCode, cid, shouldThrow = false) { * font that we build * 'charCodeToGlyphId' - maps the new font char codes to glyph ids */ -function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId) { +function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId, toUnicode) { const newMap = Object.create(null); + const toUnicodeExtraMap = new Map(); const toFontChar = []; + const usedGlyphIds = new Set(); let privateUseAreaIndex = 0; - let nextAvailableFontCharCode = PRIVATE_USE_AREAS[privateUseAreaIndex][0]; + const privateUseOffetStart = PRIVATE_USE_AREAS[privateUseAreaIndex][0]; + let nextAvailableFontCharCode = privateUseOffetStart; let privateUseOffetEnd = PRIVATE_USE_AREAS[privateUseAreaIndex][1]; for (let originalCharCode in charCodeToGlyphId) { originalCharCode |= 0; @@ -461,17 +464,37 @@ function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId) { glyphId = newGlyphZeroId; } + // Fix for bug 1778484: + // The charcodes are moved into a private use area to fix some rendering + // issues (https://github.com/mozilla/pdf.js/pull/9340) but when printing + // to PDF the generated font will contain wrong chars. We can avoid that by + // adding the unicode to the cmap and the print backend will then map the + // glyph ids to the correct unicode. + let unicode = toUnicode.get(originalCharCode); + if (typeof unicode === "string") { + unicode = unicode.codePointAt(0); + } + if ( + unicode && + unicode < privateUseOffetStart && + !usedGlyphIds.has(glyphId) + ) { + toUnicodeExtraMap.set(unicode, glyphId); + usedGlyphIds.add(glyphId); + } + newMap[fontCharCode] = glyphId; toFontChar[originalCharCode] = fontCharCode; } return { toFontChar, charCodeToGlyphId: newMap, + toUnicodeExtraMap, nextAvailableFontCharCode, }; } -function getRanges(glyphs, numGlyphs) { +function getRanges(glyphs, toUnicodeExtraMap, numGlyphs) { // Array.sort() sorts by characters, not numerically, so convert to an // array of characters. const codes = []; @@ -482,6 +505,14 @@ function getRanges(glyphs, numGlyphs) { } codes.push({ fontCharCode: charCode | 0, glyphId: glyphs[charCode] }); } + if (toUnicodeExtraMap) { + for (const [unicode, glyphId] of toUnicodeExtraMap) { + if (glyphId >= numGlyphs) { + continue; + } + codes.push({ fontCharCode: unicode, glyphId }); + } + } // Some fonts have zero glyphs and are used only for text selection, but // there needs to be at least one to build a valid cmap table. if (codes.length === 0) { @@ -513,8 +544,8 @@ function getRanges(glyphs, numGlyphs) { return ranges; } -function createCmapTable(glyphs, numGlyphs) { - const ranges = getRanges(glyphs, numGlyphs); +function createCmapTable(glyphs, toUnicodeExtraMap, numGlyphs) { + const ranges = getRanges(glyphs, toUnicodeExtraMap, numGlyphs); const numTables = ranges.at(-1)[1] > 0xffff ? 2 : 1; let cmap = "\x00\x00" + // version @@ -2914,12 +2945,17 @@ class Font { const newMapping = adjustMapping( charCodeToGlyphId, hasGlyph, - glyphZeroId + glyphZeroId, + this.toUnicode ); this.toFontChar = newMapping.toFontChar; tables.cmap = { tag: "cmap", - data: createCmapTable(newMapping.charCodeToGlyphId, numGlyphsOut), + data: createCmapTable( + newMapping.charCodeToGlyphId, + newMapping.toUnicodeExtraMap, + numGlyphsOut + ), }; if (!tables["OS/2"] || !validateOS2Table(tables["OS/2"], font)) { @@ -2992,6 +3028,7 @@ class Font { const mapping = font.getGlyphMapping(properties); let newMapping = null; let newCharCodeToGlyphId = mapping; + let toUnicodeExtraMap = null; // When `cssFontInfo` is set, the font is used to render text in the HTML // view (e.g. with Xfa) so nothing must be moved in the private use area. @@ -2999,10 +3036,12 @@ class Font { newMapping = adjustMapping( mapping, font.hasGlyphId.bind(font), - glyphZeroId + glyphZeroId, + this.toUnicode ); this.toFontChar = newMapping.toFontChar; newCharCodeToGlyphId = newMapping.charCodeToGlyphId; + toUnicodeExtraMap = newMapping.toUnicodeExtraMap; } const numGlyphs = font.numGlyphs; @@ -3087,7 +3126,10 @@ class Font { // OS/2 and Windows Specific metrics builder.addTable("OS/2", createOS2Table(properties, newCharCodeToGlyphId)); // Character to glyphs mapping - builder.addTable("cmap", createCmapTable(newCharCodeToGlyphId, numGlyphs)); + builder.addTable( + "cmap", + createCmapTable(newCharCodeToGlyphId, toUnicodeExtraMap, numGlyphs) + ); // Font header builder.addTable( "head",