Add a Unicode mapping to the font cmap so that the correct characters are produced when printing to PDF (bug 1778484)

It aims to fix https://bugzilla.mozilla.org/show_bug.cgi?id=1778484.
This commit is contained in:
Calixte Denizet 2022-07-10 19:52:44 +02:00
parent 220f980e12
commit 680c293c34

View File

@ -433,11 +433,14 @@ function convertCidString(charCode, cid, shouldThrow = false) {
* font that we build * font that we build
* 'charCodeToGlyphId' - maps the new font char codes to glyph ids * 'charCodeToGlyphId' - maps the new font char codes to glyph ids
*/ */
function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId) { function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId, toUnicode) {
const newMap = Object.create(null); const newMap = Object.create(null);
const toUnicodeExtraMap = new Map();
const toFontChar = []; const toFontChar = [];
const usedGlyphIds = new Set();
let privateUseAreaIndex = 0; let privateUseAreaIndex = 0;
let nextAvailableFontCharCode = PRIVATE_USE_AREAS[privateUseAreaIndex][0]; const privateUseOffetStart = PRIVATE_USE_AREAS[privateUseAreaIndex][0];
let nextAvailableFontCharCode = privateUseOffetStart;
let privateUseOffetEnd = PRIVATE_USE_AREAS[privateUseAreaIndex][1]; let privateUseOffetEnd = PRIVATE_USE_AREAS[privateUseAreaIndex][1];
for (let originalCharCode in charCodeToGlyphId) { for (let originalCharCode in charCodeToGlyphId) {
originalCharCode |= 0; originalCharCode |= 0;
@ -461,17 +464,37 @@ function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId) {
glyphId = newGlyphZeroId; glyphId = newGlyphZeroId;
} }
// Fix for bug 1778484:
// The charcodes are moved into a private use area to fix some rendering
// issues (https://github.com/mozilla/pdf.js/pull/9340), but when printing
// to PDF the generated font will then contain the wrong characters. We can
// avoid that by also adding the Unicode code points to the cmap, so that the
// print backend can map the glyph ids to the correct Unicode values.
let unicode = toUnicode.get(originalCharCode);
if (typeof unicode === "string") {
unicode = unicode.codePointAt(0);
}
if (
unicode &&
unicode < privateUseOffetStart &&
!usedGlyphIds.has(glyphId)
) {
toUnicodeExtraMap.set(unicode, glyphId);
usedGlyphIds.add(glyphId);
}
newMap[fontCharCode] = glyphId; newMap[fontCharCode] = glyphId;
toFontChar[originalCharCode] = fontCharCode; toFontChar[originalCharCode] = fontCharCode;
} }
return { return {
toFontChar, toFontChar,
charCodeToGlyphId: newMap, charCodeToGlyphId: newMap,
toUnicodeExtraMap,
nextAvailableFontCharCode, nextAvailableFontCharCode,
}; };
} }
function getRanges(glyphs, numGlyphs) { function getRanges(glyphs, toUnicodeExtraMap, numGlyphs) {
// Array.sort() sorts by characters, not numerically, so convert to an // Array.sort() sorts by characters, not numerically, so convert to an
// array of characters. // array of characters.
const codes = []; const codes = [];
@ -482,6 +505,14 @@ function getRanges(glyphs, numGlyphs) {
} }
codes.push({ fontCharCode: charCode | 0, glyphId: glyphs[charCode] }); codes.push({ fontCharCode: charCode | 0, glyphId: glyphs[charCode] });
} }
if (toUnicodeExtraMap) {
for (const [unicode, glyphId] of toUnicodeExtraMap) {
if (glyphId >= numGlyphs) {
continue;
}
codes.push({ fontCharCode: unicode, glyphId });
}
}
// Some fonts have zero glyphs and are used only for text selection, but // Some fonts have zero glyphs and are used only for text selection, but
// there needs to be at least one to build a valid cmap table. // there needs to be at least one to build a valid cmap table.
if (codes.length === 0) { if (codes.length === 0) {
@ -513,8 +544,8 @@ function getRanges(glyphs, numGlyphs) {
return ranges; return ranges;
} }
function createCmapTable(glyphs, numGlyphs) { function createCmapTable(glyphs, toUnicodeExtraMap, numGlyphs) {
const ranges = getRanges(glyphs, numGlyphs); const ranges = getRanges(glyphs, toUnicodeExtraMap, numGlyphs);
const numTables = ranges.at(-1)[1] > 0xffff ? 2 : 1; const numTables = ranges.at(-1)[1] > 0xffff ? 2 : 1;
let cmap = let cmap =
"\x00\x00" + // version "\x00\x00" + // version
@ -2914,12 +2945,17 @@ class Font {
const newMapping = adjustMapping( const newMapping = adjustMapping(
charCodeToGlyphId, charCodeToGlyphId,
hasGlyph, hasGlyph,
glyphZeroId glyphZeroId,
this.toUnicode
); );
this.toFontChar = newMapping.toFontChar; this.toFontChar = newMapping.toFontChar;
tables.cmap = { tables.cmap = {
tag: "cmap", tag: "cmap",
data: createCmapTable(newMapping.charCodeToGlyphId, numGlyphsOut), data: createCmapTable(
newMapping.charCodeToGlyphId,
newMapping.toUnicodeExtraMap,
numGlyphsOut
),
}; };
if (!tables["OS/2"] || !validateOS2Table(tables["OS/2"], font)) { if (!tables["OS/2"] || !validateOS2Table(tables["OS/2"], font)) {
@ -2992,6 +3028,7 @@ class Font {
const mapping = font.getGlyphMapping(properties); const mapping = font.getGlyphMapping(properties);
let newMapping = null; let newMapping = null;
let newCharCodeToGlyphId = mapping; let newCharCodeToGlyphId = mapping;
let toUnicodeExtraMap = null;
// When `cssFontInfo` is set, the font is used to render text in the HTML // When `cssFontInfo` is set, the font is used to render text in the HTML
// view (e.g. with Xfa) so nothing must be moved in the private use area. // view (e.g. with Xfa) so nothing must be moved in the private use area.
@ -2999,10 +3036,12 @@ class Font {
newMapping = adjustMapping( newMapping = adjustMapping(
mapping, mapping,
font.hasGlyphId.bind(font), font.hasGlyphId.bind(font),
glyphZeroId glyphZeroId,
this.toUnicode
); );
this.toFontChar = newMapping.toFontChar; this.toFontChar = newMapping.toFontChar;
newCharCodeToGlyphId = newMapping.charCodeToGlyphId; newCharCodeToGlyphId = newMapping.charCodeToGlyphId;
toUnicodeExtraMap = newMapping.toUnicodeExtraMap;
} }
const numGlyphs = font.numGlyphs; const numGlyphs = font.numGlyphs;
@ -3087,7 +3126,10 @@ class Font {
// OS/2 and Windows Specific metrics // OS/2 and Windows Specific metrics
builder.addTable("OS/2", createOS2Table(properties, newCharCodeToGlyphId)); builder.addTable("OS/2", createOS2Table(properties, newCharCodeToGlyphId));
// Character to glyphs mapping // Character to glyphs mapping
builder.addTable("cmap", createCmapTable(newCharCodeToGlyphId, numGlyphs)); builder.addTable(
"cmap",
createCmapTable(newCharCodeToGlyphId, toUnicodeExtraMap, numGlyphs)
);
// Font header // Font header
builder.addTable( builder.addTable(
"head", "head",