Merge pull request #15157 from calixteman/1778484

Add Unicode mappings to the font cmap so that the correct characters are produced when printing to PDF (bug 1778484)
This commit is contained in:
calixteman 2022-07-13 14:45:12 +02:00 committed by GitHub
commit 41b2f52f70
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -433,11 +433,14 @@ function convertCidString(charCode, cid, shouldThrow = false) {
* font that we build
* 'charCodeToGlyphId' - maps the new font char codes to glyph ids
*/
function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId) {
function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId, toUnicode) {
const newMap = Object.create(null);
const toUnicodeExtraMap = new Map();
const toFontChar = [];
const usedGlyphIds = new Set();
let privateUseAreaIndex = 0;
let nextAvailableFontCharCode = PRIVATE_USE_AREAS[privateUseAreaIndex][0];
const privateUseOffetStart = PRIVATE_USE_AREAS[privateUseAreaIndex][0];
let nextAvailableFontCharCode = privateUseOffetStart;
let privateUseOffetEnd = PRIVATE_USE_AREAS[privateUseAreaIndex][1];
for (let originalCharCode in charCodeToGlyphId) {
originalCharCode |= 0;
@ -461,17 +464,37 @@ function adjustMapping(charCodeToGlyphId, hasGlyph, newGlyphZeroId) {
glyphId = newGlyphZeroId;
}
// Fix for bug 1778484:
// The charcodes are moved into a private use area to fix some rendering
// issues (https://github.com/mozilla/pdf.js/pull/9340), but when printing
// to PDF the generated font will then contain the wrong characters. We can
// avoid that by also adding the Unicode values to the cmap, so that the
// print backend can map the glyph ids to the correct Unicode characters.
let unicode = toUnicode.get(originalCharCode);
if (typeof unicode === "string") {
unicode = unicode.codePointAt(0);
}
if (
unicode &&
unicode < privateUseOffetStart &&
!usedGlyphIds.has(glyphId)
) {
toUnicodeExtraMap.set(unicode, glyphId);
usedGlyphIds.add(glyphId);
}
newMap[fontCharCode] = glyphId;
toFontChar[originalCharCode] = fontCharCode;
}
return {
toFontChar,
charCodeToGlyphId: newMap,
toUnicodeExtraMap,
nextAvailableFontCharCode,
};
}
function getRanges(glyphs, numGlyphs) {
function getRanges(glyphs, toUnicodeExtraMap, numGlyphs) {
// Array.sort() sorts by characters, not numerically, so convert to an
// array of characters.
const codes = [];
@ -482,6 +505,14 @@ function getRanges(glyphs, numGlyphs) {
}
codes.push({ fontCharCode: charCode | 0, glyphId: glyphs[charCode] });
}
if (toUnicodeExtraMap) {
for (const [unicode, glyphId] of toUnicodeExtraMap) {
if (glyphId >= numGlyphs) {
continue;
}
codes.push({ fontCharCode: unicode, glyphId });
}
}
// Some fonts have zero glyphs and are used only for text selection, but
// there needs to be at least one to build a valid cmap table.
if (codes.length === 0) {
@ -513,8 +544,8 @@ function getRanges(glyphs, numGlyphs) {
return ranges;
}
function createCmapTable(glyphs, numGlyphs) {
const ranges = getRanges(glyphs, numGlyphs);
function createCmapTable(glyphs, toUnicodeExtraMap, numGlyphs) {
const ranges = getRanges(glyphs, toUnicodeExtraMap, numGlyphs);
const numTables = ranges.at(-1)[1] > 0xffff ? 2 : 1;
let cmap =
"\x00\x00" + // version
@ -2914,12 +2945,17 @@ class Font {
const newMapping = adjustMapping(
charCodeToGlyphId,
hasGlyph,
glyphZeroId
glyphZeroId,
this.toUnicode
);
this.toFontChar = newMapping.toFontChar;
tables.cmap = {
tag: "cmap",
data: createCmapTable(newMapping.charCodeToGlyphId, numGlyphsOut),
data: createCmapTable(
newMapping.charCodeToGlyphId,
newMapping.toUnicodeExtraMap,
numGlyphsOut
),
};
if (!tables["OS/2"] || !validateOS2Table(tables["OS/2"], font)) {
@ -2992,6 +3028,7 @@ class Font {
const mapping = font.getGlyphMapping(properties);
let newMapping = null;
let newCharCodeToGlyphId = mapping;
let toUnicodeExtraMap = null;
// When `cssFontInfo` is set, the font is used to render text in the HTML
// view (e.g. with Xfa) so nothing must be moved in the private use area.
@ -2999,10 +3036,12 @@ class Font {
newMapping = adjustMapping(
mapping,
font.hasGlyphId.bind(font),
glyphZeroId
glyphZeroId,
this.toUnicode
);
this.toFontChar = newMapping.toFontChar;
newCharCodeToGlyphId = newMapping.charCodeToGlyphId;
toUnicodeExtraMap = newMapping.toUnicodeExtraMap;
}
const numGlyphs = font.numGlyphs;
@ -3087,7 +3126,10 @@ class Font {
// OS/2 and Windows Specific metrics
builder.addTable("OS/2", createOS2Table(properties, newCharCodeToGlyphId));
// Character to glyphs mapping
builder.addTable("cmap", createCmapTable(newCharCodeToGlyphId, numGlyphs));
builder.addTable(
"cmap",
createCmapTable(newCharCodeToGlyphId, toUnicodeExtraMap, numGlyphs)
);
// Font header
builder.addTable(
"head",