From 567be2972025e209cccaaa3d74c32486a21cc6e5 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 8 Sep 2011 03:16:33 +0200 Subject: [PATCH 1/4] Add more glue between glyph mapping and code mapping --- fonts.js | 81 ++++++++++++++++++++++++++++---------------------------- pdf.js | 46 +++++++++++++++++++++----------- 2 files changed, 71 insertions(+), 56 deletions(-) diff --git a/fonts.js b/fonts.js index 659c5157e..bfdbb0b4a 100755 --- a/fonts.js +++ b/fonts.js @@ -711,7 +711,7 @@ var Font = (function Font() { var encoding = properties.encoding; for (var index in encoding) { - var code = encoding[index]; + var code = encoding[index].unicode; if (firstCharIndex > code || !firstCharIndex) firstCharIndex = code; if (lastCharIndex < code) @@ -970,15 +970,9 @@ var Font = (function Font() { if (index) { deltas.push(index); - var code = encoding[index]; - for (var glyph in properties.glyphs) { - if (properties.glyphs[glyph] == code) - break; - } - var unicode = j + kCmapGlyphOffset; - properties.glyphs[glyph] = encoding[j] = unicode; - glyphs.push({ glyph: glyph, unicode: unicode }); + encoding[j].unicode = unicode; + glyphs.push({ unicode: unicode }); } } @@ -1023,8 +1017,10 @@ var Font = (function Font() { var start = denseRange[0]; var end = denseRange[1]; var index = firstCode; - for (var j = start; j <= end; j++) - encoding[index++] = glyphs[j - firstCode - 1].unicode; + for (var j = start; j <= end; j++) { + var code = j - firstCode - 1; + encoding[index++] = { unicode: glyphs[code].unicode }; + } return cmap.data = createCMapTable(glyphs); } } @@ -1118,23 +1114,6 @@ var Font = (function Font() { // U+00AD (soft hyphen) is not drawn. // So, offset all the glyphs by 0xFF to avoid these cases and use // the encoding to map incoming characters to the new glyph positions - - var glyphs = []; - var encoding = properties.encoding; - - for (var i = 1; i < numGlyphs; i++) - glyphs.push({ unicode: i + kCmapGlyphOffset }); - - if ('undefined' == typeof(encoding[0])) { - // the font is directly characters to glyphs with no encoding - // so create an identity encoding - for (i = 0; i < numGlyphs; i++) - encoding[i] = i + kCmapGlyphOffset; - } else { - for (var code in encoding) - encoding[code] += kCmapGlyphOffset; - } - if (!cmap) { cmap = { tag: 'cmap', @@ -1142,6 +1121,21 @@ var Font = (function Font() { }; tables.push(cmap); } + + var encoding = properties.encoding; + if (!encoding[0]) { + // the font is directly characters to glyphs with no encoding + // so create an identity encoding + for (i = 0; i < numGlyphs; i++) + encoding[i] = { unicode: i + kCmapGlyphOffset }; + } else { + for (var code in encoding) + encoding[code].unicode += kCmapGlyphOffset; + } + + var glyphs = []; + for (var i = 1; i < numGlyphs; i++) + glyphs.push({ unicode: i + kCmapGlyphOffset }); cmap.data = createCMapTable(glyphs); } else { replaceCMapTable(cmap, font, properties); @@ -1361,14 +1355,14 @@ var Font = (function Font() { // loop should never end on the last byte for (var i = 0; i < length; i++) { var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]); - var unicode = encoding[charcode]; + var unicode = encoding[charcode].unicode; str += String.fromCharCode(unicode); } } else { for (var i = 0; i < chars.length; ++i) { var charcode = chars.charCodeAt(i); - var unicode = encoding[charcode]; + var unicode = encoding[charcode].unicode; if ('undefined' == typeof(unicode)) { warn('Unencoded charcode ' + charcode); unicode = charcode; @@ -1376,7 +1370,7 @@ var Font = (function Font() { // Check if the glyph has already been converted if (!IsNum(unicode)) - unicode = encoding[charcode] = this.glyphs[unicode]; + unicode = encoding[charcode].unicode = this.glyphs[unicode].unicode; // Handle surrogate pairs if (unicode > 0xFFFF) { @@ -1830,8 +1824,8 @@ var Type1Parser = function() { var glyph = getToken(); if ('undefined' == typeof(properties.differences[index])) { - properties.encoding[index] = glyph; - properties.glyphs[glyph] = GlyphsUnicode[glyph] || index; + var mapping = { unicode: GlyphsUnicode[glyph] || j }; + properties.glyphs[glyph] = properties.encoding[index] = mapping; } getToken(); // read the in 'put' } @@ -2000,14 +1994,14 @@ CFF.prototype = { for (var i = 0; i < glyphs.length; i++) { var glyph = glyphs[i]; - var unicode = properties.glyphs[glyph.glyph]; - if (!unicode) { + var mapping = properties.glyphs[glyph.glyph]; + if (!mapping) { if (glyph.glyph != '.notdef') missings.push(glyph.glyph); } else { charstrings.push({ glyph: glyph.glyph, - unicode: unicode, + unicode: mapping.unicode, charstring: glyph.data, width: glyph.width, lsb: glyph.lsb @@ -2340,17 +2334,24 @@ var Type2CFF = (function() { } } - if (code == -1) - index = code = properties.glyphs[glyph] || index; + if (code == -1) { + var mapping = properties.glyphs[glyph] || {}; + index = code = mapping.unicode || index; + } var width = widths[code] || defaultWidth; if (code <= 0x1f || (code >= 127 && code <= 255)) code += kCmapGlyphOffset; - properties.encoding[index] = code; + properties.glyphs[glyph] = properties.encoding[index] = { + unicode: code, + width: width + }; + charstrings.push({ unicode: code, - width: width, gid: i + width: width, + gid: i }); index++; } diff --git a/pdf.js b/pdf.js index 5b0558940..73575a4f3 100644 --- a/pdf.js +++ b/pdf.js @@ -4194,13 +4194,19 @@ var PartialEvaluator = (function() { var glyphsData = glyphsStream.getBytes(0); // Glyph ids are big-endian 2-byte values - // Set this to 0 to verify the font has an encoding. var encoding = properties.encoding; - encoding[0] = 0; + + // Set encoding 0 to later verify the font has an encoding + encoding[0] = { unicode: 0 }; for (var j = 0; j < glyphsData.length; j++) { var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; - if (glyphID != 0) - encoding[j >> 1] = glyphID; + if (glyphID == 0) + continue; + + encoding[j >> 1] = { + unicode: glyphID, + width: 0 + }; } } else if (type == 'CIDFontType0') { var encoding = xref.fetchIfRef(dict.get('Encoding')); @@ -4269,7 +4275,10 @@ var PartialEvaluator = (function() { var glyph = differences[i] || baseEncoding[i]; if (glyph) { var index = GlyphsUnicode[glyph] || i; - glyphs[glyph] = map[i] = index; + glyphs[glyph] = map[i] = { + unicode: index, + width: properties.widths[i - firstChar] || properties.defaultWidth + }; // If there is no file, the character mapping can't be modified // but this is unlikely that there is any standard encoding with @@ -4278,7 +4287,7 @@ var PartialEvaluator = (function() { continue; if (index <= 0x1f || (index >= 127 && index <= 255)) - glyphs[glyph] = map[i] += kCmapGlyphOffset; + map[i].unicode += kCmapGlyphOffset; } } @@ -4316,7 +4325,10 @@ var PartialEvaluator = (function() { var endRange = tokens[j + 1]; var code = tokens[j + 2]; while (startRange < endRange) { - map[startRange] = code++; + map[startRange] = { + unicode: code++, + width: 0 + } ++startRange; } } @@ -4327,7 +4339,10 @@ var PartialEvaluator = (function() { for (var j = 0; j < tokens.length; j += 2) { var index = tokens[j]; var code = tokens[j + 1]; - map[index] = code; + map[index] = { + unicode: code, + width: 0 + }; } break; @@ -4478,19 +4493,18 @@ var PartialEvaluator = (function() { descent: descriptor.get('Descent'), xHeight: descriptor.get('XHeight'), capHeight: descriptor.get('CapHeight'), + defaultWidth: descriptor.get('MissingWidth') || 0, flags: descriptor.get('Flags'), italicAngle: descriptor.get('ItalicAngle'), differences: [], - widths: [], + widths: (function() { + var glyphWidths = {}; + for (var i = 0; i <= widths.length; i++) + glyphWidths[firstChar++] = widths[i]; + return glyphWidths; + })(), encoding: {} }; - - // XXX Encoding and Glyphs should point to the same object so it will - // be hard to be out of sync. The object could contains the unicode and - // the width of the glyph. - for (var i = 0; i <= widths.length; i++) - properties.widths[firstChar++] = widths[i]; - properties.glyphs = this.extractEncoding(dict, xref, properties); return { From a7ef696fea4d4abdb2e70cda0c1814b255c56a93 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 8 Sep 2011 03:21:27 +0200 Subject: [PATCH 2/4] Add an empty 'widths' to the properties object for base font --- pdf.js | 1 + 1 file changed, 1 insertion(+) diff --git a/pdf.js b/pdf.js index 5960ab19d..40ffde688 100644 --- a/pdf.js +++ b/pdf.js @@ -4435,6 +4435,7 @@ var PartialEvaluator = (function() { type: type.name, encoding: map, differences: [], + widths: {}, firstChar: 0, lastChar: 256 }; From 71d0f0d55c4584e2731a9f0e573c183a87f39ea9 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 8 Sep 2011 13:03:30 +0200 Subject: [PATCH 3/4] Remove a useless check in charsToUnicode --- fonts.js | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fonts.js b/fonts.js index bfdbb0b4a..5622e84e4 100755 --- a/fonts.js +++ b/fonts.js @@ -444,7 +444,6 @@ var Font = (function Font() { var constructor = function font_constructor(name, file, properties) { this.name = name; this.encoding = properties.encoding; - this.glyphs = properties.glyphs; this.sizes = []; var names = name.split("+"); @@ -1368,10 +1367,6 @@ var Font = (function Font() { unicode = charcode; } - // Check if the glyph has already been converted - if (!IsNum(unicode)) - unicode = encoding[charcode].unicode = this.glyphs[unicode].unicode; - // Handle surrogate pairs if (unicode > 0xFFFF) { str += String.fromCharCode(unicode & 0xFFFF); From 81d7d1a72515450b25b07eaf482a3591820db46f Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 8 Sep 2011 17:57:37 +0200 Subject: [PATCH 4/4] Add widths information for the most common fonts cases --- fonts.js | 28 +++++++++++++++++-------- pdf.js | 64 ++++++++++++++++++++++++++------------------------------ 2 files changed, 49 insertions(+), 43 deletions(-) diff --git a/fonts.js b/fonts.js index 5622e84e4..cca1d816c 100755 --- a/fonts.js +++ b/fonts.js @@ -140,11 +140,21 @@ var FontMeasure = (function FontMeasure() { ctx.font = rule; current = font; }, - measureText: function fonts_measureText(text) { + measureText: function fonts_measureText(text, encoding, size) { var width; if (measureCache && (width = measureCache[text])) return width; - width = ctx.measureText(text).width / kScalePrecision; + + try { + width = 0.0; + for (var i = 0; i < text.length; i++) { + var charWidth = encoding[text.charCodeAt(i)].width; + width += parseFloat(charWidth); + } + width = width * size / 1000; + } catch(e) { + width = ctx.measureText(text).width / kScalePrecision; + } if (measureCache) measureCache[text] = width; return width; @@ -468,8 +478,7 @@ var Font = (function Font() { (fontName.indexOf('Italic') != -1); // Use 'name' instead of 'fontName' here because the original - // name ArialNarrow for example will be replaced by Helvetica. - this.narrow = (name.indexOf("Narrow") != -1) + // name ArialBlack for example will be replaced by Helvetica. this.black = (name.indexOf("Black") != -1) this.loadedName = fontName.split('-')[0]; @@ -1018,7 +1027,9 @@ var Font = (function Font() { var index = firstCode; for (var j = start; j <= end; j++) { var code = j - firstCode - 1; - encoding[index++] = { unicode: glyphs[code].unicode }; + var mapping = encoding[index + 1] || {}; + mapping.unicode = glyphs[code].unicode; + encoding[index++] = mapping; } return cmap.data = createCMapTable(glyphs); } @@ -2329,12 +2340,11 @@ var Type2CFF = (function() { } } - if (code == -1) { - var mapping = properties.glyphs[glyph] || {}; + var mapping = properties.glyphs[glyph] || {}; + if (code == -1) index = code = mapping.unicode || index; - } - var width = widths[code] || defaultWidth; + var width = mapping.width || defaultWidth; if (code <= 0x1f || (code >= 127 && code <= 255)) code += kCmapGlyphOffset; diff --git a/pdf.js b/pdf.js index 40ffde688..7fff8ae62 100644 --- a/pdf.js +++ b/pdf.js @@ -4273,22 +4273,23 @@ var PartialEvaluator = (function() { var glyphs = {}; for (var i = firstChar; i <= lastChar; i++) { var glyph = differences[i] || baseEncoding[i]; - if (glyph) { - var index = GlyphsUnicode[glyph] || i; - glyphs[glyph] = map[i] = { - unicode: index, - width: properties.widths[i - firstChar] || properties.defaultWidth - }; + var index = GlyphsUnicode[glyph] || i; + map[i] = { + unicode: index, + width: properties.widths[i] || properties.defaultWidth + }; - // If there is no file, the character mapping can't be modified - // but this is unlikely that there is any standard encoding with - // chars below 0x1f, so that's fine. - if (!properties.file) - continue; + if (glyph) + glyphs[glyph] = map[i]; - if (index <= 0x1f || (index >= 127 && index <= 255)) - map[i].unicode += kCmapGlyphOffset; - } + // If there is no file, the character mapping can't be modified + // but this is unlikely that there is any standard encoding with + // chars below 0x1f, so that's fine. + if (!properties.file) + continue; + + if (index <= 0x1f || (index >= 127 && index <= 255)) + map[i].unicode += kCmapGlyphOffset; } if (type == 'TrueType' && dict.has('ToUnicode') && differences) { @@ -4325,10 +4326,9 @@ var PartialEvaluator = (function() { var endRange = tokens[j + 1]; var code = tokens[j + 2]; while (startRange < endRange) { - map[startRange] = { - unicode: code++, - width: 0 - } + var mapping = map[startRange] || {}; + mapping.unicode = code++; + map[startRange] = mapping; ++startRange; } } @@ -4339,10 +4339,9 @@ var PartialEvaluator = (function() { for (var j = 0; j < tokens.length; j += 2) { var index = tokens[j]; var code = tokens[j + 1]; - map[index] = { - unicode: code, - width: 0 - }; + var mapping = map[index] || {}; + mapping.unicode = code; + map[index] = mapping; } break; @@ -4494,13 +4493,13 @@ var PartialEvaluator = (function() { descent: descriptor.get('Descent'), xHeight: descriptor.get('XHeight'), capHeight: descriptor.get('CapHeight'), - defaultWidth: descriptor.get('MissingWidth') || 0, + defaultWidth: parseFloat(descriptor.get('MissingWidth')) || 0, flags: descriptor.get('Flags'), italicAngle: descriptor.get('ItalicAngle'), differences: [], widths: (function() { var glyphWidths = {}; - for (var i = 0; i <= widths.length; i++) + for (var i = 0; i < widths.length; i++) glyphWidths[firstChar++] = widths[i]; return glyphWidths; })(), @@ -4898,6 +4897,7 @@ var CanvasGraphics = (function() { var scaleFactorX = 1, scaleFactorY = 1; var font = current.font; + var baseText= text; if (font) { if (current.fontSize <= kRasterizerMin) { scaleFactorX = scaleFactorY = kScalePrecision; @@ -4907,26 +4907,22 @@ var CanvasGraphics = (function() { text = font.charsToUnicode(text); } + var encoding = current.font.encoding; + var size = current.fontSize; var charSpacing = current.charSpacing; var wordSpacing = current.wordSpacing; var textHScale = current.textHScale; - // This is a poor simulation for Arial Narrow while font-stretch - // is not implemented (bug 3512) - if (current.font.narrow) { - textHScale += 0.2; - charSpacing -= (0.09 * current.fontSize); - } - if (charSpacing != 0 || wordSpacing != 0 || textHScale != 1) { scaleFactorX *= textHScale; ctx.scale(1 / textHScale, 1); var width = 0; for (var i = 0, ii = text.length; i < ii; ++i) { - var c = text.charAt(i); + var c = baseText.charAt(i); ctx.fillText(c, 0, 0); - var charWidth = FontMeasure.measureText(c) + charSpacing; + var charWidth = FontMeasure.measureText(c, encoding, size); + charWidth += charSpacing; if (c.charCodeAt(0) == 32) charWidth += wordSpacing; ctx.translate(charWidth * scaleFactorX, 0); @@ -4935,7 +4931,7 @@ var CanvasGraphics = (function() { current.x += width; } else { ctx.fillText(text, 0, 0); - current.x += FontMeasure.measureText(text); + current.x += FontMeasure.measureText(baseText, encoding, size); } this.ctx.restore();