From 95ccb3828311bd15d095866c40cfe9be6a7ec842 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 30 Aug 2011 00:56:02 +0200 Subject: [PATCH 01/15] Clean up a bit the encoding/charset/glyphs dance --- fonts.js | 104 +++++++++++++++++++++---------------------------------- pdf.js | 89 +++++++++++++++++------------------------------ 2 files changed, 71 insertions(+), 122 deletions(-) diff --git a/fonts.js b/fonts.js index 2d7ff94f5..c40b9f192 100755 --- a/fonts.js +++ b/fonts.js @@ -638,30 +638,28 @@ var Font = (function Font() { var ulUnicodeRange3 = 0; var ulUnicodeRange4 = 0; - var charset = properties.charset; - if (charset && charset.length) { - var firstCharIndex = null; - var lastCharIndex = 0; + var firstCharIndex = null; + var lastCharIndex = 0; - for (var i = 0; i < charset.length; i++) { - var code = GlyphsUnicode[charset[i]]; - if (firstCharIndex > code || !firstCharIndex) - firstCharIndex = code; - if (lastCharIndex < code) - lastCharIndex = code; + var encoding = properties.encoding; + for (var index in encoding) { + var code = encoding[index]; + if (firstCharIndex > code || !firstCharIndex) + firstCharIndex = code; + if (lastCharIndex < code) + lastCharIndex = code; - var position = getUnicodeRangeFor(code); - if (position < 32) { - ulUnicodeRange1 |= 1 << position; - } else if (position < 64) { - ulUnicodeRange2 |= 1 << position - 32; - } else if (position < 96) { - ulUnicodeRange3 |= 1 << position - 64; - } else if (position < 123) { - ulUnicodeRange4 |= 1 << position - 96; - } else { - error('Unicode ranges Bits > 123 are reserved for internal usage'); - } + var position = getUnicodeRangeFor(code); + if (position < 32) { + ulUnicodeRange1 |= 1 << position; + } else if (position < 64) { + ulUnicodeRange2 |= 1 << position - 32; + } else if (position < 96) { + ulUnicodeRange3 |= 1 << position - 64; + } else if (position < 123) { + ulUnicodeRange4 |= 1 << position - 96; + } else { + error('Unicode ranges Bits > 123 are reserved for internal usage'); } } @@ -847,7 +845,6 @@ var Font = (function Font() { } var encoding = properties.encoding; - var charset = properties.charset; for (var i = 0; i < numRecords; i++) { var table = records[i]; font.pos = start + table.offset; @@ -856,7 +853,9 @@ var Font = (function Font() { var length = int16(font.getBytes(2)); var language = int16(font.getBytes(2)); - if (format == 0) { + if (format == 4) { + return; + } else if (format == 0) { // Characters below 0x20 are controls characters that are hardcoded // into the platform so if some characters in the font are assigned // under this limit they will not be displayed so let's rewrite the @@ -871,35 +870,15 @@ var Font = (function Font() { } } - var rewrite = false; - for (var code in encoding) { - if (code < 0x20 && encoding[code]) - rewrite = true; - - if (rewrite) - encoding[code] = parseInt(code) + 0x1F; - } - - if (rewrite) { + if (properties.firstChar < 0x20) + var code = 0; for (var j = 0; j < glyphs.length; j++) { + var glyph = glyphs[j]; glyphs[j].unicode += 0x1F; - } + properties.glyphs[glyph.glyph] = encoding[++code] = glyph.unicode; } - cmap.data = createCMapTable(glyphs, deltas); - } else if (format == 6 && numRecords == 1 && !encoding.empty) { - // Format 0 alone is not allowed by the sanitizer so let's rewrite - // that to a 3-1-4 Unicode BMP table - TODO('Use an other source of informations than ' + - 'charset here, it is not reliable'); - var glyphs = []; - for (var j = 0; j < charset.length; j++) { - glyphs.push({ - unicode: GlyphsUnicode[charset[j]] || 0 - }); - } - - cmap.data = createCMapTable(glyphs); - } else if (format == 6 && numRecords == 1) { + return cmap.data = createCMapTable(glyphs, deltas); + } else if (format == 6) { // Format 6 is a 2-bytes dense mapping, which means the font data // lives glue together even if they are pretty far in the unicode // table. (This looks weird, so I can have missed something), this @@ -912,6 +891,8 @@ var Font = (function Font() { var min = 0xffff, max = 0; for (var j = 0; j < entryCount; j++) { var charcode = int16(font.getBytes(2)); + if (!charcode) + continue; glyphs.push(charcode); if (charcode < min) @@ -939,7 +920,7 @@ var Font = (function Font() { var index = firstCode; for (var j = start; j <= end; j++) encoding[index++] = glyphs[j - firstCode - 1].unicode; - cmap.data = createCMapTable(glyphs); + return cmap.data = createCMapTable(glyphs); } } }; @@ -1288,10 +1269,6 @@ var Font = (function Font() { unicode = charcode; } - // Check if the glyph has already been converted - if (!IsNum(unicode)) - unicode = encoding[unicode] = GlyphsUnicode[unicode.name]; - // Handle surrogate pairs if (unicode > 0xFFFF) { str += String.fromCharCode(unicode & 0xFFFF); @@ -1715,9 +1692,6 @@ var Type1Parser = function() { properties.textMatrix = matrix; break; case '/Encoding': - if (!properties.builtInEncoding) - break; - var size = parseInt(getToken()); getToken(); // read in 'array' @@ -1726,9 +1700,12 @@ var Type1Parser = function() { if (token == 'dup') { var index = parseInt(getToken()); var glyph = getToken(); - properties.encoding[index] = GlyphsUnicode[glyph]; + + if (!properties.differences[j]) { + var code = GlyphsUnicode[glyph]; + properties.glyphs[glyph] = properties.encoding[index] = code; + } getToken(); // read the in 'put' - j = index; } } break; @@ -1903,7 +1880,7 @@ CFF.prototype = { missings.push(glyph.glyph); } else { charstrings.push({ - glyph: glyph, + glyph: glyph.glyph, unicode: unicode, charstring: glyph.data, width: glyph.width, @@ -2079,7 +2056,7 @@ CFF.prototype = { var count = glyphs.length; for (var i = 0; i < count; i++) { - var index = CFFStrings.indexOf(charstrings[i].glyph.glyph); + var index = CFFStrings.indexOf(charstrings[i].glyph); // Some characters like asterikmath && circlecopyrt are // missing from the original strings, for the moment let's // map them to .notdef and see later if it cause any @@ -2176,7 +2153,6 @@ var Type2CFF = (function() { var stringIndex = this.parseIndex(dictIndex.endPos); var gsubrIndex = this.parseIndex(stringIndex.endPos); - var strings = this.getStrings(stringIndex); var baseDict = this.parseDict(dictIndex.get(0)); @@ -2219,7 +2195,7 @@ var Type2CFF = (function() { var charstrings = []; for (var i = 0, ii = charsets.length; i < ii; ++i) { var charName = charsets[i]; - var charCode = GlyphsUnicode[charName]; + var charCode = properties.glyphs[charName]; if (charCode) { var width = widths[charCode] || defaultWidth; charstrings.push({unicode: charCode, width: width, gid: i}); diff --git a/pdf.js b/pdf.js index e7095b692..32e13817f 100644 --- a/pdf.js +++ b/pdf.js @@ -4199,8 +4199,6 @@ var PartialEvaluator = (function() { var builtInEncoding = false; var encodingMap = {}; - var glyphMap = {}; - var charset = []; if (compositeFont) { // Special CIDFont support // XXX only CIDFontType2 supported for now @@ -4242,69 +4240,61 @@ var PartialEvaluator = (function() { if (fontDict.has('Encoding')) { var encoding = xref.fetchIfRef(fontDict.get('Encoding')); if (IsDict(encoding)) { - // Build a map of between codes and glyphs - // Load the base encoding var baseName = encoding.get('BaseEncoding'); - if (baseName) { + if (baseName) baseEncoding = Encodings[baseName.name].slice(); - } // Load the differences between the base and original var differences = encoding.get('Differences'); var index = 0; for (var j = 0; j < differences.length; j++) { var data = differences[j]; - if (IsNum(data)) { + if (IsNum(data)) index = data; - } else { + else diffEncoding[index++] = data.name; - } } } else if (IsName(encoding)) { baseEncoding = Encodings[encoding.name].slice(); + } else { + error("Encoding is not a Name nor a Dict"); } } + var fontType = subType.name; if (!baseEncoding) { - var type = subType.name; - if (type == 'TrueType') { - baseEncoding = Encodings.WinAnsiEncoding.slice(); - } else if (type == 'Type1') { - baseEncoding = Encodings.StandardEncoding.slice(); - if (!diffEncoding.length) - builtInEncoding = true; - } else { - error('Unknown type of font'); + switch (fontType) { + case 'TrueType': + baseEncoding = Encodings.WinAnsiEncoding.slice(); + break; + case 'Type1': + baseEncoding = Encodings.StandardEncoding.slice(); + break; + default: + warn('Unknown type of font: ' + fontType); + break; } } + // firstChar and width are required + // (except for 14 standard fonts) + var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')) || 0; + var lastChar = xref.fetchIfRef(fontDict.get('LastChar')) || 0; + var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; + // merge in the differences - var length = baseEncoding.length > diffEncoding.length ? - baseEncoding.length : diffEncoding.length; - for (var i = 0, ii = length; i < ii; ++i) { - var diffGlyph = diffEncoding[i]; - var baseGlyph = baseEncoding[i]; - if (diffGlyph) { - glyphMap[i] = diffGlyph; - encodingMap[i] = GlyphsUnicode[diffGlyph]; - } else if (baseGlyph) { - glyphMap[i] = baseGlyph; - encodingMap[i] = GlyphsUnicode[baseGlyph]; - } + var glyphsMap = {}; + for (var i = firstChar; i <= lastChar; i++) { + var glyph = diffEncoding[i] || baseEncoding[i]; + if (glyph) + glyphsMap[glyph] = encodingMap[i] = GlyphsUnicode[glyph]; } - if (fontDict.has('ToUnicode')) { - encodingMap['empty'] = true; - var glyphsMap = {}; - for (var p in glyphMap) - glyphsMap[glyphMap[p]] = encodingMap[p]; - + if (fontDict.has('ToUnicode') && differences) { var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode')); if (IsName(cmapObj)) { error('ToUnicode file cmap translation not implemented'); } else if (IsStream(cmapObj)) { - var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); - var tokens = []; var token = ''; @@ -4334,6 +4324,8 @@ var PartialEvaluator = (function() { var startRange = parseInt('0x' + tokens[j]); var endRange = parseInt('0x' + tokens[j + 1]); var code = parseInt('0x' + tokens[j + 2]); + for (var k = startRange; k < endRange; k++) + encodingMap[k] = code++; } break; @@ -4360,15 +4352,6 @@ var PartialEvaluator = (function() { } } } - - // firstChar and width are required - // (except for 14 standard fonts) - var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); - var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; - for (var j = 0; j < widths.length; j++) { - if (widths[j]) - charset.push(glyphMap[j + firstChar]); - } } if (!fd) { @@ -4396,7 +4379,6 @@ var PartialEvaluator = (function() { } var descriptor = xref.fetch(fd); - var fontName = fontDict.get('Name'); if (!fontName) fontName = xref.fetchIfRef(descriptor.get('FontName'));; @@ -4414,14 +4396,6 @@ var PartialEvaluator = (function() { } } - if (descriptor.has('CharSet')) { - // Get the font charset if any (meaningful only in Type 1) - charset = descriptor.get('CharSet'); - assertWellFormed(IsString(charset), 'invalid charset'); - charset = charset.split('/'); - charset.shift(); - } - var widths = fontDict.get('Widths'); if (widths) { var glyphWidths = {}; @@ -4435,9 +4409,8 @@ var PartialEvaluator = (function() { subtype: fileType, widths: glyphWidths, encoding: encodingMap, + differences: diffEncoding, glyphs: glyphsMap || GlyphsUnicode, - builtInEncoding: builtInEncoding, - charset: charset, firstChar: fontDict.get('FirstChar'), lastChar: fontDict.get('LastChar'), bbox: descriptor.get('FontBBox'), From e58b076eab0b6e31a1ee8bd70ed5b154f141f548 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 30 Aug 2011 00:59:37 +0200 Subject: [PATCH 02/15] Fix a little typo --- fonts.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fonts.js b/fonts.js index 2172d12b2..a51786c3e 100755 --- a/fonts.js +++ b/fonts.js @@ -870,13 +870,15 @@ var Font = (function Font() { } } - if (properties.firstChar < 0x20) + if (properties.firstChar < 0x20) { var code = 0; for (var j = 0; j < glyphs.length; j++) { var glyph = glyphs[j]; glyphs[j].unicode += 0x1F; properties.glyphs[glyph.glyph] = encoding[++code] = glyph.unicode; + } } + return cmap.data = createCMapTable(glyphs, deltas); } else if (format == 6) { // Format 6 is a 2-bytes dense mapping, which means the font data From 6a7b37ab68e6612cbda567aaef790cf9d0c75b59 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 30 Aug 2011 01:03:39 +0200 Subject: [PATCH 03/15] Fix a little typo --- fonts.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fonts.js b/fonts.js index a51786c3e..94aee8445 100755 --- a/fonts.js +++ b/fonts.js @@ -1703,7 +1703,7 @@ var Type1Parser = function() { var index = parseInt(getToken()); var glyph = getToken(); - if (!properties.differences[j]) { + if (!properties.encoding[index]) { var code = GlyphsUnicode[glyph]; properties.glyphs[glyph] = properties.encoding[index] = code; } From 341de1ff42ed9f17f097af39f248a6a91f120f4f Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 30 Aug 2011 02:34:35 +0200 Subject: [PATCH 04/15] Clean up encoding parsing --- fonts.js | 11 ++++++++--- pdf.js | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fonts.js b/fonts.js index 94aee8445..39936d432 100755 --- a/fonts.js +++ b/fonts.js @@ -385,6 +385,7 @@ var Font = (function Font() { var constructor = function font_constructor(name, file, properties) { this.name = name; this.encoding = properties.encoding; + this.glyphs = properties.glyphs; this.sizes = []; // If the font is to be ignored, register it like an already loaded font @@ -1271,6 +1272,10 @@ var Font = (function Font() { unicode = charcode; } + // Check if the glyph has already been converted + if (!IsNum(unicode)) + unicode = encoding[charcode] = this.glyphs[unicode]; + // Handle surrogate pairs if (unicode > 0xFFFF) { str += String.fromCharCode(unicode & 0xFFFF); @@ -1703,9 +1708,9 @@ var Type1Parser = function() { var index = parseInt(getToken()); var glyph = getToken(); - if (!properties.encoding[index]) { - var code = GlyphsUnicode[glyph]; - properties.glyphs[glyph] = properties.encoding[index] = code; + if ('undefined' == typeof(properties.differences[index])) { + properties.encoding[index] = glyph; + properties.glyphs[glyph] = GlyphsUnicode[glyph]; } getToken(); // read the in 'put' } diff --git a/pdf.js b/pdf.js index 597e8b23f..550e72920 100644 --- a/pdf.js +++ b/pdf.js @@ -4290,7 +4290,7 @@ var PartialEvaluator = (function() { glyphsMap[glyph] = encodingMap[i] = GlyphsUnicode[glyph]; } - if (fontDict.has('ToUnicode') && differences) { + if (fontType == 'TrueType' && fontDict.has('ToUnicode') && differences) { var cmapObj = xref.fetchIfRef(fontDict.get('ToUnicode')); if (IsName(cmapObj)) { error('ToUnicode file cmap translation not implemented'); @@ -4358,6 +4358,7 @@ var PartialEvaluator = (function() { var baseFontName = fontDict.get('BaseFont'); if (!IsName(baseFontName)) return null; + // Using base font name as a font name. baseFontName = baseFontName.name.replace(/[\+,\-]/g, '_'); if (/^Symbol(_?(Bold|Italic))*$/.test(baseFontName)) { From 55d04c0cd9144e8153fc28482dbb68e5dec231ac Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 30 Aug 2011 04:00:03 +0200 Subject: [PATCH 05/15] Adjust precision to have correct widths for arial tests documents --- fonts.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fonts.js b/fonts.js index 39936d432..4aa2fb142 100755 --- a/fonts.js +++ b/fonts.js @@ -57,7 +57,7 @@ var stdFontMap = { }; var FontMeasure = (function FontMeasure() { - var kScalePrecision = 50; + var kScalePrecision = 30; var ctx = document.createElement('canvas').getContext('2d'); ctx.scale(1 / kScalePrecision, 1); From 37f88291d0b91d8bce07d26cd67aac304e239e00 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 30 Aug 2011 04:13:41 +0200 Subject: [PATCH 06/15] Fix a little issue with 'ff' on the pdf spec page 629 --- pdf.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdf.js b/pdf.js index 550e72920..0302e7c1a 100644 --- a/pdf.js +++ b/pdf.js @@ -4287,7 +4287,7 @@ var PartialEvaluator = (function() { for (var i = firstChar; i <= lastChar; i++) { var glyph = diffEncoding[i] || baseEncoding[i]; if (glyph) - glyphsMap[glyph] = encodingMap[i] = GlyphsUnicode[glyph]; + glyphsMap[glyph] = encodingMap[i] = GlyphsUnicode[glyph] || i; } if (fontType == 'TrueType' && fontDict.has('ToUnicode') && differences) { From 395a46c85e69c653162ac4cadb67409fbdfdb6dc Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Tue, 30 Aug 2011 19:52:24 +0200 Subject: [PATCH 07/15] Support Type1C built-in encoding - part1 --- fonts.js | 99 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 29 deletions(-) diff --git a/fonts.js b/fonts.js index 4aa2fb142..48e53d0c1 100755 --- a/fonts.js +++ b/fonts.js @@ -1019,9 +1019,8 @@ var Font = (function Font() { var glyphs = []; var encoding = properties.encoding; - for (var i = 1; i < numGlyphs; i++) { + for (var i = 1; i < numGlyphs; i++) glyphs.push({ unicode: i + kCmapGlyphOffset }); - } if ('undefined' == typeof(encoding[0])) { // the font is directly characters to glyphs with no encoding @@ -2133,7 +2132,6 @@ CFF.prototype = { }; var Type2CFF = (function() { - // TODO: replace parsing code with the Type2Parser in font_utils.js function constructor(file, properties) { var bytes = file.getBytes(); @@ -2146,11 +2144,11 @@ var Type2CFF = (function() { data.push(bytes[i]); this.data = data; - this.parse(); + this.parse(properties); }; constructor.prototype = { - parse: function cff_parse() { + parse: function cff_parse(properties) { var header = this.parseHeader(); var nameIndex = this.parseIndex(header.endPos); @@ -2174,26 +2172,25 @@ var Type2CFF = (function() { baseDict = this.parseDict(privBytes); var privDict = this.getPrivDict(baseDict, strings); - TODO('Parse encoding'); var charStrings = this.parseIndex(topDict['CharStrings']); - var charset = this.parseCharsets(topDict['charset'], charStrings.length, - strings); + var charset = this.parseCharsets(topDict['charset'], charStrings.length, strings); + var encoding = this.parseEncoding(topDict['Encoding'], properties, strings, charset); // charstrings contains info about glyphs (one element per glyph // containing mappings for {unicode, width}) - var charstrings = this.getCharStrings(charset, charStrings, + var charstrings = this.getCharStrings(charset, charStrings, encoding, privDict, this.properties); // create the mapping between charstring and glyph id var glyphIds = []; - for (var i = 0, ii = charstrings.length; i < ii; ++i) { + for (var i = 0; i < charstrings.length; i++) glyphIds.push(charstrings[i].gid); - } this.charstrings = charstrings; this.glyphIds = glyphIds; }, - getCharStrings: function cff_charstrings(charsets, charStrings, + + getCharStrings: function cff_charstrings(charsets, charStrings, encoding, privDict, properties) { var widths = properties.widths; @@ -2201,31 +2198,75 @@ var Type2CFF = (function() { var nominalWidth = privDict['nominalWidthX']; var charstrings = []; - for (var i = 0, ii = charsets.length; i < ii; ++i) { - var charName = charsets[i]; - var charCode = properties.glyphs[charName]; - if (charCode) { - var width = widths[charCode] || defaultWidth; - charstrings.push({unicode: charCode, width: width, gid: i}); - } else { - if (charName !== '.notdef') - warn('Cannot find unicode for glyph ' + charName); - } + for (var code in encoding) { + var gid = encoding[code]; + var width = widths[code] || defaultWidth; + charstrings.push({unicode: code, width: width, gid: gid}); } - // sort the arry by the unicode value + // sort the array by the unicode value charstrings.sort(function(a, b) {return a.unicode - b.unicode}); return charstrings; }, - parseEncoding: function cff_parseencoding(pos) { - if (pos == 0) { - return Encodings.StandardEncoding; - } else if (pos == 1) { - return Encodings.ExpertEncoding; + + parseEncoding: function cff_parseencoding(pos, properties, strings, charset) { + var encoding = {}; + var bytes = this.bytes; + + function readSupplement() { + var supplementsCount = bytes[pos++]; + for (var i = 0; i < supplementsCount; i++) { + var code = bytes[pos++]; + var sid = (bytes[pos++] << 8) + (bytes[pos++] & 0xff); + encoding[code] = properties.differences.indexOf(strings[sid]); + } } - error('not implemented encodings'); + if (pos == 0 || pos == 1) { + var gid = 1; + var baseEncoding = pos ? Encodings.ExpertEncoding + : Encodings.StandardEncoding; + for (var i = 0; i < charset.length; i++) { + var index = baseEncoding.indexOf(charset[i]); + if (index != -1) + encoding[index] = gid++; + } + } else { + + var format = bytes[pos++]; + switch (format & 0x7f) { + case 0: + var glyphsCount = bytes[pos++]; + for (var i = 1; i <= glyphsCount; i++) + encoding[bytes[pos++]] = i; + + if (format & 0x80) + readSupplement(); + break; + + case 1: + var rangesCount = bytes[pos++]; + log(rangesCount); + var gid = 1; + for (var i = 0; i < rangesCount; i++) { + var start = bytes[pos++]; + var count = bytes[pos++]; + for (var j = start; j <= start + count; j++) + encoding[j] = gid++; + } + + if (format & 0x80) + readSupplement(); + break; + + default: + error('Unknow encoding format: ' + format + " in CFF"); + break; + } + } + return encoding; }, + parseCharsets: function cff_parsecharsets(pos, length, strings) { var bytes = this.bytes; var format = bytes[pos++]; From f6e14010f123d8829a551d338404c8f0b9a31f1c Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 31 Aug 2011 00:12:00 +0200 Subject: [PATCH 08/15] Fix regression mapping from gid to glyph from the last commit --- fonts.js | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fonts.js b/fonts.js index 48e53d0c1..50701cbca 100755 --- a/fonts.js +++ b/fonts.js @@ -423,6 +423,7 @@ var Font = (function Font() { // Wrap the CFF data inside an OTF font file data = this.convert(name, cff, properties); + writeToFile(data, "/tmp/" + name + ".otf"); break; case 'TrueType': @@ -2198,10 +2199,16 @@ var Type2CFF = (function() { var nominalWidth = privDict['nominalWidthX']; var charstrings = []; - for (var code in encoding) { - var gid = encoding[code]; - var width = widths[code] || defaultWidth; - charstrings.push({unicode: code, width: width, gid: gid}); + var differences = properties.differences; + for (var i = 1; i < charsets.length; i++) { + var glyph = charsets[i]; + var charCode = properties.glyphs[glyph]; + if (charCode) { + var width = widths[charCode] || defaultWidth; + charstrings.push({unicode: charCode, width: width, gid: i}); + } else if (glyph !== '.notdef') { + warn('Cannot find unicode for glyph ' + charName); + } } // sort the array by the unicode value @@ -2246,7 +2253,6 @@ var Type2CFF = (function() { case 1: var rangesCount = bytes[pos++]; - log(rangesCount); var gid = 1; for (var i = 0; i < rangesCount; i++) { var start = bytes[pos++]; From 76f6398e479a01e46d96572b478c6f2ffc4dd85d Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 31 Aug 2011 00:37:39 +0200 Subject: [PATCH 09/15] Fix some strict warnings --- crypto.js | 1 + fonts.js | 14 ++++++++------ pdf.js | 6 ++++-- web/viewer.js | 12 +++++++----- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/crypto.js b/crypto.js index 4aaca9520..bfffa1f44 100644 --- a/crypto.js +++ b/crypto.js @@ -569,6 +569,7 @@ var CipherTransformFactory = (function() { }; } error('Unknown crypto method'); + return null; } constructor.prototype = { diff --git a/fonts.js b/fonts.js index 50701cbca..3e3eece81 100755 --- a/fonts.js +++ b/fonts.js @@ -66,7 +66,7 @@ var FontMeasure = (function FontMeasure() { return { setActive: function fonts_setActive(font, size) { - if (current = font) { + if (current == font) { var sizes = current.sizes; if (!(measureCache = sizes[size])) measureCache = sizes[size] = Object.create(null); @@ -856,7 +856,7 @@ var Font = (function Font() { var language = int16(font.getBytes(2)); if (format == 4) { - return; + return cmap.data; } else if (format == 0) { // Characters below 0x20 are controls characters that are hardcoded // into the platform so if some characters in the font are assigned @@ -927,6 +927,7 @@ var Font = (function Font() { return cmap.data = createCMapTable(glyphs); } } + return cmap.data; }; // Check that required tables are present @@ -2287,7 +2288,7 @@ var Type2CFF = (function() { id = (id << 8) | bytes[pos++]; charset.push(strings[id]); } - return charset; + break; case 1: while (charset.length <= length) { var first = bytes[pos++]; @@ -2296,7 +2297,7 @@ var Type2CFF = (function() { for (var i = 0; i <= numLeft; ++i) charset.push(strings[first++]); } - return charset; + break; case 2: while (charset.length <= length) { var first = bytes[pos++]; @@ -2306,11 +2307,11 @@ var Type2CFF = (function() { for (var i = 0; i <= numLeft; ++i) charset.push(strings[first++]); } - return charset; + break; default: error('Unknown charset format'); } - + return charset; }, getPrivDict: function cff_getprivdict(baseDict, strings) { var dict = {}; @@ -2440,6 +2441,7 @@ var Type2CFF = (function() { } else { error('Incorrect byte'); } + return -1; }; function parseFloatOperand() { diff --git a/pdf.js b/pdf.js index 41cd6d07e..d97588c2c 100644 --- a/pdf.js +++ b/pdf.js @@ -2093,7 +2093,7 @@ var LZWStream = (function() { var c = this.str.getByte(); if (c == null) { this.eof = true; - return; + return null; } cachedData = (cachedData << 8) | c; bitsCached += 8; @@ -5208,7 +5208,7 @@ var Util = (function() { return 'rgb(' + ri + ',' + gi + ',' + bi + ')'; }; constructor.makeCssCmyk = function makecmyk(c, m, y, k) { - var c = (new DeviceCmykCS()).getRgb([c, m, y, k]); + c = (new DeviceCmykCS()).getRgb([c, m, y, k]); var ri = (255 * c[0]) | 0, gi = (255 * c[1]) | 0, bi = (255 * c[2]) | 0; return 'rgb(' + ri + ',' + gi + ',' + bi + ')'; }; @@ -5335,6 +5335,7 @@ var ColorSpace = (function() { } else { error('unrecognized color space object: "' + cs + '"'); } + return null; }; return constructor; @@ -5623,6 +5624,7 @@ var Pattern = (function() { default: error('Unknown type of pattern: ' + typeNum); } + return null; }; constructor.parseShading = function pattern_shading(shading, matrix, diff --git a/web/viewer.js b/web/viewer.js index c93df3b74..1e016e6e9 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -417,16 +417,18 @@ window.addEventListener('transitionend', function(evt) { var pagesCount = PDFView.pages.length; var container = document.getElementById('sidebarView'); - container._interval = window.setInterval(function() { - if (pageIndex >= pagesCount) - return window.clearInterval(container._interval); + container._interval = window.setInterval(function interval() { + if (pageIndex >= pagesCount) { + window.clearInterval(container._interval); + return; + } PDFView.thumbnails[pageIndex++].draw(); }, 500); }, true); -window.addEventListener('scalechange', function(evt) { +window.addEventListener('scalechange', function scalechange(evt) { var options = document.getElementById('scaleSelect').options; for (var i = 0; i < options.length; i++) { var option = options[i]; @@ -434,7 +436,7 @@ window.addEventListener('scalechange', function(evt) { } }, true); -window.addEventListener('pagechange', function(evt) { +window.addEventListener('pagechange', function pagechange(evt) { var page = evt.detail; document.location.hash = page; document.getElementById('pageNumber').value = page; From 3fd2f42a50b501a24ef483a4e37bc31a3201bc73 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 31 Aug 2011 01:23:55 +0200 Subject: [PATCH 10/15] Lie to the sanitizer about the real nature of Type1C font --- fonts.js | 68 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/fonts.js b/fonts.js index 3e3eece81..190ca868f 100755 --- a/fonts.js +++ b/fonts.js @@ -415,11 +415,10 @@ var Font = (function Font() { this.mimetype = 'font/opentype'; var subtype = properties.subtype; - if (subtype === 'Type1C') { + if (subtype === 'Type1C') var cff = new Type2CFF(file, properties); - } else { + else var cff = new CFF(name, file, properties); - } // Wrap the CFF data inside an OTF font file data = this.convert(name, cff, properties); @@ -2140,23 +2139,18 @@ var Type2CFF = (function() { this.bytes = bytes; this.properties = properties; - // Other classes expect this.data to be a Javascript array - var data = []; - for (var i = 0, ii = bytes.length; i < ii; ++i) - data.push(bytes[i]); - this.data = data; - - this.parse(properties); + this.data = this.parse(); }; constructor.prototype = { - parse: function cff_parse(properties) { + parse: function cff_parse() { var header = this.parseHeader(); + var properties = this.properties; var nameIndex = this.parseIndex(header.endPos); var dictIndex = this.parseIndex(nameIndex.endPos); if (dictIndex.length != 1) - error('More than 1 font'); + error('CFF contains more than 1 font'); var stringIndex = this.parseIndex(dictIndex.endPos); var gsubrIndex = this.parseIndex(stringIndex.endPos); @@ -2168,20 +2162,30 @@ var Type2CFF = (function() { var bytes = this.bytes; - var privInfo = topDict['Private']; - var privOffset = privInfo[1], privLength = privInfo[0]; + var privateInfo = topDict['Private']; + var privOffset = privateInfo[1], privLength = privateInfo[0]; var privBytes = bytes.subarray(privOffset, privOffset + privLength); baseDict = this.parseDict(privBytes); var privDict = this.getPrivDict(baseDict, strings); var charStrings = this.parseIndex(topDict['CharStrings']); - var charset = this.parseCharsets(topDict['charset'], charStrings.length, strings); - var encoding = this.parseEncoding(topDict['Encoding'], properties, strings, charset); + var charset = this.parseCharsets(topDict['charset'], + charStrings.length, strings); + var hasSupplement = this.parseEncoding(topDict['Encoding'], properties, + strings, charset); + + // The font sanitizer does not support CFF encoding with a + // supplement, since the encoding is not really use to map + // between gid to glyph, let's overwrite what is declared in + // the top dictionary to let the sanitizer think the font use + // StandardEncoding, that's a lie but that's ok. + if (hasSupplement) + bytes[topDict['Encoding']] = 0; // charstrings contains info about glyphs (one element per glyph // containing mappings for {unicode, width}) - var charstrings = this.getCharStrings(charset, charStrings, encoding, - privDict, this.properties); + var charstrings = this.getCharStrings(charset, charStrings, + privDict, this.properties); // create the mapping between charstring and glyph id var glyphIds = []; @@ -2190,9 +2194,14 @@ var Type2CFF = (function() { this.charstrings = charstrings; this.glyphIds = glyphIds; + + var data = []; + for (var i = 0, ii = bytes.length; i < ii; ++i) + data.push(bytes[i]); + return data; }, - getCharStrings: function cff_charstrings(charsets, charStrings, encoding, + getCharStrings: function cff_charstrings(charsets, charStrings, privDict, properties) { var widths = properties.widths; @@ -2203,13 +2212,10 @@ var Type2CFF = (function() { var differences = properties.differences; for (var i = 1; i < charsets.length; i++) { var glyph = charsets[i]; - var charCode = properties.glyphs[glyph]; - if (charCode) { - var width = widths[charCode] || defaultWidth; - charstrings.push({unicode: charCode, width: width, gid: i}); - } else if (glyph !== '.notdef') { - warn('Cannot find unicode for glyph ' + charName); - } + var code = differences.indexOf(glyph); + var width = widths[code] || defaultWidth; + properties.encoding[i] = i + 0x1F; + charstrings.push({unicode: code + 0x1F, width: width, gid: i}); } // sort the array by the unicode value @@ -2248,8 +2254,10 @@ var Type2CFF = (function() { for (var i = 1; i <= glyphsCount; i++) encoding[bytes[pos++]] = i; - if (format & 0x80) + if (format & 0x80) { readSupplement(); + return true; + } break; case 1: @@ -2262,8 +2270,10 @@ var Type2CFF = (function() { encoding[j] = gid++; } - if (format & 0x80) + if (format & 0x80) { readSupplement(); + return true; + } break; default: @@ -2271,7 +2281,7 @@ var Type2CFF = (function() { break; } } - return encoding; + return false; }, parseCharsets: function cff_parsecharsets(pos, length, strings) { From ec1a8e98cf56d7cad19dd10644b9590a4e78e728 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 31 Aug 2011 01:48:56 +0200 Subject: [PATCH 11/15] Fix PDF reference regression --- fonts.js | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fonts.js b/fonts.js index 190ca868f..5333f0411 100755 --- a/fonts.js +++ b/fonts.js @@ -2210,12 +2210,22 @@ var Type2CFF = (function() { var charstrings = []; var differences = properties.differences; + var index = 1; + var kCmapGlyphOffset = 0xE000; for (var i = 1; i < charsets.length; i++) { var glyph = charsets[i]; + for (var j = index; j < differences.length; j++) { + if (differences[j]) { + index = j; + break; + } + } + var code = differences.indexOf(glyph); var width = widths[code] || defaultWidth; - properties.encoding[i] = i + 0x1F; - charstrings.push({unicode: code + 0x1F, width: width, gid: i}); + properties.encoding[index] = index + kCmapGlyphOffset; + charstrings.push({unicode: code + kCmapGlyphOffset, width: width, gid: i}); + index++; } // sort the array by the unicode value From 8145c00215f9d0c8c5b81fcd069961bf32d6b98b Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 31 Aug 2011 02:18:13 +0200 Subject: [PATCH 12/15] Fix another regression on pdf.pdf#5 --- fonts.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fonts.js b/fonts.js index 5333f0411..0d3c63c82 100755 --- a/fonts.js +++ b/fonts.js @@ -422,7 +422,6 @@ var Font = (function Font() { // Wrap the CFF data inside an OTF font file data = this.convert(name, cff, properties); - writeToFile(data, "/tmp/" + name + ".otf"); break; case 'TrueType': @@ -2210,7 +2209,7 @@ var Type2CFF = (function() { var charstrings = []; var differences = properties.differences; - var index = 1; + var index = 0; var kCmapGlyphOffset = 0xE000; for (var i = 1; i < charsets.length; i++) { var glyph = charsets[i]; @@ -2222,6 +2221,9 @@ var Type2CFF = (function() { } var code = differences.indexOf(glyph); + if (code == -1) + code = properties.glyphs[glyph] || index; + var width = widths[code] || defaultWidth; properties.encoding[index] = index + kCmapGlyphOffset; charstrings.push({unicode: code + kCmapGlyphOffset, width: width, gid: i}); From cd7cf3536dc60ceba7383ad84b709eeaf8cd7b1f Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 31 Aug 2011 03:31:45 +0200 Subject: [PATCH 13/15] Ensure lastChar is correct if it's not specified --- pdf.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pdf.js b/pdf.js index d97588c2c..b3ff32aa8 100644 --- a/pdf.js +++ b/pdf.js @@ -4284,9 +4284,12 @@ var PartialEvaluator = (function() { // firstChar and width are required // (except for 14 standard fonts) var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')) || 0; - var lastChar = xref.fetchIfRef(fontDict.get('LastChar')) || 0; var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; + var lastChar = xref.fetchIfRef(fontDict.get('LastChar')); + if (!lastChar) + lastChar = diffEncoding.length || baseEncoding.length; + // merge in the differences var glyphsMap = {}; for (var i = firstChar; i <= lastChar; i++) { From 19c6cef7cce56f235b8efc696da9f76b57c5f5c8 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 31 Aug 2011 13:42:10 +0200 Subject: [PATCH 14/15] Address review comments of #409 --- fonts.js | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fonts.js b/fonts.js index 0d3c63c82..24fd35d61 100755 --- a/fonts.js +++ b/fonts.js @@ -415,10 +415,8 @@ var Font = (function Font() { this.mimetype = 'font/opentype'; var subtype = properties.subtype; - if (subtype === 'Type1C') - var cff = new Type2CFF(file, properties); - else - var cff = new CFF(name, file, properties); + var cff = (subtype === 'Type1C') ? new Type2CFF(file, properties) + : new CFF(name, file, properties); // Wrap the CFF data inside an OTF font file data = this.convert(name, cff, properties); @@ -2161,16 +2159,16 @@ var Type2CFF = (function() { var bytes = this.bytes; - var privateInfo = topDict['Private']; + var privateInfo = topDict.Private; var privOffset = privateInfo[1], privLength = privateInfo[0]; var privBytes = bytes.subarray(privOffset, privOffset + privLength); baseDict = this.parseDict(privBytes); var privDict = this.getPrivDict(baseDict, strings); - var charStrings = this.parseIndex(topDict['CharStrings']); - var charset = this.parseCharsets(topDict['charset'], + var charStrings = this.parseIndex(topDict.CharStrings); + var charset = this.parseCharsets(topDict.charset, charStrings.length, strings); - var hasSupplement = this.parseEncoding(topDict['Encoding'], properties, + var hasSupplement = this.parseEncoding(topDict.Encoding, properties, strings, charset); // The font sanitizer does not support CFF encoding with a @@ -2179,7 +2177,7 @@ var Type2CFF = (function() { // the top dictionary to let the sanitizer think the font use // StandardEncoding, that's a lie but that's ok. if (hasSupplement) - bytes[topDict['Encoding']] = 0; + bytes[topDict.Encoding] = 0; // charstrings contains info about glyphs (one element per glyph // containing mappings for {unicode, width}) @@ -2258,7 +2256,6 @@ var Type2CFF = (function() { encoding[index] = gid++; } } else { - var format = bytes[pos++]; switch (format & 0x7f) { case 0: From 763bd7059af5ae692d77d41717e83d5dbba5a03e Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Wed, 31 Aug 2011 14:17:57 +0200 Subject: [PATCH 15/15] Fix a warning when the destination link point to nothing --- fonts.js | 1 + web/viewer.js | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fonts.js b/fonts.js index 24fd35d61..02aa52601 100755 --- a/fonts.js +++ b/fonts.js @@ -80,6 +80,7 @@ var FontMeasure = (function FontMeasure() { size *= kScalePrecision; var rule = italic + ' ' + bold + ' ' + size + 'px "' + name + '"'; ctx.font = rule; + current = font; }, measureText: function fonts_measureText(text) { var width; diff --git a/web/viewer.js b/web/viewer.js index 1e016e6e9..d57e47045 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -217,11 +217,10 @@ var PageView = function(container, content, id, width, height, function setupLinks(canvas, content, scale) { function bindLink(link, dest) { - if (dest) { - link.onclick = function() { + link.onclick = function() { + if (dest) PDFView.navigateTo(dest); - return false; - }; + return false; } } var links = content.getLinks(); @@ -232,7 +231,7 @@ var PageView = function(container, content, id, width, height, link.style.width = Math.ceil(links[i].width * scale) + 'px'; link.style.height = Math.ceil(links[i].height * scale) + 'px'; link.href = links[i].url || ''; - bindLink(link, links[i].dest); + bindLink(link, ('dest' in links[i]) ? links[i].dest : null); div.appendChild(link); } }