From 3f16be334eb6dd904b77fff0343d4e2c1bd091e4 Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Thu, 25 Aug 2011 02:29:22 +0200 Subject: [PATCH] Add built-in encoding for Type1 if there is no encoding built yet (fix some missing chars in pypy.pdf page 7) --- fonts.js | 69 +++++++++++++++++++++++++++++++++++++------------------- pdf.js | 8 +++++-- 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/fonts.js b/fonts.js index 9b306ff0e..e7055b96a 100755 --- a/fonts.js +++ b/fonts.js @@ -1284,8 +1284,6 @@ var Font = (function Font() { var charcode = chars.charCodeAt(i); var unicode = encoding[charcode]; if ('undefined' == typeof(unicode)) { - // FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf - // at the moment, for unknown reasons. warn('Unencoded charcode ' + charcode); unicode = charcode; } @@ -1590,13 +1588,12 @@ var Type1Parser = function() { while (i < count && (eexecStr[i] == ' ' || eexecStr[i] == '\n')) ++i; - var t = ''; + var token = ''; while (i < count && !(eexecStr[i] == ' ' || eexecStr[i] == '\n')) - t += eexecStr[i++]; - - return t; - } + token += eexecStr[i++]; + return token; + }; var c = eexecStr[i]; if ((glyphsSection || subrsSection) && c == 'R') { @@ -1680,18 +1677,28 @@ var Type1Parser = function() { return program; }, - this.extractFontHeader = function t1_extractFontProgram(stream) { + this.extractFontHeader = function t1_extractFontHeader(stream, properties) { var headerString = ''; for (var i = 0; i < stream.length; i++) headerString += String.fromCharCode(stream[i]); - var info = { - textMatrix: null - }; - var token = ''; var count = headerString.length; for (var i = 0; i < count; i++) { + var getToken = function() { + var char = headerString[i]; + while (i < count && (char == ' ' || char == '\n' || char == '/')) + char = headerString[++i]; + + var token = ''; + while (i < count && !(char == ' ' || char == '\n' || char == '/')) { + token += char; + char = headerString[++i]; + } + + return token; + }; + var c = headerString[i]; if (c == ' ' || c == '\n') { switch (token) { @@ -1705,7 +1712,25 @@ var Type1Parser = function() { // Make the angle into the right direction matrix[2] *= -1; - info.textMatrix = matrix; + properties.textMatrix = matrix; + break; + case '/Encoding': + if (!properties.builtInEncoding) + break; + + var size = parseInt(getToken()); + getToken(); // read in 'array' + + for (var j = 0; j < size; j++) { + var token = getToken(); + if (token == 'dup') { + var index = parseInt(getToken()); + var glyph = getToken(); + properties.encoding[index] = GlyphsUnicode[glyph]; + getToken(); // read the in 'put' + j = index; + } + } break; } token = ''; @@ -1713,8 +1738,6 @@ var Type1Parser = function() { token += c; } } - - return info; }; }; @@ -1798,13 +1821,11 @@ var CFF = function(name, file, properties) { var length2 = file.dict.get('Length2'); var headerBlock = file.getBytes(length1); - var header = type1Parser.extractFontHeader(headerBlock); - for (var info in header) - properties[info] = header[info]; + type1Parser.extractFontHeader(headerBlock, properties); // Decrypt the data blocks and retrieve it's content var eexecBlock = file.getBytes(length2); - var data = type1Parser.extractFontProgram(eexecBlock); + var data = type1Parser.extractFontProgram(eexecBlock, properties); for (var info in data.properties) properties[info] = data.properties[info]; @@ -1874,15 +1895,14 @@ CFF.prototype = { getOrderedCharStrings: function cff_getOrderedCharStrings(glyphs) { var charstrings = []; + var missings = []; for (var i = 0; i < glyphs.length; i++) { var glyph = glyphs[i]; var unicode = GlyphsUnicode[glyph.glyph]; if (!unicode) { - if (glyph.glyph != '.notdef') { - warn(glyph.glyph + - ' does not have an entry in the glyphs unicode dictionary'); - } + if (glyph.glyph != '.notdef') + missings.push(glyph.glyph); } else { charstrings.push({ glyph: glyph, @@ -1894,6 +1914,9 @@ CFF.prototype = { } } + if (missings.length) + warn(missings + ' does not have unicode in the glyphs dictionary'); + charstrings.sort(function charstrings_sort(a, b) { return a.unicode - b.unicode; }); diff --git a/pdf.js b/pdf.js index 4408bc49e..138d4bbf4 100644 --- a/pdf.js +++ b/pdf.js @@ -4191,6 +4191,7 @@ var PartialEvaluator = (function() { fd = fontDict.get('FontDescriptor'); } + var builtInEncoding = false; var encodingMap = {}; var glyphMap = {}; var charset = []; @@ -4261,9 +4262,11 @@ var PartialEvaluator = (function() { if (!baseEncoding) { var type = subType.name; if (type == 'TrueType') { - baseEncoding = Encodings.WinAnsiEncoding.slice(0); + baseEncoding = Encodings.WinAnsiEncoding.slice(); } else if (type == 'Type1') { - baseEncoding = Encodings.StandardEncoding.slice(0); + baseEncoding = Encodings.StandardEncoding.slice(); + if (!diffEncoding.length) + builtInEncoding = true; } else { error('Unknown type of font'); } @@ -4419,6 +4422,7 @@ var PartialEvaluator = (function() { subtype: fileType, widths: glyphWidths, encoding: encodingMap, + builtInEncoding: builtInEncoding, charset: charset, firstChar: fontDict.get('FirstChar'), lastChar: fontDict.get('LastChar'),