diff --git a/fonts.js b/fonts.js index 1f0a6a211..607e2aab9 100755 --- a/fonts.js +++ b/fonts.js @@ -7,7 +7,7 @@ var isWorker = (typeof window == 'undefined'); /** * Maximum file size of the font. */ -var kMaxFontFileSize = 40000; +var kMaxFontFileSize = 200000; /** * Maximum time to wait for a font to be loaded by font-face rules. @@ -389,6 +389,7 @@ var Font = (function() { var data; switch (properties.type) { case 'Type1': + case 'CIDFontType0': var cff = new CFF(name, file, properties); this.mimetype = 'font/opentype'; @@ -397,6 +398,7 @@ var Font = (function() { break; case 'TrueType': + case 'CIDFontType2': this.mimetype = 'font/opentype'; // Repair the TrueType file if it is can be damaged in the point of @@ -409,9 +411,10 @@ var Font = (function() { break; } this.data = data; - + this.type = properties.type; this.id = Fonts.registerFont(name, data, properties); this.loadedName = 'pdfFont' + this.id; + this.compositeFont = properties.compositeFont; }; function stringToArray(str) { @@ -679,6 +682,65 @@ var Font = (function() { '\x00\x00\x00\x00'; // maxMemType1 }; + function createNameTable(name) { + var strings = [ + 'Original licence', // 0.Copyright + name, // 1.Font family + 'Unknown', // 2.Font subfamily (font weight) + 'uniqueID', // 3.Unique ID + name, // 4.Full font name + 'Version 0.11', // 5.Version + '', // 6.Postscript name + 'Unknown', // 7.Trademark + 'Unknown', // 8.Manufacturer + 'Unknown' // 9.Designer + ]; + + // Mac want 1-byte per character strings while Windows want + // 2-bytes per character, so duplicate the names table + var stringsUnicode = []; + for (var i = 0; i < strings.length; i++) { + var str = strings[i]; + + var strUnicode = ''; + for (var j = 0; j < str.length; j++) + strUnicode += string16(str.charCodeAt(j)); + stringsUnicode.push(strUnicode); + } + + var names = [strings, stringsUnicode]; + var platforms = ['\x00\x01', '\x00\x03']; + var encodings = ['\x00\x00', '\x00\x01']; + var languages = ['\x00\x00', '\x04\x09']; + + var namesRecordCount = strings.length * platforms.length; + var nameTable = + '\x00\x00' + // format + string16(namesRecordCount) + // Number of names Record + string16(namesRecordCount * 12 + 6); // Storage + + // Build the name records field + var strOffset = 0; + for (var i = 0; i < platforms.length; i++) { + var strs = names[i]; + for (var j = 0; j < strs.length; j++) { + var str = strs[j]; + var nameRecord = + platforms[i] + // platform ID + encodings[i] + // encoding ID + languages[i] + // language ID + string16(j) + // name ID + string16(str.length) + + string16(strOffset); + nameTable += nameRecord; + strOffset += str.length; + } + } + + nameTable += strings.join('') + stringsUnicode.join(''); + return nameTable; + } + constructor.prototype = { name: null, font: null, @@ -811,7 +873,7 @@ var Font = (function() { // This keep a reference to the CMap and the post tables since they can // be rewritted - var cmap, post; + var cmap, post, nameTable, maxp; var tables = []; for (var i = 0; i < numTables; i++) { @@ -822,6 +884,10 @@ var Font = (function() { cmap = table; else if (table.tag == 'post') post = table; + else if (table.tag == 'name') + nameTable = table; + else if (table.tag == 'maxp') + maxp = table; requiredTables.splice(index, 1); } @@ -857,7 +923,51 @@ var Font = (function() { }); // Replace the old CMAP table with a shiny new one - replaceCMapTable(cmap, font, properties); + if (properties.type == 'CIDFontType2') { + // Type2 composite fonts map characters directly to glyphs so the cmap + // table must be replaced. + + var glyphs = []; + var charset = properties.charset; + if (!charset.length) { + // PDF did not contain a GIDMap for the font so create an identity cmap + + // First get the number of glyphs from the maxp table + font.pos = (font.start ? font.start : 0) + maxp.length; + var version = int16(font.getBytes(4)); + var numGlyphs = int16(font.getBytes(2)); + + // Now create an identity mapping + for (var i = 1; i < numGlyphs; i++) { + glyphs.push({ + unicode: i + }); + } + } else { + for (var i = 1; i < charset.length; i++) { + var index = charset.indexOf(i); + if (index != -1) { + glyphs.push({ + unicode: index + }); + } else { + break; + } + } + } + + if (!cmap) { + // Font did not contain a cmap + tables.push({ + tag: 'cmap', + data: createCMapTable(glyphs) + }) + } else { + cmap.data = createCMapTable(glyphs); + } + } else { + replaceCMapTable(cmap, font, properties); + } // Rewrite the 'post' table if needed if (!post) { @@ -867,6 +977,14 @@ var Font = (function() { }); } + // Rewrite the 'name' table if needed + if (!nameTable) { + tables.push({ + tag: 'name', + data: stringToArray(createNameTable(this.name)) + }); + } + // Tables needs to be written by ascendant alphabetic order tables.sort(function tables_sort(a, b) { return (a.tag > b.tag) - (a.tag < b.tag); @@ -909,65 +1027,6 @@ var Font = (function() { }, convert: function font_convert(fontName, font, properties) { - function createNameTable(name) { - var strings = [ - 'Original licence', // 0.Copyright - name, // 1.Font family - 'Unknown', // 2.Font subfamily (font weight) - 'uniqueID', // 3.Unique ID - name, // 4.Full font name - 'Version 0.11', // 5.Version - '', // 6.Postscript name - 'Unknown', // 7.Trademark - 'Unknown', // 8.Manufacturer - 'Unknown' // 9.Designer - ]; - - // Mac want 1-byte per character strings while Windows want - // 2-bytes per character, so duplicate the names table - var stringsUnicode = []; - for (var i = 0; i < strings.length; i++) { - var str = strings[i]; - - var strUnicode = ''; - for (var j = 0; j < str.length; j++) - strUnicode += string16(str.charCodeAt(j)); - stringsUnicode.push(strUnicode); - } - - var names = [strings, stringsUnicode]; - var platforms = ['\x00\x01', '\x00\x03']; - var encodings = ['\x00\x00', '\x00\x01']; - var languages = ['\x00\x00', '\x04\x09']; - - var namesRecordCount = strings.length * platforms.length; - var nameTable = - '\x00\x00' + // format - string16(namesRecordCount) + // Number of names Record - string16(namesRecordCount * 12 + 6); // Storage - - // Build the name records field - var strOffset = 0; - for (var i = 0; i < platforms.length; i++) { - var strs = names[i]; - for (var j = 0; j < strs.length; j++) { - var str = strs[j]; - var nameRecord = - platforms[i] + // platform ID - encodings[i] + // encoding ID - languages[i] + // language ID - string16(j) + // name ID - string16(str.length) + - string16(strOffset); - nameTable += nameRecord; - strOffset += str.length; - } - } - - nameTable += strings.join('') + stringsUnicode.join(''); - return nameTable; - } - function isFixedPitch(glyphs) { for (var i = 0; i < glyphs.length - 1; i++) { if (glyphs[i] != glyphs[i + 1]) @@ -1110,44 +1169,66 @@ var Font = (function() { charsToUnicode: function fonts_chars2Unicode(chars) { var charsCache = this.charsCache; - + var str; + // if we translated this string before, just grab it from the cache if (charsCache) { - var str = charsCache[chars]; + str = charsCache[chars]; if (str) return str; } - - // translate the string using the font's encoding - var encoding = this.encoding; - if (!encoding) - return chars; - + // lazily create the translation cache if (!charsCache) charsCache = this.charsCache = Object.create(null); - - str = ''; - for (var i = 0; i < chars.length; ++i) { - var charcode = chars.charCodeAt(i); - var unicode = encoding[charcode]; - if ('undefined' == typeof(unicode)) { - // FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf - // at the moment, for unknown reasons. - warn('Unencoded charcode '+ charcode); - unicode = charcode; + + if (this.compositeFont) { + // composite fonts have multi-byte strings + // convert the string from single-byte to multi-byte + // XXX assuming CIDFonts are two-byte - later need to extract the correct byte encoding + // according to the PDF spec + str = ''; + var multiByteStr = ""; + var length = chars.length; + for (var i = 0; i < length; i++) { + var byte1 = chars.charCodeAt(i++) & 0xFF; + var byte2; + if (i == length) + byte2 = 0; + else + byte2 = chars.charCodeAt(i) & 0xFF; + multiByteStr += String.fromCharCode((byte1 << 8) | byte2); } - - // Check if the glyph has already been converted - if (!IsNum(unicode)) - unicode = encoding[unicode] = GlyphsUnicode[unicode.name]; - - // Handle surrogate pairs - if (unicode > 0xFFFF) { - str += String.fromCharCode(unicode & 0xFFFF); - unicode >>= 16; + str = multiByteStr; + } + else { + // translate the string using the font's encoding + var encoding = this.encoding; + if (!encoding) + return chars; + + str = ''; + for (var i = 0; i < chars.length; ++i) { + var charcode = chars.charCodeAt(i); + var unicode = encoding[charcode]; + if ('undefined' == typeof(unicode)) { + // FIXME/issue 233: we're hitting this in test/pdf/sizes.pdf + // at the moment, for unknown reasons. + warn('Unencoded charcode '+ charcode); + unicode = charcode; + } + + // Check if the glyph has already been converted + if (!IsNum(unicode)) + unicode = encoding[unicode] = GlyphsUnicode[unicode.name]; + + // Handle surrogate pairs + if (unicode > 0xFFFF) { + str += String.fromCharCode(unicode & 0xFFFF); + unicode >>= 16; + } + str += String.fromCharCode(unicode); } - str += String.fromCharCode(unicode); } // Enter the translated string into the cache diff --git a/pdf.js b/pdf.js index a4c10fef7..a3179406c 100644 --- a/pdf.js +++ b/pdf.js @@ -3624,11 +3624,31 @@ var PartialEvaluator = (function() { }, translateFont: function(fontDict, xref, resources) { - var fd = fontDict.get('FontDescriptor'); + var fd; + var descendant = []; + var subType = fontDict.get('Subtype'); + var compositeFont = false; + assertWellFormed(IsName(subType), 'invalid font Subtype'); + + // If font is a composite + // - get the descendant font + // - set the type according to the descendant font + // - get the FontDescriptor from the descendant font + if (subType.name == 'Type0') { + var df = fontDict.get('DescendantFonts'); + if (!df) + return null; + compositeFont = true; + descendant = xref.fetch(df[0]); + subType = descendant.get('Subtype'); + fd = descendant.get('FontDescriptor'); + } else { + fd = fontDict.get('FontDescriptor'); + } + if (!fd) - // XXX deprecated "special treatment" for standard - // fonts? What do we need to do here? return null; + var descriptor = xref.fetch(fd); var fontName = descriptor.get('FontName'); @@ -3642,7 +3662,38 @@ var PartialEvaluator = (function() { var encodingMap = {}; var charset = []; - if (fontDict.has('Encoding')) { + if (compositeFont) { + // Special CIDFont support + // XXX only CIDFontType2 supported for now + if (subType.name == 'CIDFontType2') { + var cidToGidMap = descendant.get('CIDToGIDMap'); + if (cidToGidMap && IsRef(cidToGidMap)) { + // Extract the charset from the CIDToGIDMap + var glyphsStream = xref.fetchIfRef(cidToGidMap); + var glyphsData = glyphsStream.getBytes(0); + var i = 0; + // Glyph ids are big-endian 2-byte values + for (var j=0; j