From 7f038536fbe8a307d1a3e1439839241b0cedf168 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Fri, 28 Oct 2011 20:38:31 -0500 Subject: [PATCH 01/13] Migration of the 'encoding-1' branch (ref #674) --- src/canvas.js | 2 +- src/evaluator.js | 482 +++++++++++++----------------- src/fonts.js | 751 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 717 insertions(+), 518 deletions(-) diff --git a/src/canvas.js b/src/canvas.js index b7045dc39..d22023776 100644 --- a/src/canvas.js +++ b/src/canvas.js @@ -445,7 +445,7 @@ var CanvasGraphics = (function canvasGraphics() { this.save(); ctx.scale(fontSize, fontSize); ctx.transform.apply(ctx, fontMatrix); - this.executeIRQueue(glyph.IRQueue); + this.executeIRQueue(glyph.codeIRQueue); this.restore(); var transformed = Util.applyTransform([glyph.width, 0], fontMatrix); diff --git a/src/evaluator.js b/src/evaluator.js index 48e12c83d..bb2efaa14 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -459,15 +459,178 @@ var PartialEvaluator = (function partialEvaluator() { }; }, - extractEncoding: function partialEvaluatorExtractEncoding(dict, - xref, - properties) { - var type = properties.type, encoding; - if (properties.composite) { - var defaultWidth = xref.fetchIfRef(dict.get('DW')) || 1000; - properties.defaultWidth = defaultWidth; + extractDataStructures: function + partialEvaluatorExtractDataStructures(dict, baseDict, + xref, properties) { + // 9.10.2 + var toUnicode = dict.get('ToUnicode') || + baseDict.get('ToUnicode'); + if (toUnicode) + properties.toUnicode = this.readToUnicode(toUnicode, xref); + + if (properties.composite) { + // CIDSystemInfo helps to match CID to glyphs + var cidSystemInfo = xref.fetchIfRef(dict.get('CIDSystemInfo')); + if (isDict(cidSystemInfo)) { + properties.cidSystemInfo = { + registry: cidSystemInfo.get('Registry'), + ordering: cidSystemInfo.get('Ordering'), + supplement: cidSystemInfo.get('Supplement') + }; + } + + var cidToGidMap = xref.fetchIfRef(dict.get('CIDToGIDMap')); + if (isStream(cidToGidMap)) + properties.cidToGidMap = this.readCidToGidMap(cidToGidMap); + } + + var differences = []; + var baseEncoding = Encodings.StandardEncoding; + var hasEncoding = dict.has('Encoding'); + if (hasEncoding) { + var encoding = xref.fetchIfRef(dict.get('Encoding')); + if (isDict(encoding)) { + var baseName = encoding.get('BaseEncoding'); + if (baseName) + baseEncoding = Encodings[baseName.name]; + + // Load the differences between the base and original + if (encoding.has('Differences')) { + var diffEncoding = encoding.get('Differences'); + var index = 0; + for (var j = 0; j < diffEncoding.length; j++) { + var data = diffEncoding[j]; + if (isNum(data)) + index = data; + else + differences[index++] = data.name; + } + } + } else if (isName(encoding)) { + baseEncoding = Encodings[encoding.name]; + } else { + error('Encoding is not a Name nor a Dict'); + } + } + properties.differences = differences; + properties.baseEncoding = baseEncoding; + properties.hasEncoding = hasEncoding; + }, + + readToUnicode: + function partialEvaluatorReadToUnicode(toUnicode, xref) { + var cmapObj = xref.fetchIfRef(toUnicode); + var charToUnicode = []; + if (isName(cmapObj)) { + error('ToUnicode file cmap translation not implemented'); + } else if (isStream(cmapObj)) { + var tokens = []; + var token = ''; + var beginArrayToken = {}; + + var cmap = cmapObj.getBytes(cmapObj.length); + for (var i = 0; i < cmap.length; i++) { + var byte = cmap[i]; + if (byte == 0x20 || byte == 0x0D || byte == 0x0A || + byte == 0x3C || byte == 0x5B || byte == 0x5D) { + switch (token) { + case 'usecmap': + error('usecmap is not implemented'); + break; + + case 'beginbfchar': + case 'beginbfrange': + case 'begincidchar': + case 'begincidrange': + token = ''; + tokens = []; + break; + + case 'endcidrange': + case 'endbfrange': + for (var j = 0; j < tokens.length; j += 3) { + var startRange = tokens[j]; + var endRange = tokens[j + 1]; + var code = tokens[j + 2]; + while (startRange <= endRange) { + charToUnicode[startRange] = code++; + ++startRange; + } + } + break; + + case 'endcidchar': + case 'endbfchar': + for (var j = 0; j < tokens.length; j += 2) { + var index = tokens[j]; + var code = tokens[j + 1]; + charToUnicode[index] = code; + } + break; + + case '': + break; + + default: + if (token[0] >= '0' && token[0] <= '9') + token = parseInt(token, 10); // a number + tokens.push(token); + token = ''; + } + switch (byte) { + case 0x5B: + // begin list parsing + tokens.push(beginArrayToken); + break; + case 0x5D: + // collect array items + var items = [], item; + while (tokens.length && + (item = tokens.pop()) != beginArrayToken) + items.unshift(item); + tokens.push(items); + break; + } + } else if (byte == 0x3E) { + if (token.length) { + // parsing hex number + tokens.push(parseInt(token, 16)); + token = ''; + } + } else { + token += String.fromCharCode(byte); + } + } + } + return charToUnicode; + }, + readCidToGidMap: + function partialEvaluatorReadCidToGidMap(cidToGidStream) { + // Extract the encoding from the CIDToGIDMap + var glyphsData = cidToGidStream.getBytes(); + + // Set encoding 0 to later verify the font has an encoding + var result = []; + for (var j = 0; j < glyphsData.length; j++) { + var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; + if (glyphID == 0) + continue; + + var code = j >> 1; + result[code] = glyphID; + } + return result; + }, + + extractWidths: function partialEvaluatorWidths(dict, + xref, + descriptor, + properties) { + var glyphsWidths = []; + var defaultWidth = 0; + if (properties.composite) { + defaultWidth = xref.fetchIfRef(dict.get('DW')) || 1000; - var glyphsWidths = {}; var widths = xref.fetchIfRef(dict.get('W')); if (widths) { var start = 0, end = 0; @@ -487,246 +650,41 @@ var PartialEvaluator = (function partialEvaluator() { } } } - properties.widths = glyphsWidths; - - // Glyph ids are big-endian 2-byte values - encoding = properties.encoding; - - // CIDSystemInfo might help to match width and glyphs - var cidSystemInfo = dict.get('CIDSystemInfo'); - if (isDict(cidSystemInfo)) { - properties.cidSystemInfo = { - registry: cidSystemInfo.get('Registry'), - ordering: cidSystemInfo.get('Ordering'), - supplement: cidSystemInfo.get('Supplement') - }; - } - - var cidToGidMap = dict.get('CIDToGIDMap'); - if (!cidToGidMap || !isRef(cidToGidMap)) { - - - return Object.create(GlyphsUnicode); - } - - // Extract the encoding from the CIDToGIDMap - var glyphsStream = xref.fetchIfRef(cidToGidMap); - var glyphsData = glyphsStream.getBytes(0); - - // Set encoding 0 to later verify the font has an encoding - encoding[0] = { unicode: 0, width: 0 }; - for (var j = 0; j < glyphsData.length; j++) { - var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; - if (glyphID == 0) - continue; - - var code = j >> 1; - var width = glyphsWidths[code]; - encoding[code] = { - unicode: glyphID, - width: isNum(width) ? width : defaultWidth - }; - } - - return Object.create(GlyphsUnicode); - } - - var differences = properties.differences; - var map = properties.encoding; - var baseEncoding = null; - if (dict.has('Encoding')) { - encoding = xref.fetchIfRef(dict.get('Encoding')); - if (isDict(encoding)) { - var baseName = encoding.get('BaseEncoding'); - if (baseName) - baseEncoding = Encodings[baseName.name].slice(); - - // Load the differences between the base and original - if (encoding.has('Differences')) { - var diffEncoding = encoding.get('Differences'); - var index = 0; - for (var j = 0; j < diffEncoding.length; j++) { - var data = diffEncoding[j]; - if (isNum(data)) - index = data; - else - differences[index++] = data.name; - } - } - } else if (isName(encoding)) { - baseEncoding = Encodings[encoding.name].slice(); + } else { + var firstChar = properties.firstChar; + var widths = xref.fetchIfRef(dict.get('Widths')); + if (widths) { + for (var i = 0, j = firstChar; i < widths.length; i++, j++) + glyphsWidths[j] = widths[i]; + defaultWidth = parseFloat(descriptor.get('MissingWidth')) || 0; } else { - error('Encoding is not a Name nor a Dict'); - } - } + // Trying get the BaseFont metrics (see comment above). + var baseFontName = dict.get('BaseFont'); + if (isName(baseFontName)) { + var metrics = this.getBaseFontMetrics(baseFontName.name); - if (!baseEncoding) { - switch (type) { - case 'TrueType': - baseEncoding = Encodings.WinAnsiEncoding.slice(); - break; - case 'Type1': - case 'Type3': - baseEncoding = Encodings.StandardEncoding.slice(); - break; - default: - warn('Unknown type of font: ' + type); - baseEncoding = []; - break; - } - } - - // merge in the differences - var firstChar = properties.firstChar; - var lastChar = properties.lastChar; - var widths = properties.widths || []; - var glyphs = {}; - for (var i = firstChar; i <= lastChar; i++) { - var glyph = differences[i]; - var replaceGlyph = true; - if (!glyph) { - glyph = baseEncoding[i] || i; - replaceGlyph = false; - } - var index = GlyphsUnicode[glyph] || i; - var width = widths[i] || widths[glyph]; - map[i] = { - unicode: index, - width: isNum(width) ? width : properties.defaultWidth - }; - - if (replaceGlyph || !glyphs[glyph]) - glyphs[glyph] = map[i]; - if (replaceGlyph || !glyphs[index]) - glyphs[index] = map[i]; - - // If there is no file, the character mapping can't be modified - // but this is unlikely that there is any standard encoding with - // chars below 0x1f, so that's fine. - if (!properties.file) - continue; - - if (index <= 0x1f || (index >= 127 && index <= 255)) - map[i].unicode += kCmapGlyphOffset; - } - - if (type == 'TrueType' && dict.has('ToUnicode') && differences) { - var cmapObj = dict.get('ToUnicode'); - if (isRef(cmapObj)) { - cmapObj = xref.fetch(cmapObj); - } - if (isName(cmapObj)) { - error('ToUnicode file cmap translation not implemented'); - } else if (isStream(cmapObj)) { - var tokens = []; - var token = ''; - var beginArrayToken = {}; - - var cmap = cmapObj.getBytes(cmapObj.length); - for (var i = 0; i < cmap.length; i++) { - var byte = cmap[i]; - if (byte == 0x20 || byte == 0x0D || byte == 0x0A || - byte == 0x3C || byte == 0x5B || byte == 0x5D) { - switch (token) { - case 'usecmap': - error('usecmap is not implemented'); - break; - - case 'beginbfchar': - case 'beginbfrange': - case 'begincidchar': - case 'begincidrange': - token = ''; - tokens = []; - break; - - case 'endcidrange': - case 'endbfrange': - for (var j = 0; j < tokens.length; j += 3) { - var startRange = tokens[j]; - var endRange = tokens[j + 1]; - var code = tokens[j + 2]; - while (startRange < endRange) { - var mapping = map[startRange] || {}; - mapping.unicode = code++; - map[startRange] = mapping; - ++startRange; - } - } - break; - - case 'endcidchar': - case 'endbfchar': - for (var j = 0; j < tokens.length; j += 2) { - var index = tokens[j]; - var code = tokens[j + 1]; - var mapping = map[index] || {}; - mapping.unicode = code; - map[index] = mapping; - } - break; - - case '': - break; - - default: - if (token[0] >= '0' && token[0] <= '9') - token = parseInt(token, 10); // a number - tokens.push(token); - token = ''; - } - switch (byte) { - case 0x5B: - // begin list parsing - tokens.push(beginArrayToken); - break; - case 0x5D: - // collect array items - var items = [], item; - while (tokens.length && - (item = tokens.pop()) != beginArrayToken) - items.unshift(item); - tokens.push(items); - break; - } - } else if (byte == 0x3E) { - if (token.length) { - // parsing hex number - tokens.push(parseInt(token, 16)); - token = ''; - } - } else { - token += String.fromCharCode(byte); - } + glyphsWidths = metrics.widths; + defaultWidth = metrics.defaultWidth; } } } - return glyphs; + + properties.defaultWidth = defaultWidth; + properties.widths = glyphsWidths; }, - getBaseFontMetricsAndMap: function getBaseFontMetricsAndMap(name) { - var map = {}; - if (/^Symbol(-?(Bold|Italic))*$/.test(name)) { - // special case for symbols - var encoding = Encodings.symbolsEncoding.slice(); - for (var i = 0, n = encoding.length, j; i < n; i++) { - if (!(j = encoding[i])) - continue; - map[i] = GlyphsUnicode[j] || 0; - } - } - - var defaultWidth = 0; - var widths = Metrics[stdFontMap[name] || name]; - if (isNum(widths)) { - defaultWidth = widths; - widths = null; + getBaseFontMetrics: function getBaseFontMetrics(name) { + var defaultWidth = 0, widths = []; + var glyphWidths = Metrics[stdFontMap[name] || name]; + if (isNum(glyphWidths)) { + defaultWidth = glyphWidths; + } else { + widths = glyphWidths; } return { defaultWidth: defaultWidth, - widths: widths || [], - map: map + widths: widths }; }, @@ -755,6 +713,7 @@ var PartialEvaluator = (function partialEvaluator() { assertWellFormed(isName(type), 'invalid font Subtype'); composite = true; } + var maxCharIndex = composite ? 0xFFFF : 0xFF; var descriptor = xref.fetchIfRef(dict.get('FontDescriptor')); if (!descriptor) { @@ -773,18 +732,16 @@ var PartialEvaluator = (function partialEvaluator() { // Using base font name as a font name. baseFontName = baseFontName.name.replace(/[,_]/g, '-'); - var metricsAndMap = this.getBaseFontMetricsAndMap(baseFontName); + var metrics = this.getBaseFontMetrics(baseFontName); var properties = { type: type.name, - encoding: metricsAndMap.map, - differences: [], - widths: metricsAndMap.widths, - defaultWidth: metricsAndMap.defaultWidth, + widths: metrics.widths, + defaultWidth: metrics.defaultWidth, firstChar: 0, - lastChar: 256 + lastChar: maxCharIndex }; - this.extractEncoding(dict, xref, properties); + this.extractDataStructures(dict, dict, xref, properties); return { name: baseFontName, @@ -801,26 +758,7 @@ var PartialEvaluator = (function partialEvaluator() { // TODO Fill the width array depending on which of the base font this is // a variant. var firstChar = xref.fetchIfRef(dict.get('FirstChar')) || 0; - var lastChar = xref.fetchIfRef(dict.get('LastChar')) || 256; - var defaultWidth = 0; - var glyphWidths = {}; - var encoding = {}; - var widths = xref.fetchIfRef(dict.get('Widths')); - if (widths) { - for (var i = 0, j = firstChar; i < widths.length; i++, j++) - glyphWidths[j] = widths[i]; - defaultWidth = parseFloat(descriptor.get('MissingWidth')) || 0; - } else { - // Trying get the BaseFont metrics (see comment above). - var baseFontName = dict.get('BaseFont'); - if (isName(baseFontName)) { - var metricsAndMap = this.getBaseFontMetricsAndMap(baseFontName.name); - - glyphWidths = metricsAndMap.widths; - defaultWidth = metricsAndMap.defaultWidth; - encoding = metricsAndMap.map; - } - } + var lastChar = xref.fetchIfRef(dict.get('LastChar')) || maxCharIndex; var fontName = xref.fetchIfRef(descriptor.get('FontName')); assertWellFormed(isName(fontName), 'invalid font name'); @@ -853,34 +791,30 @@ var PartialEvaluator = (function partialEvaluator() { fixedPitch: false, fontMatrix: dict.get('FontMatrix') || IDENTITY_MATRIX, firstChar: firstChar || 0, - lastChar: lastChar || 256, + lastChar: lastChar || maxCharIndex, bbox: descriptor.get('FontBBox'), ascent: descriptor.get('Ascent'), descent: descriptor.get('Descent'), xHeight: descriptor.get('XHeight'), capHeight: descriptor.get('CapHeight'), - defaultWidth: defaultWidth, flags: descriptor.get('Flags'), italicAngle: descriptor.get('ItalicAngle'), - differences: [], - widths: glyphWidths, - encoding: encoding, coded: false }; - properties.glyphs = this.extractEncoding(dict, xref, properties); + this.extractWidths(dict, xref, descriptor, properties); + this.extractDataStructures(dict, baseDict, xref, properties); if (type.name === 'Type3') { properties.coded = true; var charProcs = xref.fetchIfRef(dict.get('CharProcs')); var fontResources = xref.fetchIfRef(dict.get('Resources')) || resources; properties.resources = fontResources; + properties.charProcIRQueues = {}; for (var key in charProcs.map) { var glyphStream = xref.fetchIfRef(charProcs.map[key]); var queueObj = {}; - properties.glyphs[key].IRQueue = this.getIRQueue(glyphStream, - fontResources, - queueObj, - dependency); + properties.charProcIRQueues[key] = + this.getIRQueue(glyphStream, fontResources, queueObj, dependency); } } diff --git a/src/fonts.js b/src/fonts.js index b027b766a..202481449 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -672,6 +672,44 @@ var UnicodeRanges = [ { 'begin': 0x1F030, 'end': 0x1F09F } // Domino Tiles ]; +var MacStandardGlyphOrdering = [ + '.notdef', '.null', 'nonmarkingreturn', 'space', 'exclam', 'quotedbl', + 'numbersign', 'dollar', 'percent', 'ampersand', 'quotesingle', 'parenleft', + 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', 'slash', + 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', + 'nine', 'colon', 'semicolon', 'less', 'equal', 'greater', 'question', 'at', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', + 'backslash', 'bracketright', 'asciicircum', 'underscore', 'grave', 'a', 'b', + 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', + 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright', + 'asciitilde', 'Adieresis', 'Aring', 'Ccedilla', 'Eacute', 'Ntilde', + 'Odieresis', 'Udieresis', 'aacute', 'agrave', 'acircumflex', 'adieresis', + 'atilde', 'aring', 'ccedilla', 'eacute', 'egrave', 'ecircumflex', 'edieresis', + 'iacute', 'igrave', 'icircumflex', 'idieresis', 'ntilde', 'oacute', 'ograve', + 'ocircumflex', 'odieresis', 'otilde', 'uacute', 'ugrave', 'ucircumflex', + 'udieresis', 'dagger', 'degree', 'cent', 'sterling', 'section', 'bullet', + 'paragraph', 'germandbls', 'registered', 'copyright', 'trademark', 'acute', + 'dieresis', 'notequal', 'AE', 'Oslash', 'infinity', 'plusminus', 'lessequal', + 'greaterequal', 'yen', 'mu', 'partialdiff', 'summation', 'product', 'pi', + 'integral', 'ordfeminine', 'ordmasculine', 'Omega', 'ae', 'oslash', + 'questiondown', 'exclamdown', 'logicalnot', 'radical', 'florin', + 'approxequal', 'Delta', 'guillemotleft', 'guillemotright', 'ellipsis', + 'nonbreakingspace', 'Agrave', 'Atilde', 'Otilde', 'OE', 'oe', 'endash', + 'emdash', 'quotedblleft', 'quotedblright', 'quoteleft', 'quoteright', + 'divide', 'lozenge', 'ydieresis', 'Ydieresis', 'fraction', 'currency', + 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'daggerdbl', 'periodcentered', + 'quotesinglbase', 'quotedblbase', 'perthousand', 'Acircumflex', + 'Ecircumflex', 'Aacute', 'Edieresis', 'Egrave', 'Iacute', 'Icircumflex', + 'Idieresis', 'Igrave', 'Oacute', 'Ocircumflex', 'apple', 'Ograve', 'Uacute', + 'Ucircumflex', 'Ugrave', 'dotlessi', 'circumflex', 'tilde', 'macron', + 'breve', 'dotaccent', 'ring', 'cedilla', 'hungarumlaut', 'ogonek', 'caron', + 'Lslash', 'lslash', 'Scaron', 'scaron', 'Zcaron', 'zcaron', 'brokenbar', + 'Eth', 'eth', 'Yacute', 'yacute', 'Thorn', 'thorn', 'minus', 'multiply', + 'onesuperior', 'twosuperior', 'threesuperior', 'onehalf', 'onequarter', + 'threequarters', 'franc', 'Gbreve', 'gbreve', 'Idotaccent', 'Scedilla', + 'scedilla', 'Cacute', 'cacute', 'Ccaron', 'ccaron', 'dcroat']; + function getUnicodeRangeFor(value) { for (var i = 0; i < UnicodeRanges.length; i++) { var range = UnicodeRanges[i]; @@ -681,6 +719,16 @@ function getUnicodeRangeFor(value) { return -1; } +function adaptUnicode(unicode) { + return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ? + unicode + kCmapGlyphOffset : unicode; +} + +function isAdaptedUnicode(unicode) { + return unicode >= kCmapGlyphOffset && + unicode < kCmapGlyphOffset + kSizeOfGlyphArea; +} + /** * 'Font' is the class the outside world should use, it encapsulate all the font * decoding logics whatever type it is (assuming the font type is supported). @@ -692,8 +740,8 @@ function getUnicodeRangeFor(value) { var Font = (function Font() { var constructor = function font_constructor(name, file, properties) { this.name = name; - this.encoding = properties.encoding; this.coded = properties.coded; + this.charProcIRQueues = properties.charProcIRQueues; this.resources = properties.resources; this.sizes = []; @@ -702,6 +750,9 @@ var Font = (function Font() { names = names.split(/[-,_]/g)[0]; this.serif = serifFonts[names] || (name.search(/serif/gi) != -1); + var type = properties.type; + this.type = type; + // If the font is to be ignored, register it like an already loaded font // to avoid the cost of waiting for it be be loaded by the platform. if (properties.ignore) { @@ -709,12 +760,19 @@ var Font = (function Font() { this.loading = false; return; } + + this.differences = properties.differences; + this.widths = properties.widths; + this.defaultWidth = properties.defaultWidth; + this.composite = properties.composite; + this.toUnicode = properties.toUnicode; + this.fontMatrix = properties.fontMatrix; if (properties.type == 'Type3') return; - // Trying to fix encoding using glyph widths and CIDSystemInfo. - this.fixWidths(properties); + // Trying to fix encoding using glyph CIDSystemInfo. + this.loadCidToUnicode(properties); if (!file) { // The file data is not specified. Trying to fix the font name @@ -730,15 +788,14 @@ var Font = (function Font() { // name ArialBlack for example will be replaced by Helvetica. this.black = (name.search(/Black/g) != -1); - this.defaultWidth = properties.defaultWidth; + this.encoding = properties.baseEncoding; + this.noUnicodeAdaptation = true; this.loadedName = fontName.split('-')[0]; - this.composite = properties.composite; this.loading = false; return; } var data; - var type = properties.type; switch (type) { case 'Type1': case 'CIDFontType0': @@ -767,11 +824,11 @@ var Font = (function Font() { } this.data = data; - this.type = type; this.fontMatrix = properties.fontMatrix; - this.defaultWidth = properties.defaultWidth; + this.encoding = properties.baseEncoding; + this.hasShortCmap = properties.hasShortCmap; + this.hasEncoding = properties.hasEncoding; this.loadedName = getUniqueName(); - this.composite = properties.composite; this.loading = true; }; @@ -987,7 +1044,7 @@ var Font = (function Font() { format314); }; - function createOS2Table(properties, override) { + function createOS2Table(properties, charstrings, override) { override = override || { unitsPerEm: 0, yMax: 0, @@ -1004,26 +1061,31 @@ var Font = (function Font() { var firstCharIndex = null; var lastCharIndex = 0; - var encoding = properties.encoding; - for (var index in encoding) { - var code = encoding[index].unicode; - if (firstCharIndex > code || !firstCharIndex) - firstCharIndex = code; - if (lastCharIndex < code) - lastCharIndex = code; + if (charstrings) { + for (var i = 0; i < charstrings.length; ++i) { + var code = charstrings[i].unicode; + if (firstCharIndex > code || !firstCharIndex) + firstCharIndex = code; + if (lastCharIndex < code) + lastCharIndex = code; - var position = getUnicodeRangeFor(code); - if (position < 32) { - ulUnicodeRange1 |= 1 << position; - } else if (position < 64) { - ulUnicodeRange2 |= 1 << position - 32; - } else if (position < 96) { - ulUnicodeRange3 |= 1 << position - 64; - } else if (position < 123) { - ulUnicodeRange4 |= 1 << position - 96; - } else { - error('Unicode ranges Bits > 123 are reserved for internal usage'); + var position = getUnicodeRangeFor(code); + if (position < 32) { + ulUnicodeRange1 |= 1 << position; + } else if (position < 64) { + ulUnicodeRange2 |= 1 << position - 32; + } else if (position < 96) { + ulUnicodeRange3 |= 1 << position - 64; + } else if (position < 123) { + ulUnicodeRange4 |= 1 << position - 96; + } else { + error('Unicode ranges Bits > 123 are reserved for internal usage'); + } } + } else { + // TODO + firstCharIndex = 0; + lastCharIndex = 255; } var unitsPerEm = override.unitsPerEm || kPDFGlyphSpaceUnits; @@ -1208,6 +1270,29 @@ var Font = (function Font() { }; }; + function createGlyphNameMap(glyphs, ids, properties) { + var glyphNames = properties.glyphNames; + if (!glyphNames) { + properties.glyphNameMap = {}; + return; + } + var glyphsLength = glyphs.length; + var glyphNameMap = {}; + var encoding = []; + for (var i = 0; i < glyphsLength; ++i) { + var glyphName = glyphNames[ids[i]]; + if (!glyphName) + continue; + var unicode = glyphs[i].unicode; + glyphNameMap[glyphName] = unicode; + var code = glyphs[i].code; + encoding[code] = glyphName; + } + properties.glyphNameMap = glyphNameMap; + if (!properties.hasEncoding) + properties.baseEncoding = encoding; + } + function replaceCMapTable(cmap, font, properties) { var start = (font.start ? font.start : 0) + cmap.offset; font.pos = start; @@ -1262,7 +1347,6 @@ var Font = (function Font() { cmap.data[i] = data.charCodeAt(i); } - var encoding = properties.encoding; for (var i = 0; i < numRecords; i++) { var table = tables[i]; font.pos = start + table.offset; @@ -1271,29 +1355,88 @@ var Font = (function Font() { var length = int16(font.getBytes(2)); var language = int16(font.getBytes(2)); - if (format == 4) { - return cmap.data; - } else if (format == 0) { + if (format == 0) { // Characters below 0x20 are controls characters that are hardcoded // into the platform so if some characters in the font are assigned // under this limit they will not be displayed so let's rewrite the // CMap. var glyphs = []; - var deltas = []; + var ids = []; for (var j = 0; j < 256; j++) { var index = font.getByte(); if (index) { - deltas.push(index); - - var unicode = j + kCmapGlyphOffset; - var mapping = encoding[j] || {}; - mapping.unicode = unicode; - encoding[j] = mapping; - glyphs.push({ unicode: unicode }); + var unicode = adaptUnicode(j); + glyphs.push({ unicode: unicode, code: j }); + ids.push(index); } } - return cmap.data = createCMapTable(glyphs, deltas); + properties.hasShortCmap = true; + + createGlyphNameMap(glyphs, ids, properties); + return cmap.data = createCMapTable(glyphs, ids); + } else if (format == 4) { + // re-creating the table in format 4 since the encoding + // might be changed + var segCount = (int16(font.getBytes(2)) >> 1); + font.getBytes(6); // skipping range fields + var segIndex, segments = []; + for (segIndex = 0; segIndex < segCount; segIndex++) { + segments.push({ end: int16(font.getBytes(2)) }); + } + font.getBytes(2); + for (segIndex = 0; segIndex < segCount; segIndex++) { + segments[segIndex].start = int16(font.getBytes(2)); + } + + for (segIndex = 0; segIndex < segCount; segIndex++) { + segments[segIndex].delta = int16(font.getBytes(2)); + } + + var offsetsCount = 0; + for (segIndex = 0; segIndex < segCount; segIndex++) { + var segment = segments[segIndex]; + var rangeOffset = int16(font.getBytes(2)); + if (!rangeOffset) { + segment.offsetIndex = -1; + continue; + } + + var offsetIndex = (rangeOffset >> 1) - (segCount - segIndex); + segment.offsetIndex = offsetIndex; + offsetsCount = Math.max(offsetsCount, offsetIndex + + segment.end - segment.start + 1); + } + + var offsets = []; + for (var j = 0; j < offsetsCount; j++) + offsets.push(int16(font.getBytes(2))); + + var glyphs = [], ids = []; + + for (segIndex = 0; segIndex < segCount; segIndex++) { + var segment = segments[segIndex]; + var start = segment.start, end = segment.end; + var delta = segment.delta, offsetIndex = segment.offsetIndex; + + for (var j = start; j <= end; j++) { + if (j == 0xFFFF) + continue; + + var glyphCode = offsetIndex < 0 ? j : + offsets[offsetIndex + j - start]; + glyphCode = (glyphCode + delta) & 0xFFFF; + if (glyphCode == 0) + continue; + + var unicode = adaptUnicode(j); + glyphs.push({ unicode: unicode, code: j }); + ids.push(glyphCode); + } + } + + createGlyphNameMap(glyphs, ids, properties); + return cmap.data = createCMapTable(glyphs, ids); } else if (format == 6) { // Format 6 is a 2-bytes dense mapping, which means the font data // lives glue together even if they are pretty far in the unicode @@ -1305,15 +1448,15 @@ var Font = (function Font() { var glyphs = []; var ids = []; - for (var j = 0; j < firstCode + entryCount; j++) { - var code = (j >= firstCode) ? int16(font.getBytes(2)) : j; - glyphs.push({ unicode: j + kCmapGlyphOffset }); - ids.push(code); - - var mapping = encoding[j] || {}; - mapping.unicode = glyphs[j].unicode; - encoding[j] = mapping; + for (var j = 0; j < entryCount; j++) { + var glyphCode = int16(font.getBytes(2)); + var code = firstCode + j; + var unicode = adaptUnicode(code); + glyphs.push({ unicode: unicode, code: code }); + ids.push(glyphCode); } + + createGlyphNameMap(glyphs, ids, properties); return cmap.data = createCMapTable(glyphs, ids); } } @@ -1396,6 +1539,52 @@ var Font = (function Font() { } } + function readGlyphNameMap(post, properties) { + var start = (font.start ? font.start : 0) + post.offset; + font.pos = start; + + var length = post.length, end = start + length; + var version = int32(font.getBytes(4)); + // skip rest to the tables + font.getBytes(28); + + var glyphNames; + switch (version) { + case 0x00010000: + glyphNames = MacStandardGlyphOrdering; + break; + case 0x00020000: + var numGlyphs = int16(font.getBytes(2)); + var glyphNameIndexes = []; + for (var i = 0; i < numGlyphs; ++i) + glyphNameIndexes.push(int16(font.getBytes(2))); + var customNames = []; + while (font.pos < end) { + var stringLength = font.getByte(); + var string = ''; + for (var i = 0; i < stringLength; ++i) + string += font.getChar(); + customNames.push(string); + } + glyphNames = []; + for (var i = 0; i < numGlyphs; ++i) { + var j = glyphNameIndexes[i]; + if (j < 258) { + glyphNames.push(MacStandardGlyphOrdering[j]); + continue; + } + glyphNames.push(customNames[j - 258]); + } + break; + case 0x00030000: + break; + default: + warn('Unknown/unsupported post table version ' + version); + break; + } + properties.glyphNames = glyphNames; + } + // Check that required tables are present var requiredTables = ['OS/2', 'cmap', 'head', 'hhea', 'hmtx', 'maxp', 'name', 'post']; @@ -1403,7 +1592,7 @@ var Font = (function Font() { var header = readOpenTypeHeader(font); var numTables = header.numTables; - var cmap, maxp, hhea, hmtx, vhea, vmtx, head, loca, glyf; + var cmap, post, maxp, hhea, hmtx, vhea, vmtx, head, loca, glyf; var tables = []; for (var i = 0; i < numTables; i++) { var table = readTableEntry(font); @@ -1411,6 +1600,8 @@ var Font = (function Font() { if (index != -1) { if (table.tag == 'cmap') cmap = table; + else if (table.tag == 'post') + post = table; else if (table.tag == 'maxp') maxp = table; else if (table.tag == 'hhea') @@ -1461,7 +1652,7 @@ var Font = (function Font() { tables.push({ tag: 'OS/2', - data: stringToArray(createOS2Table(properties, override)) + data: stringToArray(createOS2Table(properties, null, override)) }); } @@ -1486,6 +1677,11 @@ var Font = (function Font() { hhea.data[11] = 0xFF; } + // The 'post' table has glyphs names. + if (post) { + readGlyphNameMap(post, properties); + } + // Replace the old CMAP table with a shiny new one if (properties.type == 'CIDFontType2') { // Type2 composite fonts map characters directly to glyphs so the cmap @@ -1503,28 +1699,17 @@ var Font = (function Font() { tables.push(cmap); } - var encoding = properties.encoding, i; - - // offsetting glyphs to avoid problematic unicode ranges - for (i in encoding) { - if (encoding.hasOwnProperty(i)) { - var unicode = encoding[i].unicode; - if (unicode <= 0x1f || - (unicode >= 127 && unicode < kSizeOfGlyphArea)) - encoding[i].unicode += kCmapGlyphOffset; - } - } - var glyphs = []; for (i = 1; i < numGlyphs; i++) { - glyphs.push({ - unicode: i <= 0x1f || (i >= 127 && i < kSizeOfGlyphArea) ? - i + kCmapGlyphOffset : i - }); + if (isAdaptedUnicode(i)) + continue; + + glyphs.push({ unicode: adaptUnicode(i) }); } cmap.data = createCMapTable(glyphs); } else { replaceCMapTable(cmap, font, properties); + this.glyphNameMap = properties.glyphNameMap; } // Rewrite the 'post' table if needed @@ -1598,12 +1783,29 @@ var Font = (function Font() { var charstrings = font.charstrings; properties.fixedPitch = isFixedPitch(charstrings); + var glyphNameMap = {}; + for (var i = 0; i < charstrings.length; ++i) { + var charstring = charstrings[i]; + glyphNameMap[charstring.glyph] = charstring.unicode; + } + this.glyphNameMap = glyphNameMap; + + if (!properties.hasEncoding && (properties.subtype == 'Type1C' || + properties.subtype == 'CIDFontType0C')) { + var encoding = []; + for (var i = 0; i < charstrings.length; ++i) { + var charstring = charstrings[i]; + encoding[charstring.code] = charstring.glyph; + } + properties.baseEncoding = encoding; + } + var fields = { // PostScript Font Program 'CFF ': font.data, // OS/2 and Windows Specific metrics - 'OS/2': stringToArray(createOS2Table(properties)), + 'OS/2': stringToArray(createOS2Table(properties, charstrings)), // Character to glyphs mapping 'cmap': createCMapTable(charstrings.slice(), @@ -1657,9 +1859,8 @@ var Font = (function Font() { // Horizontal metrics 'hmtx': (function fontFieldsHmtx() { var hmtx = '\x00\x00\x00\x00'; // Fake .notdef - for (var i = 0; i < charstrings.length; i++) { + for (var i = 0; i < charstrings.length; i++) hmtx += string16(charstrings[i].width) + string16(0); - } return stringToArray(hmtx); })(), @@ -1688,82 +1889,48 @@ var Font = (function Font() { return stringToArray(otf.file); }, - fixWidths: function font_fixWidths(properties) { - if (properties.type !== 'CIDFontType0' && - properties.type !== 'CIDFontType2') - return; - - var encoding = properties.encoding; - if (encoding[0]) + loadCidToUnicode: function font_loadCidToUnicode(properties) { + if (properties.cidToGidMap) { + this.cidToUnicode = properties.cidToGidMap; return; - var glyphsWidths = properties.widths; - if (!glyphsWidths) + } + + if (!properties.cidSystemInfo) return; - var defaultWidth = properties.defaultWidth; + var cidToUnicodeMap = []; + this.cidToUnicode = cidToUnicodeMap; + var cidSystemInfo = properties.cidSystemInfo; var cidToUnicode; if (cidSystemInfo) { cidToUnicode = CIDToUnicodeMaps[ cidSystemInfo.registry + '-' + cidSystemInfo.ordering]; } - if (!cidToUnicode) { - // the font is directly characters to glyphs with no encoding - // so create an identity encoding - for (i = 0; i < 0xD800; i++) { - var width = glyphsWidths[i]; - encoding[i] = { - unicode: i, - width: isNum(width) ? width : defaultWidth - }; - } - // skipping surrogates + 256-user defined - for (i = 0xE100; i <= 0xFFFF; i++) { - var width = glyphsWidths[i]; - encoding[i] = { - unicode: i, - width: isNum(width) ? width : defaultWidth - }; - } - return; - } - encoding[0] = { unicode: 0, width: 0 }; + if (!cidToUnicode) + return; // identity encoding + var glyph = 1, i, j, k; for (i = 0; i < cidToUnicode.length; ++i) { var unicode = cidToUnicode[i]; - var width; if (isArray(unicode)) { var length = unicode.length; - width = glyphsWidths[glyph]; - for (j = 0; j < length; j++) { - k = unicode[j]; - encoding[k] = { - unicode: k, - width: isNum(width) ? width : defaultWidth - }; - } + for (j = 0; j < length; j++) + cidToUnicodeMap[unicode[j]] = glyph; glyph++; } else if (typeof unicode === 'object') { var fillLength = unicode.f; if (fillLength) { k = unicode.c; for (j = 0; j < fillLength; ++j) { - width = glyphsWidths[glyph++]; - encoding[k] = { - unicode: k, - width: isNum(width) ? width : defaultWidth - }; + cidToUnicodeMap[k] = glyph++; k++; } } else glyph += unicode.s; } else if (unicode) { - width = glyphsWidths[glyph++]; - encoding[unicode] = { - unicode: unicode, - width: isNum(width) ? width : defaultWidth - }; + cidToUnicodeMap[unicode] = glyph++; } else glyph++; } @@ -1798,6 +1965,74 @@ var Font = (function Font() { return rule; }, + charToGlyph: function fonts_charToGlyph(charcode) { + var unicode, width, codeIRQueue; + + var width = this.widths[charcode]; + + switch (this.type) { + case 'CIDFontType0': + if (this.noUnicodeAdaptation) { + width = this.widths[this.cidToUnicode[charcode]]; + unicode = charcode; + break; + } + unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode); + break; + case 'CIDFontType2': + if (this.noUnicodeAdaptation) { + width = this.widths[this.cidToUnicode[charcode]]; + unicode = charcode; + break; + } + unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode); + break; + case 'Type1': + var glyphName = this.differences[charcode] || this.encoding[charcode]; + if (this.noUnicodeAdaptation) { + width = this.widths[glyphName]; + unicode = GlyphsUnicode[glyphName] || charcode; + break; + } + unicode = this.glyphNameMap[glyphName] || + adaptUnicode(GlyphsUnicode[glyphName] || charcode); + break; + case 'Type3': + var glyphName = this.differences[charcode] || this.encoding[charcode]; + codeIRQueue = this.charProcIRQueues[glyphName]; + unicode = charcode; + break; + case 'TrueType': + var glyphName = this.differences[charcode] || this.encoding[charcode]; + if (!glyphName) + glyphName = Encodings.StandardEncoding[charcode]; + if (!this.hasEncoding) { + width = this.widths[charcode] || this.widths[glyphName]; + unicode = this.noUnicodeAdaptation ? + charcode : adaptUnicode(charcode); + break; + } + if (this.hasShortCmap) { + var j = Encodings.MacRomanEncoding.indexOf(glyphName); + unicode = j >= 0 ? adaptUnicode(j) : + this.glyphNameMap[glyphName]; + } else { + unicode = glyphName in GlyphsUnicode ? + adaptUnicode(GlyphsUnicode[glyphName]) : + this.glyphNameMap[glyphName]; + } + break; + default: + warn('Unsupported font type: ' + this.type); + break; + } + return { + unicode: unicode, + width: isNum(width) ? width : this.defaultWidth, + codeIRQueue: codeIRQueue + }; + }, + charsToGlyphs: function fonts_chars2Glyphs(chars) { var charsCache = this.charsCache; var glyphs; @@ -1813,11 +2048,6 @@ var Font = (function Font() { if (!charsCache) charsCache = this.charsCache = Object.create(null); - // translate the string using the font's encoding - var encoding = this.encoding; - if (!encoding) - return chars; - glyphs = []; if (this.composite) { @@ -1829,14 +2059,7 @@ var Font = (function Font() { // loop should never end on the last byte for (var i = 0; i < length; i++) { var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]); - var glyph = encoding[charcode]; - if ('undefined' == typeof(glyph)) { - warn('Unencoded charcode ' + charcode); - glyph = { - unicode: charcode, - width: this.defaultWidth - }; - } + var glyph = this.charToGlyph(charcode); glyphs.push(glyph); // placing null after each word break charcode (ASCII SPACE) if (charcode == 0x20) @@ -1846,14 +2069,7 @@ var Font = (function Font() { else { for (var i = 0; i < chars.length; ++i) { var charcode = chars.charCodeAt(i); - var glyph = encoding[charcode]; - if ('undefined' == typeof(glyph)) { - warn('Unencoded charcode ' + charcode); - glyph = { - unicode: charcode, - width: this.defaultWidth - }; - } + var glyph = this.charToGlyph(charcode); glyphs.push(glyph); if (charcode == 0x20) glyphs.push(null); @@ -2107,6 +2323,17 @@ var Type1Parser = function type1Parser() { warn('Support for Type1 command ' + value + ' (' + escape + ') is not implemented in charstring: ' + charstring); + if (value == 12) { + // we know how to ignore only some the Type1 commands + switch (escape) { + case 7: + charstring.push('drop', 'drop', 'drop', 'drop'); + continue; + case 8: + charstring.push('drop'); + continue; + } + } } value = command; @@ -2327,24 +2554,30 @@ var Type1Parser = function type1Parser() { properties.fontMatrix = matrix; break; case '/Encoding': - var size = parseInt(getToken(), 10); - getToken(); // read in 'array' + var encodingArg = getToken(); + var encoding; + if (!/^\d+$/.test(encodingArg)) { + // encoding name is specified + encoding = Encodings[encodingArg]; + } else { + encoding = []; + var size = parseInt(encodingArg, 10); + getToken(); // read in 'array' - for (var j = 0; j < size; j++) { - var token = getToken(); - if (token == 'dup') { - var index = parseInt(getToken(), 10); - var glyph = getToken(); - - if ('undefined' == typeof(properties.differences[index])) { - var mapping = properties.encoding[index] || {}; - mapping.unicode = GlyphsUnicode[glyph] || index; - properties.glyphs[glyph] = properties.encoding[index] = - mapping; + for (var j = 0; j < size; j++) { + var token = getToken(); + if (token == 'dup') { + var index = parseInt(getToken(), 10); + var glyph = getToken(); + encoding[index] = glyph; + getToken(); // read the in 'put' } - getToken(); // read the in 'put' } } + if (!properties.hasEncoding && encoding) { + properties.baseEncoding = encoding; + break; + } break; } token = ''; @@ -2487,46 +2720,38 @@ CFF.prototype = { }, encodeNumber: function cff_encodeNumber(value) { + // some of the fonts has ouf-of-range values + // they are just arithmetic overflows + // make sanitizer happy + value |= 0; if (value >= -32768 && value <= 32767) { return '\x1c' + String.fromCharCode((value >> 8) & 0xFF) + String.fromCharCode(value & 0xFF); - } else if (value >= (-2147483648) && value <= 2147483647) { + } else { return '\x1d' + String.fromCharCode((value >> 24) & 0xFF) + String.fromCharCode((value >> 16) & 0xFF) + String.fromCharCode((value >> 8) & 0xFF) + String.fromCharCode(value & 0xFF); } - error('Value: ' + value + ' is not allowed'); - return null; }, getOrderedCharStrings: function cff_getOrderedCharStrings(glyphs, properties) { var charstrings = []; - var missings = []; - for (var i = 0; i < glyphs.length; i++) { - var glyph = glyphs[i]; - var mapping = properties.glyphs[glyph.glyph]; - if (!mapping) { - if (glyph.glyph != '.notdef') - missings.push(glyph.glyph); - } else { - charstrings.push({ - glyph: glyph.glyph, - unicode: mapping.unicode, - charstring: glyph.data, - width: glyph.width, - lsb: glyph.lsb - }); - } + var item = glyphs[i]; + charstrings.push({ + glyph: item.glyph, + unicode: adaptUnicode(i), + gid: i, + charstring: item.data, + width: item.width, + lsb: item.lsb + }); } - if (missings.length) - warn(missings + ' does not have unicode in the glyphs dictionary'); - charstrings.sort(function charstrings_sort(a, b) { return a.unicode - b.unicode; }); @@ -2807,6 +3032,20 @@ var Type2CFF = (function type2CFF() { var encoding = this.parseEncoding(topDict.Encoding, properties, strings, charset); + var charset, encoding; + var isCIDFont = properties.subtype == 'CIDFontType0C'; + if (isCIDFont) { + charset = []; + charset.length = charStrings.length; + encoding = this.parseCidMap(topDict.charset, + charStrings.length); + } else { + charset = this.parseCharsets(topDict.charset, + charStrings.length, strings); + encoding = this.parseEncoding(topDict.Encoding, properties, + strings, charset); + } + // The font sanitizer does not support CFF encoding with a // supplement, since the encoding is not really use to map // between gid to glyph, let's overwrite what is declared in @@ -2863,80 +3102,46 @@ var Type2CFF = (function type2CFF() { getCharStrings: function cff_charstrings(charsets, encoding, privateDict, properties) { - var defaultWidth = privateDict['defaultWidthX']; var charstrings = []; - var firstChar = properties.firstChar; - var glyphMap = {}; + var unicodeUsed = []; + var unassignedUnicodeItems = []; for (var i = 0; i < charsets.length; i++) { var glyph = charsets[i]; + var encodingFound = false; for (var charcode in encoding) { - if (encoding[charcode] == i) - glyphMap[glyph] = charcode | 0; + if (encoding[charcode] == i) { + var code = charcode | 0; + charstrings.push({ + unicode: adaptUnicode(code), + code: code, + gid: i, + glyph: glyph + }); + unicodeUsed[code] = true; + encodingFound = true; + break; + } + } + if (!encodingFound) { + unassignedUnicodeItems.push(i); } } - var differences = properties.differences; - for (var i = 0; i < differences.length; ++i) { - var glyph = differences[i]; - if (!glyph) - continue; - var oldGlyph = charsets[i]; - if (oldGlyph) - delete glyphMap[oldGlyph]; - glyphMap[differences[i]] = i; - } - - var glyphs = properties.glyphs; - for (var i = 1; i < charsets.length; i++) { - var glyph = charsets[i]; - var code = glyphMap[glyph] || 0; - - var mapping = glyphs[code] || glyphs[glyph] || { width: defaultWidth }; - var unicode = mapping.unicode; - - if (unicode <= 0x1f || (unicode >= 127 && unicode <= 255)) - unicode += kCmapGlyphOffset; - - var width = (mapping.hasOwnProperty('width') && isNum(mapping.width)) ? - mapping.width : defaultWidth; - properties.encoding[code] = { - unicode: unicode, - width: width - }; - + var nextUnusedUnicode = 0x21; + for (var j = 0; j < unassignedUnicodeItems.length; ++j) { + var i = unassignedUnicodeItems[j]; + // giving unicode value anyway + while (unicodeUsed[nextUnusedUnicode]) + nextUnusedUnicode++; + var code = nextUnusedUnicode++; charstrings.push({ - unicode: unicode, - width: width, + unicode: adaptUnicode(code), code: code, - gid: i + gid: i, + glyph: charsets[i] }); } - // sort the array by the unicode value - charstrings.sort(function type2CFFGetCharStringsSort(a, b) { - return a.unicode - b.unicode; - }); - - // remove duplicates -- they might appear during selection: - // properties.glyphs[code] || properties.glyphs[glyph] - var nextUnusedUnicode = kCmapGlyphOffset + 0x0020; - var lastUnicode = charstrings[0].unicode, wasModified = false; - for (var i = 1; i < charstrings.length; ++i) { - if (lastUnicode != charstrings[i].unicode) { - lastUnicode = charstrings[i].unicode; - continue; - } - // duplicate found -- keeping the item that has - // different code and unicode, that one created - // as result of modification of the base encoding - var duplicateIndex = - charstrings[i].unicode == charstrings[i].code ? i : i - 1; - charstrings[duplicateIndex].unicode = nextUnusedUnicode++; - wasModified = true; - } - if (!wasModified) - return charstrings; - // sort the array by the unicode value (again) charstrings.sort(function type2CFFGetCharStringsSort(a, b) { return a.unicode - b.unicode; @@ -2964,8 +3169,8 @@ var Type2CFF = (function type2CFF() { if (pos == 0 || pos == 1) { var gid = 1; - var baseEncoding = pos ? Encodings.ExpertEncoding.slice() : - Encodings.StandardEncoding.slice(); + var baseEncoding = pos ? Encodings.ExpertEncoding : + Encodings.StandardEncoding; for (var i = 0; i < charset.length; i++) { var index = baseEncoding.indexOf(charset[i]); if (index != -1) @@ -2985,8 +3190,8 @@ var Type2CFF = (function type2CFF() { var gid = 1; for (var i = 0; i < rangesCount; i++) { var start = bytes[pos++]; - var count = bytes[pos++]; - for (var j = start; j <= start + count; j++) + var left = bytes[pos++]; + for (var j = start; j <= start + left; j++) encoding[j] = gid++; } break; @@ -3047,6 +3252,46 @@ var Type2CFF = (function type2CFF() { } return charset; }, + + parseCidMap: function cff_parsecharsets(pos, length) { + var bytes = this.bytes; + var format = bytes[pos++]; + + var encoding = {}; + var map = {encoding: encoding}; + + encoding[0] = 0; + + var gid = 1; + switch (format) { + case 0: + while (gid < length) { + var cid = (bytes[pos++] << 8) | bytes[pos++]; + encoding[cid] = gid++; + } + break; + case 1: + while (gid < length) { + var cid = (bytes[pos++] << 8) | bytes[pos++]; + var count = bytes[pos++]; + for (var i = 0; i <= count; i++) + encoding[cid++] = gid++; + } + break; + case 2: + while (gid < length) { + var cid = (bytes[pos++] << 8) | bytes[pos++]; + var count = (bytes[pos++] << 8) | bytes[pos++]; + for (var i = 0; i <= count; i++) + encoding[cid++] = gid++; + } + break; + default: + error('Unknown charset format'); + } + return map; + }, + getPrivDict: function cff_getprivdict(baseDict, strings) { var dict = {}; @@ -3108,6 +3353,17 @@ var Type2CFF = (function type2CFF() { case 18: dict['Private'] = value; break; + case 3102: + case 3103: + case 3104: + case 3105: + case 3106: + case 3107: + case 3108: + case 3109: + case 3110: + dict['cidOperatorPresent'] = true; + break; default: TODO('interpret top dict key'); } @@ -3220,6 +3476,15 @@ var Type2CFF = (function type2CFF() { } var b = (b << 8) | op; } + if (!operands.length && b == 8 && + dict[pos + 1] == 9) { + // no operands for FamilyBlues, removing the key + // and next one is FamilyOtherBlues - skipping them + // also replacing FamilyBlues to pass sanitizer + dict[pos] = 139; + pos += 2; + continue; + } entries.push([b, operands]); operands = []; ++pos; From 38372ee2f215191c88c1fc538835942e59be8c0c Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 29 Oct 2011 08:45:29 -0500 Subject: [PATCH 02/13] TrueType encoding for files without font data --- src/fonts.js | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index 202481449..6202b3279 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -766,6 +766,7 @@ var Font = (function Font() { this.defaultWidth = properties.defaultWidth; this.composite = properties.composite; this.toUnicode = properties.toUnicode; + this.hasEncoding = properties.hasEncoding; this.fontMatrix = properties.fontMatrix; if (properties.type == 'Type3') @@ -827,7 +828,6 @@ var Font = (function Font() { this.fontMatrix = properties.fontMatrix; this.encoding = properties.baseEncoding; this.hasShortCmap = properties.hasShortCmap; - this.hasEncoding = properties.hasEncoding; this.loadedName = getUniqueName(); this.loading = true; }; @@ -2006,10 +2006,14 @@ var Font = (function Font() { var glyphName = this.differences[charcode] || this.encoding[charcode]; if (!glyphName) glyphName = Encodings.StandardEncoding[charcode]; + if (!isNum(width)) + width = this.widths[glyphName]; + if (this.noUnicodeAdaptation) { + unicode = GlyphsUnicode[glyphName] || charcode; + break; + } if (!this.hasEncoding) { - width = this.widths[charcode] || this.widths[glyphName]; - unicode = this.noUnicodeAdaptation ? - charcode : adaptUnicode(charcode); + unicode = adaptUnicode(charcode); break; } if (this.hasShortCmap) { From 343e174951ce6745d4761627882dcfd4e658111e Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 29 Oct 2011 09:29:19 -0500 Subject: [PATCH 03/13] Change cmap encoding for Type 1 --- src/fonts.js | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index 6202b3279..5d70bf5e4 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -729,6 +729,12 @@ function isAdaptedUnicode(unicode) { unicode < kCmapGlyphOffset + kSizeOfGlyphArea; } +function isSpecialUnicode(unicode) { + return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) || + unicode >= kCmapGlyphOffset && + unicode < kCmapGlyphOffset + kSizeOfGlyphArea; +} + /** * 'Font' is the class the outside world should use, it encapsulate all the font * decoding logics whatever type it is (assuming the font type is supported). @@ -2744,11 +2750,28 @@ CFF.prototype = { getOrderedCharStrings: function cff_getOrderedCharStrings(glyphs, properties) { var charstrings = []; - for (var i = 0; i < glyphs.length; i++) { + var reverseMapping = {}; + var encoding = properties.baseEncoding; + var differences = properties.differences; + var i, length; + for (i = 0, length = encoding.length; i < length; ++i) { + if (encoding[i] && !isSpecialUnicode(i)) + reverseMapping[encoding[i]] = i; + } + for (i = 0, length = differences.length; i < length; ++i) { + if (differences[i] && !isSpecialUnicode(i)) + reverseMapping[differences[i]] = i; + } + reverseMapping['.notdef'] = 0; + var unusedUnicode = kCmapGlyphOffset; + for (i = 0, length = glyphs.length; i < length; i++) { var item = glyphs[i]; + var glyphName = item.glyph; + var unicode = glyphName in reverseMapping ? + reverseMapping[glyphName] : unusedUnicode++; charstrings.push({ - glyph: item.glyph, - unicode: adaptUnicode(i), + glyph: glyphName, + unicode: unicode, gid: i, charstring: item.data, width: item.width, From a1d2c1c6d34ea6a7f4f66a228429e0941335b97e Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 29 Oct 2011 14:19:13 -0500 Subject: [PATCH 04/13] Fix Type1 width; TrueType special characters --- src/fonts.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index 5d70bf5e4..a70d9ff4f 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -1996,7 +1996,8 @@ var Font = (function Font() { case 'Type1': var glyphName = this.differences[charcode] || this.encoding[charcode]; if (this.noUnicodeAdaptation) { - width = this.widths[glyphName]; + if (!isNum(width)) + width = this.widths[glyphName]; unicode = GlyphsUnicode[glyphName] || charcode; break; } @@ -2024,7 +2025,7 @@ var Font = (function Font() { } if (this.hasShortCmap) { var j = Encodings.MacRomanEncoding.indexOf(glyphName); - unicode = j >= 0 ? adaptUnicode(j) : + unicode = j >= 0 && !isSpecialUnicode(j) ? j : this.glyphNameMap[glyphName]; } else { unicode = glyphName in GlyphsUnicode ? From aaad824f772c529ee54757661925581a72c5af42 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 29 Oct 2011 21:55:43 -0500 Subject: [PATCH 05/13] Fixing duplicates in cmap --- src/fonts.js | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index a70d9ff4f..62650b10f 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -1432,7 +1432,7 @@ var Font = (function Font() { var glyphCode = offsetIndex < 0 ? j : offsets[offsetIndex + j - start]; glyphCode = (glyphCode + delta) & 0xFFFF; - if (glyphCode == 0) + if (glyphCode == 0 || isAdaptedUnicode(j)) continue; var unicode = adaptUnicode(j); @@ -1457,6 +1457,9 @@ var Font = (function Font() { for (var j = 0; j < entryCount; j++) { var glyphCode = int16(font.getBytes(2)); var code = firstCode + j; + if (isAdaptedUnicode(glyphCode)) + continue; + var unicode = adaptUnicode(code); glyphs.push({ unicode: unicode, code: code }); ids.push(glyphCode); @@ -2754,14 +2757,23 @@ CFF.prototype = { var reverseMapping = {}; var encoding = properties.baseEncoding; var differences = properties.differences; - var i, length; + var usedIn = []; + var i, length, glyphName; for (i = 0, length = encoding.length; i < length; ++i) { - if (encoding[i] && !isSpecialUnicode(i)) - reverseMapping[encoding[i]] = i; + glyphName = encoding[i]; + if (!glyphName || isSpecialUnicode(i)) + continue; + reverseMapping[glyphName] = i; + usedIn[i] = glyphName; } for (i = 0, length = differences.length; i < length; ++i) { - if (differences[i] && !isSpecialUnicode(i)) - reverseMapping[differences[i]] = i; + glyphName = differences[i]; + if (!glyphName || isSpecialUnicode(i)) + continue; + if (usedIn[i]) + delete reverseMapping[usedIn[i]]; + reverseMapping[glyphName] = i; + usedIn[i] = glyphName; } reverseMapping['.notdef'] = 0; var unusedUnicode = kCmapGlyphOffset; From 5d347b0ddbbc4c7731798402bade6a65ef64f929 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sun, 30 Oct 2011 23:32:06 -0500 Subject: [PATCH 06/13] removing differences from the cmap --- src/fonts.js | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index 62650b10f..c3fcb3c8a 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -2756,24 +2756,12 @@ CFF.prototype = { var charstrings = []; var reverseMapping = {}; var encoding = properties.baseEncoding; - var differences = properties.differences; - var usedIn = []; var i, length, glyphName; for (i = 0, length = encoding.length; i < length; ++i) { glyphName = encoding[i]; if (!glyphName || isSpecialUnicode(i)) continue; reverseMapping[glyphName] = i; - usedIn[i] = glyphName; - } - for (i = 0, length = differences.length; i < length; ++i) { - glyphName = differences[i]; - if (!glyphName || isSpecialUnicode(i)) - continue; - if (usedIn[i]) - delete reverseMapping[usedIn[i]]; - reverseMapping[glyphName] = i; - usedIn[i] = glyphName; } reverseMapping['.notdef'] = 0; var unusedUnicode = kCmapGlyphOffset; From 70767b45227ce28b73cd870bc53451781901fad9 Mon Sep 17 00:00:00 2001 From: Artur Adib Date: Tue, 8 Nov 2011 09:16:23 -0500 Subject: [PATCH 07/13] README typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index df6e1c6d7..deb925601 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ using the pdf.js API. ### Extension -A up-to-date Firefox extension is also available: +An up-to-date Firefox extension is also available: + http://mozilla.github.com/pdf.js/extensions/firefox/pdf.js.xpi From f2d54d3207dac690396d278fcf651daa1cd4defb Mon Sep 17 00:00:00 2001 From: Artur Adib Date: Tue, 8 Nov 2011 20:02:28 -0500 Subject: [PATCH 08/13] Replacing URL flag format --- extensions/firefox/components/pdfContentHandler.js | 2 +- web/viewer.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions/firefox/components/pdfContentHandler.js b/extensions/firefox/components/pdfContentHandler.js index 444db1c1f..67459b759 100644 --- a/extensions/firefox/components/pdfContentHandler.js +++ b/extensions/firefox/components/pdfContentHandler.js @@ -52,7 +52,7 @@ pdfContentHandler.prototype = { } let targetUrl = aRequest.URI.spec; - if (targetUrl.indexOf('?pdfjs.action=download') >= 0) + if (targetUrl.indexOf('#pdfjs.action=download') >= 0) throw NS_ERROR_WONT_HANDLE_CONTENT; aRequest.cancel(Cr.NS_BINDING_ABORTED); diff --git a/web/viewer.js b/web/viewer.js index 1ab2c555c..bfb3a4303 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -139,7 +139,7 @@ var PDFView = { }, download: function pdfViewDownload() { - window.open(this.url + '?pdfjs.action=download', '_parent'); + window.open(this.url + '#pdfjs.action=download', '_parent'); }, navigateTo: function pdfViewNavigateTo(dest) { From 99254891e6b7a08aeb76748af83ab50762a043c7 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Thu, 10 Nov 2011 18:33:45 +0100 Subject: [PATCH 09/13] Disable work on firefox using feature detection --- src/core.js | 18 +++++++++++------- test/driver.js | 5 +++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/core.js b/src/core.js index 5e97763ab..43c059b56 100644 --- a/src/core.js +++ b/src/core.js @@ -15,10 +15,6 @@ if (!globalScope.PDFJS) { globalScope.PDFJS = {}; } -// Temporarily disabling workers until 'localhost' FF bugfix lands: -// https://bugzilla.mozilla.org/show_bug.cgi?id=683280 -globalScope.PDFJS.disableWorker = true; - // getPdf() // Convenience function to perform binary Ajax GET // Usage: getPdf('http://...', callback) @@ -471,6 +467,7 @@ var PDFDoc = (function pdfDoc() { this.objs = new PDFObjects(); this.pageCache = []; + this.fontsLoading = {}; this.workerReadyPromise = new Promise('workerReady'); // If worker support isn't disabled explicit and the browser has worker @@ -484,7 +481,16 @@ var PDFDoc = (function pdfDoc() { throw 'No PDFJS.workerSrc specified'; } - var worker = new Worker(workerSrc); + var worker + try { + worker = new Worker(workerSrc); + } catch (e) { + // Some versions of FF can't create a worker on localhost, see: + // https://bugzilla.mozilla.org/show_bug.cgi?id=683280 + globalScope.PDFJS.disableWorker = true; + this.setupFakeWorker(); + return; + } var messageHandler = new MessageHandler('main', worker); @@ -505,8 +511,6 @@ var PDFDoc = (function pdfDoc() { } else { this.setupFakeWorker(); } - - this.fontsLoading = {}; } constructor.prototype = { diff --git a/test/driver.js b/test/driver.js index 16375c30b..e84b7c8e0 100644 --- a/test/driver.js +++ b/test/driver.js @@ -7,6 +7,11 @@ 'use strict'; +// Disable worker support for running test as +// https://github.com/mozilla/pdf.js/pull/764#issuecomment-2638944 +// "firefox-bin: Fatal IO error 12 (Cannot allocate memory) on X server :1." +PDFJS.disableWorker = true; + var appPath, browser, canvas, currentTaskIdx, manifest, stdout; var inFlightRequests = 0; From 2988b33dcc6e924d9eca83c0ffec56dbb3a2b0e1 Mon Sep 17 00:00:00 2001 From: Kalervo Kujala Date: Thu, 10 Nov 2011 21:09:05 +0200 Subject: [PATCH 10/13] Fix jslint warnings in pattern.js. Also refactor few small issues. --- src/pattern.js | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/pattern.js b/src/pattern.js index 2a31fec4a..72d13d896 100644 --- a/src/pattern.js +++ b/src/pattern.js @@ -19,10 +19,10 @@ var Pattern = (function patternPattern() { constructor.shadingFromIR = function pattern_shadingFromIR(ctx, raw) { return Shadings[raw[0]].fromIR(ctx, raw); - } + }; - constructor.parseShading = function pattern_shading(shading, matrix, - xref, res, ctx) { + constructor.parseShading = function pattern_shading(shading, matrix, xref, + res, ctx) { var dict = isStream(shading) ? shading.dict : shading; var type = dict.get('ShadingType'); @@ -116,17 +116,18 @@ Shadings.RadialAxial = (function radialAxialShading() { p1 = Util.applyTransform(p1, userMatrix); } + var grad; if (type == 2) - var grad = ctx.createLinearGradient(p0[0], p0[1], p1[0], p1[1]); + grad = ctx.createLinearGradient(p0[0], p0[1], p1[0], p1[1]); else if (type == 3) - var grad = ctx.createRadialGradient(p0[0], p0[1], r0, p1[0], p1[1], r1); + grad = ctx.createRadialGradient(p0[0], p0[1], r0, p1[0], p1[1], r1); for (var i = 0, ii = colorStops.length; i < ii; ++i) { var c = colorStops[i]; grad.addColorStop(c[0], c[1]); } return grad; - } + }; constructor.prototype = { getIR: function radialAxialShadingGetIR() { @@ -166,7 +167,7 @@ Shadings.Dummy = (function dummyShading() { constructor.fromIR = function dummyShadingFromIR() { return 'hotpink'; - } + }; constructor.prototype = { getIR: function dummyShadingGetIR() { @@ -242,9 +243,9 @@ var TilingPattern = (function tilingPattern() { graphics.transform.apply(graphics, tmpTranslate); if (bbox && isArray(bbox) && 4 == bbox.length) { - var bboxWidth = bbox[2] - bbox[0]; - var bboxHeight = bbox[3] - bbox[1]; - graphics.rectangle(bbox[0], bbox[1], bboxWidth, bboxHeight); + var bboxWidth = x1 - x0; + var bboxHeight = y1 - y0; + graphics.rectangle(x0, y0, bboxWidth, bboxHeight); graphics.clip(); graphics.endPath(); } @@ -264,7 +265,7 @@ var TilingPattern = (function tilingPattern() { return [ 'TilingPattern', args, codeIR, matrix, bbox, xstep, ystep, paintType ]; - } + }; TilingPattern.prototype = { getPattern: function tiling_getPattern() { From 37b4ceb098f0b832d7556989ed55e220a6498827 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 12 Nov 2011 11:10:49 -0600 Subject: [PATCH 11/13] fix lint error --- src/core.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.js b/src/core.js index 43c059b56..c5f88b280 100644 --- a/src/core.js +++ b/src/core.js @@ -481,7 +481,7 @@ var PDFDoc = (function pdfDoc() { throw 'No PDFJS.workerSrc specified'; } - var worker + var worker; try { worker = new Worker(workerSrc); } catch (e) { From effddf39eafb2022b12bef5f46d32b21f8f4ea2a Mon Sep 17 00:00:00 2001 From: Vivien Nicolas <21@vingtetun.org> Date: Mon, 14 Nov 2011 11:58:21 +0100 Subject: [PATCH 12/13] Bump up firefox version in install.rdf --- extensions/firefox/install.rdf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/firefox/install.rdf b/extensions/firefox/install.rdf index 26b2192b6..952d55fbf 100644 --- a/extensions/firefox/install.rdf +++ b/extensions/firefox/install.rdf @@ -12,7 +12,7 @@ {ec8030f7-c20a-464f-9b0e-13a3a9e97384} 6.0 - 10.0.* + 11.0.* true From a182431ac584fe1c59d46ef15dfd55561ac391f9 Mon Sep 17 00:00:00 2001 From: Artur Adib Date: Mon, 14 Nov 2011 13:59:11 -0500 Subject: [PATCH 13/13] Fixing lint --- src/core.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.js b/src/core.js index 43c059b56..c5f88b280 100644 --- a/src/core.js +++ b/src/core.js @@ -481,7 +481,7 @@ var PDFDoc = (function pdfDoc() { throw 'No PDFJS.workerSrc specified'; } - var worker + var worker; try { worker = new Worker(workerSrc); } catch (e) {