diff --git a/Makefile b/Makefile index 3e385b175..2cc886091 100644 --- a/Makefile +++ b/Makefile @@ -63,6 +63,7 @@ bundle: | $(BUILD_DIR) @cd src; \ cat $(PDF_JS_FILES) > all_files.tmp; \ sed '/PDFJSSCRIPT_INCLUDE_ALL/ r all_files.tmp' pdf.js > ../$(BUILD_TARGET); \ + sed -i '' "s/PDFJSSCRIPT_BUNDLE_VER/`git log --format="%H" -n 1`/" ../$(BUILD_TARGET); \ rm -f *.tmp; \ cd .. diff --git a/README.md b/README.md index deb925601..97db68d36 100644 --- a/README.md +++ b/README.md @@ -95,9 +95,17 @@ workings of PDF and pdf.js: ## Contributing pdf.js is a community-driven project, so contributors are always welcome. -Simply fork our repo and contribute away. A great place to start is our -[open issues](https://github.com/mozilla/pdf.js/issues). For better consistency and -long-term stability, please do look around the code and try to follow our conventions. +Simply fork our repo and contribute away. Good starting places for picking +a bug are the top error messages and TODOs in our corpus report: + ++ http://people.mozilla.com/~bdahl/corpusreport/test/ref/ + +and of course our open Github issues: + ++ https://github.com/mozilla/pdf.js/issues + +For better consistency and long-term stability, please do look around the +code and try to follow our conventions. More information about the contributor process can be found on the [contributor wiki page](https://github.com/mozilla/pdf.js/wiki/Contributing). @@ -152,9 +160,9 @@ See the bot repo for details: ## Additional resources -Our demo site is here: +Gallery of user projects and modifications: -+ http://mozilla.github.com/pdf.js/web/viewer.html ++ https://github.com/mozilla/pdf.js/wiki/Gallery-of-user-projects-and-modifications You can read more about pdf.js here: diff --git a/extensions/firefox/install.rdf b/extensions/firefox/install.rdf index 26b2192b6..952d55fbf 100644 --- a/extensions/firefox/install.rdf +++ b/extensions/firefox/install.rdf @@ -12,7 +12,7 @@ {ec8030f7-c20a-464f-9b0e-13a3a9e97384} 6.0 - 10.0.* + 11.0.* true diff --git a/src/canvas.js b/src/canvas.js index 44e73c6af..9b3ed0ba9 100644 --- a/src/canvas.js +++ b/src/canvas.js @@ -445,7 +445,7 @@ var CanvasGraphics = (function canvasGraphics() { this.save(); ctx.scale(fontSize, fontSize); ctx.transform.apply(ctx, fontMatrix); - this.executeIRQueue(glyph.IRQueue); + this.executeIRQueue(glyph.codeIRQueue); this.restore(); var transformed = Util.applyTransform([glyph.width, 0], fontMatrix); @@ -546,7 +546,9 @@ var CanvasGraphics = (function canvasGraphics() { setStrokeColor: function canvasGraphicsSetStrokeColor(/*...*/) { var cs = this.current.strokeColorSpace; var color = cs.getRgb(arguments); - this.setStrokeRGBColor.apply(this, color); + var color = Util.makeCssRgb.apply(null, cs.getRgb(arguments)); + this.ctx.strokeStyle = color; + this.current.strokeColor = color; }, getColorN_IR_Pattern: function canvasGraphicsGetColorN_IR_Pattern(IR, cs) { if (IR[0] == 'TilingPattern') { @@ -581,8 +583,9 @@ var CanvasGraphics = (function canvasGraphics() { }, setFillColor: function canvasGraphicsSetFillColor(/*...*/) { var cs = this.current.fillColorSpace; - var color = cs.getRgb(arguments); - this.setFillRGBColor.apply(this, color); + var color = Util.makeCssRgb.apply(null, cs.getRgb(arguments)); + this.ctx.fillStyle = color; + this.current.fillColor = color; }, setFillColorN_IR: function canvasGraphicsSetFillColorN(/*...*/) { var cs = this.current.fillColorSpace; @@ -594,27 +597,49 @@ var CanvasGraphics = (function canvasGraphics() { } }, setStrokeGray: function canvasGraphicsSetStrokeGray(gray) { - this.setStrokeRGBColor(gray, gray, gray); + if (!(this.current.strokeColorSpace instanceof DeviceGrayCS)) + this.current.strokeColorSpace = new DeviceGrayCS(); + + var color = Util.makeCssRgb(gray, gray, gray); + this.ctx.strokeStyle = color; + this.current.strokeColor = color; }, setFillGray: function canvasGraphicsSetFillGray(gray) { - this.setFillRGBColor(gray, gray, gray); + if (!(this.current.fillColorSpace instanceof DeviceGrayCS)) + this.current.fillColorSpace = new DeviceGrayCS(); + + var color = Util.makeCssRgb(gray, gray, gray); + this.ctx.fillStyle = color; + this.current.fillColor = color; }, setStrokeRGBColor: function canvasGraphicsSetStrokeRGBColor(r, g, b) { + if (!(this.current.strokeColorSpace instanceof DeviceRgbCS)) + this.current.strokeColorSpace = new DeviceRgbCS(); + var color = Util.makeCssRgb(r, g, b); this.ctx.strokeStyle = color; this.current.strokeColor = color; }, setFillRGBColor: function canvasGraphicsSetFillRGBColor(r, g, b) { + if (!(this.current.fillColorSpace instanceof DeviceRgbCS)) + this.current.fillColorSpace = new DeviceRgbCS(); + var color = Util.makeCssRgb(r, g, b); this.ctx.fillStyle = color; this.current.fillColor = color; }, setStrokeCMYKColor: function canvasGraphicsSetStrokeCMYKColor(c, m, y, k) { + if (!(this.current.strokeColorSpace instanceof DeviceCmykCS)) + this.current.strokeColorSpace = new DeviceCmykCS(); + var color = Util.makeCssCmyk(c, m, y, k); this.ctx.strokeStyle = color; this.current.strokeColor = color; }, setFillCMYKColor: function canvasGraphicsSetFillCMYKColor(c, m, y, k) { + if (!(this.current.fillColorSpace instanceof DeviceCmykCS)) + this.current.fillColorSpace = new DeviceCmykCS(); + var color = Util.makeCssCmyk(c, m, y, k); this.ctx.fillStyle = color; this.current.fillColor = color; diff --git a/src/evaluator.js b/src/evaluator.js index 064288c6f..1cb8fe39f 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -459,18 +459,183 @@ var PartialEvaluator = (function partialEvaluator() { }; }, - extractEncoding: function partialEvaluatorExtractEncoding(dict, - xref, - properties) { - var type = properties.type, encoding; - if (properties.composite) { - var defaultWidth = xref.fetchIfRef(dict.get('DW')) || 1000; - properties.defaultWidth = defaultWidth; + extractDataStructures: function + partialEvaluatorExtractDataStructures(dict, baseDict, + xref, properties) { + // 9.10.2 + var toUnicode = dict.get('ToUnicode') || + baseDict.get('ToUnicode'); + if (toUnicode) + properties.toUnicode = this.readToUnicode(toUnicode, xref); + + if (properties.composite) { + // CIDSystemInfo helps to match CID to glyphs + var cidSystemInfo = xref.fetchIfRef(dict.get('CIDSystemInfo')); + if (isDict(cidSystemInfo)) { + properties.cidSystemInfo = { + registry: cidSystemInfo.get('Registry'), + ordering: cidSystemInfo.get('Ordering'), + supplement: cidSystemInfo.get('Supplement') + }; + } + + var cidToGidMap = xref.fetchIfRef(dict.get('CIDToGIDMap')); + if (isStream(cidToGidMap)) + properties.cidToGidMap = this.readCidToGidMap(cidToGidMap); + } + + var differences = []; + var baseEncoding = Encodings.StandardEncoding; + var hasEncoding = dict.has('Encoding'); + if (hasEncoding) { + var encoding = xref.fetchIfRef(dict.get('Encoding')); + if (isDict(encoding)) { + var baseName = encoding.get('BaseEncoding'); + if (baseName) + baseEncoding = Encodings[baseName.name]; + + // Load the differences between the base and original + if (encoding.has('Differences')) { + var diffEncoding = encoding.get('Differences'); + var index = 0; + for (var j = 0, jj = diffEncoding.length; j < jj; j++) { + var data = diffEncoding[j]; + if (isNum(data)) + index = data; + else + differences[index++] = data.name; + } + } + } else if (isName(encoding)) { + baseEncoding = Encodings[encoding.name]; + } else { + error('Encoding is not a Name nor a Dict'); + } + } + properties.differences = differences; + properties.baseEncoding = baseEncoding; + properties.hasEncoding = hasEncoding; + }, + + readToUnicode: + function partialEvaluatorReadToUnicode(toUnicode, xref) { + var cmapObj = xref.fetchIfRef(toUnicode); + var charToUnicode = []; + if (isName(cmapObj)) { + var isIdentityMap = cmapObj.name.substr(0, 9) == 'Identity-'; + if (!isIdentityMap) + error('ToUnicode file cmap translation not implemented'); + } else if (isStream(cmapObj)) { + var tokens = []; + var token = ''; + var beginArrayToken = {}; + + var cmap = cmapObj.getBytes(cmapObj.length); + for (var i = 0, ii = cmap.length; i < ii; i++) { + var byte = cmap[i]; + if (byte == 0x20 || byte == 0x0D || byte == 0x0A || + byte == 0x3C || byte == 0x5B || byte == 0x5D) { + switch (token) { + case 'usecmap': + error('usecmap is not implemented'); + break; + + case 'beginbfchar': + case 'beginbfrange': + case 'begincidchar': + case 'begincidrange': + token = ''; + tokens = []; + break; + + case 'endcidrange': + case 'endbfrange': + for (var j = 0, jj = tokens.length; j < jj; j += 3) { + var startRange = tokens[j]; + var endRange = tokens[j + 1]; + var code = tokens[j + 2]; + while (startRange <= endRange) { + charToUnicode[startRange] = code++; + ++startRange; + } + } + break; + + case 'endcidchar': + case 'endbfchar': + for (var j = 0, jj = tokens.length; j < jj; j += 2) { + var index = tokens[j]; + var code = tokens[j + 1]; + charToUnicode[index] = code; + } + break; + + case '': + break; + + default: + if (token[0] >= '0' && token[0] <= '9') + token = parseInt(token, 10); // a number + tokens.push(token); + token = ''; + } + switch (byte) { + case 0x5B: + // begin list parsing + tokens.push(beginArrayToken); + break; + case 0x5D: + // collect array items + var items = [], item; + while (tokens.length && + (item = tokens.pop()) != beginArrayToken) + items.unshift(item); + tokens.push(items); + break; + } + } else if (byte == 0x3E) { + if (token.length) { + // parsing hex number + tokens.push(parseInt(token, 16)); + token = ''; + } + } else { + token += String.fromCharCode(byte); + } + } + } + return charToUnicode; + }, + readCidToGidMap: + function partialEvaluatorReadCidToGidMap(cidToGidStream) { + // Extract the encoding from the CIDToGIDMap + var glyphsData = cidToGidStream.getBytes(); + + // Set encoding 0 to later verify the font has an encoding + var result = []; + for (var j = 0, jj = glyphsData.length; j < jj; j++) { + var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; + if (glyphID == 0) + continue; + + var code = j >> 1; + result[code] = glyphID; + } + return result; + }, + + extractWidths: function partialEvaluatorWidths(dict, + xref, + descriptor, + properties) { + var glyphsWidths = []; + var defaultWidth = 0; + if (properties.composite) { + defaultWidth = xref.fetchIfRef(dict.get('DW')) || 1000; - var glyphsWidths = {}; var widths = xref.fetchIfRef(dict.get('W')); if (widths) { - var start = 0; + var start = 0, end = 0; for (var i = 0, ii = widths.length; i < ii; i++) { var code = widths[i]; if (isArray(code)) { @@ -487,247 +652,42 @@ var PartialEvaluator = (function partialEvaluator() { } } } - properties.widths = glyphsWidths; - - // Glyph ids are big-endian 2-byte values - encoding = properties.encoding; - - // CIDSystemInfo might help to match width and glyphs - var cidSystemInfo = dict.get('CIDSystemInfo'); - if (isDict(cidSystemInfo)) { - properties.cidSystemInfo = { - registry: cidSystemInfo.get('Registry'), - ordering: cidSystemInfo.get('Ordering'), - supplement: cidSystemInfo.get('Supplement') - }; - } - - var cidToGidMap = dict.get('CIDToGIDMap'); - if (!cidToGidMap || !isRef(cidToGidMap)) { - - - return Object.create(GlyphsUnicode); - } - - // Extract the encoding from the CIDToGIDMap - var glyphsStream = xref.fetchIfRef(cidToGidMap); - var glyphsData = glyphsStream.getBytes(0); - - // Set encoding 0 to later verify the font has an encoding - encoding[0] = { unicode: 0, width: 0 }; - for (var j = 0, jj = glyphsData.length; j < jj; j++) { - var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; - if (glyphID == 0) - continue; - - var code = j >> 1; - var width = glyphsWidths[code]; - encoding[code] = { - unicode: glyphID, - width: isNum(width) ? width : defaultWidth - }; - } - - return Object.create(GlyphsUnicode); - } - - var differences = properties.differences; - var map = properties.encoding; - var baseEncoding = null; - if (dict.has('Encoding')) { - encoding = xref.fetchIfRef(dict.get('Encoding')); - if (isDict(encoding)) { - var baseName = encoding.get('BaseEncoding'); - if (baseName) - baseEncoding = Encodings[baseName.name].slice(); - - // Load the differences between the base and original - if (encoding.has('Differences')) { - var diffEncoding = encoding.get('Differences'); - var index = 0; - for (var j = 0, jj = diffEncoding.length; j < jj; j++) { - var data = diffEncoding[j]; - if (isNum(data)) - index = data; - else - differences[index++] = data.name; - } - } - } else if (isName(encoding)) { - baseEncoding = Encodings[encoding.name].slice(); + } else { + var firstChar = properties.firstChar; + var widths = xref.fetchIfRef(dict.get('Widths')); + if (widths) { + var j = firstChar; + for (var i = 0, ii = widths.length; i < ii; i++) + glyphsWidths[j++] = widths[i]; + defaultWidth = parseFloat(descriptor.get('MissingWidth')) || 0; } else { - error('Encoding is not a Name nor a Dict'); - } - } + // Trying get the BaseFont metrics (see comment above). + var baseFontName = dict.get('BaseFont'); + if (isName(baseFontName)) { + var metrics = this.getBaseFontMetrics(baseFontName.name); - if (!baseEncoding) { - switch (type) { - case 'TrueType': - baseEncoding = Encodings.WinAnsiEncoding.slice(); - break; - case 'Type1': - case 'Type3': - baseEncoding = Encodings.StandardEncoding.slice(); - break; - default: - warn('Unknown type of font: ' + type); - baseEncoding = []; - break; - } - } - - // merge in the differences - var firstChar = properties.firstChar; - var lastChar = properties.lastChar; - var widths = properties.widths || []; - var glyphs = {}; - for (var i = firstChar; i <= lastChar; i++) { - var glyph = differences[i]; - var replaceGlyph = true; - if (!glyph) { - glyph = baseEncoding[i] || i; - replaceGlyph = false; - } - var index = GlyphsUnicode[glyph] || i; - var width = widths[i] || widths[glyph]; - map[i] = { - unicode: index, - width: isNum(width) ? width : properties.defaultWidth - }; - - if (replaceGlyph || !glyphs[glyph]) - glyphs[glyph] = map[i]; - if (replaceGlyph || !glyphs[index]) - glyphs[index] = map[i]; - - // If there is no file, the character mapping can't be modified - // but this is unlikely that there is any standard encoding with - // chars below 0x1f, so that's fine. - if (!properties.file) - continue; - - if (index <= 0x1f || (index >= 127 && index <= 255)) - map[i].unicode += kCmapGlyphOffset; - } - - if (type == 'TrueType' && dict.has('ToUnicode') && differences) { - var cmapObj = dict.get('ToUnicode'); - if (isRef(cmapObj)) { - cmapObj = xref.fetch(cmapObj); - } - if (isName(cmapObj)) { - error('ToUnicode file cmap translation not implemented'); - } else if (isStream(cmapObj)) { - var tokens = []; - var token = ''; - var beginArrayToken = {}; - - var cmap = cmapObj.getBytes(cmapObj.length); - for (var i = 0, ii = cmap.length; i < ii; i++) { - var byte = cmap[i]; - if (byte == 0x20 || byte == 0x0D || byte == 0x0A || - byte == 0x3C || byte == 0x5B || byte == 0x5D) { - switch (token) { - case 'usecmap': - error('usecmap is not implemented'); - break; - - case 'beginbfchar': - case 'beginbfrange': - case 'begincidchar': - case 'begincidrange': - token = ''; - tokens = []; - break; - - case 'endcidrange': - case 'endbfrange': - for (var j = 0, jj = tokens.length; j < jj; j += 3) { - var startRange = tokens[j]; - var endRange = tokens[j + 1]; - var code = tokens[j + 2]; - while (startRange < endRange) { - var mapping = map[startRange] || {}; - mapping.unicode = code++; - map[startRange] = mapping; - ++startRange; - } - } - break; - - case 'endcidchar': - case 'endbfchar': - for (var j = 0, jj = tokens.length; j < jj; j += 2) { - var index = tokens[j]; - var code = tokens[j + 1]; - var mapping = map[index] || {}; - mapping.unicode = code; - map[index] = mapping; - } - break; - - case '': - break; - - default: - if (token[0] >= '0' && token[0] <= '9') - token = parseInt(token, 10); // a number - tokens.push(token); - token = ''; - } - switch (byte) { - case 0x5B: - // begin list parsing - tokens.push(beginArrayToken); - break; - case 0x5D: - // collect array items - var items = [], item; - while (tokens.length && - (item = tokens.pop()) != beginArrayToken) - items.unshift(item); - tokens.push(items); - break; - } - } else if (byte == 0x3E) { - if (token.length) { - // parsing hex number - tokens.push(parseInt(token, 16)); - token = ''; - } - } else { - token += String.fromCharCode(byte); - } + glyphsWidths = metrics.widths; + defaultWidth = metrics.defaultWidth; } } } - return glyphs; + + properties.defaultWidth = defaultWidth; + properties.widths = glyphsWidths; }, - getBaseFontMetricsAndMap: function getBaseFontMetricsAndMap(name) { - var map = {}; - if (/^Symbol(-?(Bold|Italic))*$/.test(name)) { - // special case for symbols - var encoding = Encodings.symbolsEncoding.slice(); - for (var i = 0, n = encoding.length, j; i < n; i++) { - j = encoding[i]; - if (!j) - continue; - map[i] = GlyphsUnicode[j] || 0; - } - } - - var defaultWidth = 0; - var widths = Metrics[stdFontMap[name] || name]; - if (isNum(widths)) { - defaultWidth = widths; - widths = null; + getBaseFontMetrics: function getBaseFontMetrics(name) { + var defaultWidth = 0, widths = []; + var glyphWidths = Metrics[stdFontMap[name] || name]; + if (isNum(glyphWidths)) { + defaultWidth = glyphWidths; + } else { + widths = glyphWidths; } return { defaultWidth: defaultWidth, - widths: widths || [], - map: map + widths: widths }; }, @@ -756,6 +716,7 @@ var PartialEvaluator = (function partialEvaluator() { assertWellFormed(isName(type), 'invalid font Subtype'); composite = true; } + var maxCharIndex = composite ? 0xFFFF : 0xFF; var descriptor = xref.fetchIfRef(dict.get('FontDescriptor')); if (!descriptor) { @@ -774,18 +735,16 @@ var PartialEvaluator = (function partialEvaluator() { // Using base font name as a font name. baseFontName = baseFontName.name.replace(/[,_]/g, '-'); - var metricsAndMap = this.getBaseFontMetricsAndMap(baseFontName); + var metrics = this.getBaseFontMetrics(baseFontName); var properties = { type: type.name, - encoding: metricsAndMap.map, - differences: [], - widths: metricsAndMap.widths, - defaultWidth: metricsAndMap.defaultWidth, + widths: metrics.widths, + defaultWidth: metrics.defaultWidth, firstChar: 0, - lastChar: 256 + lastChar: maxCharIndex }; - this.extractEncoding(dict, xref, properties); + this.extractDataStructures(dict, dict, xref, properties); return { name: baseFontName, @@ -802,27 +761,7 @@ var PartialEvaluator = (function partialEvaluator() { // TODO Fill the width array depending on which of the base font this is // a variant. var firstChar = xref.fetchIfRef(dict.get('FirstChar')) || 0; - var lastChar = xref.fetchIfRef(dict.get('LastChar')) || 256; - var defaultWidth = 0; - var glyphWidths = {}; - var encoding = {}; - var widths = xref.fetchIfRef(dict.get('Widths')); - if (widths) { - for (var i = 0, j = firstChar, ii = widths.length; i < ii; i++, j++) - glyphWidths[j] = widths[i]; - defaultWidth = parseFloat(descriptor.get('MissingWidth')) || 0; - } else { - // Trying get the BaseFont metrics (see comment above). - var baseFontName = dict.get('BaseFont'); - if (isName(baseFontName)) { - var metricsAndMap = this.getBaseFontMetricsAndMap(baseFontName.name); - - glyphWidths = metricsAndMap.widths; - defaultWidth = metricsAndMap.defaultWidth; - encoding = metricsAndMap.map; - } - } - + var lastChar = xref.fetchIfRef(dict.get('LastChar')) || maxCharIndex; var fontName = xref.fetchIfRef(descriptor.get('FontName')); assertWellFormed(isName(fontName), 'invalid font name'); @@ -854,34 +793,30 @@ var PartialEvaluator = (function partialEvaluator() { fixedPitch: false, fontMatrix: dict.get('FontMatrix') || IDENTITY_MATRIX, firstChar: firstChar || 0, - lastChar: lastChar || 256, + lastChar: lastChar || maxCharIndex, bbox: descriptor.get('FontBBox'), ascent: descriptor.get('Ascent'), descent: descriptor.get('Descent'), xHeight: descriptor.get('XHeight'), capHeight: descriptor.get('CapHeight'), - defaultWidth: defaultWidth, flags: descriptor.get('Flags'), italicAngle: descriptor.get('ItalicAngle'), - differences: [], - widths: glyphWidths, - encoding: encoding, coded: false }; - properties.glyphs = this.extractEncoding(dict, xref, properties); + this.extractWidths(dict, xref, descriptor, properties); + this.extractDataStructures(dict, baseDict, xref, properties); if (type.name === 'Type3') { properties.coded = true; var charProcs = xref.fetchIfRef(dict.get('CharProcs')); var fontResources = xref.fetchIfRef(dict.get('Resources')) || resources; properties.resources = fontResources; + properties.charProcIRQueues = {}; for (var key in charProcs.map) { var glyphStream = xref.fetchIfRef(charProcs.map[key]); var queueObj = {}; - properties.glyphs[key].IRQueue = this.getIRQueue(glyphStream, - fontResources, - queueObj, - dependency); + properties.charProcIRQueues[key] = + this.getIRQueue(glyphStream, fontResources, queueObj, dependency); } } diff --git a/src/fonts.js b/src/fonts.js index ca02bb020..116bb4dfc 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -672,6 +672,44 @@ var UnicodeRanges = [ { 'begin': 0x1F030, 'end': 0x1F09F } // Domino Tiles ]; +var MacStandardGlyphOrdering = [ + '.notdef', '.null', 'nonmarkingreturn', 'space', 'exclam', 'quotedbl', + 'numbersign', 'dollar', 'percent', 'ampersand', 'quotesingle', 'parenleft', + 'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period', 'slash', + 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', + 'nine', 'colon', 'semicolon', 'less', 'equal', 'greater', 'question', 'at', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', + 'backslash', 'bracketright', 'asciicircum', 'underscore', 'grave', 'a', 'b', + 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', + 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright', + 'asciitilde', 'Adieresis', 'Aring', 'Ccedilla', 'Eacute', 'Ntilde', + 'Odieresis', 'Udieresis', 'aacute', 'agrave', 'acircumflex', 'adieresis', + 'atilde', 'aring', 'ccedilla', 'eacute', 'egrave', 'ecircumflex', 'edieresis', + 'iacute', 'igrave', 'icircumflex', 'idieresis', 'ntilde', 'oacute', 'ograve', + 'ocircumflex', 'odieresis', 'otilde', 'uacute', 'ugrave', 'ucircumflex', + 'udieresis', 'dagger', 'degree', 'cent', 'sterling', 'section', 'bullet', + 'paragraph', 'germandbls', 'registered', 'copyright', 'trademark', 'acute', + 'dieresis', 'notequal', 'AE', 'Oslash', 'infinity', 'plusminus', 'lessequal', + 'greaterequal', 'yen', 'mu', 'partialdiff', 'summation', 'product', 'pi', + 'integral', 'ordfeminine', 'ordmasculine', 'Omega', 'ae', 'oslash', + 'questiondown', 'exclamdown', 'logicalnot', 'radical', 'florin', + 'approxequal', 'Delta', 'guillemotleft', 'guillemotright', 'ellipsis', + 'nonbreakingspace', 'Agrave', 'Atilde', 'Otilde', 'OE', 'oe', 'endash', + 'emdash', 'quotedblleft', 'quotedblright', 'quoteleft', 'quoteright', + 'divide', 'lozenge', 'ydieresis', 'Ydieresis', 'fraction', 'currency', + 'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'daggerdbl', 'periodcentered', + 'quotesinglbase', 'quotedblbase', 'perthousand', 'Acircumflex', + 'Ecircumflex', 'Aacute', 'Edieresis', 'Egrave', 'Iacute', 'Icircumflex', + 'Idieresis', 'Igrave', 'Oacute', 'Ocircumflex', 'apple', 'Ograve', 'Uacute', + 'Ucircumflex', 'Ugrave', 'dotlessi', 'circumflex', 'tilde', 'macron', + 'breve', 'dotaccent', 'ring', 'cedilla', 'hungarumlaut', 'ogonek', 'caron', + 'Lslash', 'lslash', 'Scaron', 'scaron', 'Zcaron', 'zcaron', 'brokenbar', + 'Eth', 'eth', 'Yacute', 'yacute', 'Thorn', 'thorn', 'minus', 'multiply', + 'onesuperior', 'twosuperior', 'threesuperior', 'onehalf', 'onequarter', + 'threequarters', 'franc', 'Gbreve', 'gbreve', 'Idotaccent', 'Scedilla', + 'scedilla', 'Cacute', 'cacute', 'Ccaron', 'ccaron', 'dcroat']; + function getUnicodeRangeFor(value) { for (var i = 0, ii = UnicodeRanges.length; i < ii; i++) { var range = UnicodeRanges[i]; @@ -681,6 +719,22 @@ function getUnicodeRangeFor(value) { return -1; } +function adaptUnicode(unicode) { + return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ? + unicode + kCmapGlyphOffset : unicode; +} + +function isAdaptedUnicode(unicode) { + return unicode >= kCmapGlyphOffset && + unicode < kCmapGlyphOffset + kSizeOfGlyphArea; +} + +function isSpecialUnicode(unicode) { + return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) || + unicode >= kCmapGlyphOffset && + unicode < kCmapGlyphOffset + kSizeOfGlyphArea; +} + /** * 'Font' is the class the outside world should use, it encapsulate all the font * decoding logics whatever type it is (assuming the font type is supported). @@ -692,8 +746,8 @@ function getUnicodeRangeFor(value) { var Font = (function Font() { var constructor = function font_constructor(name, file, properties) { this.name = name; - this.encoding = properties.encoding; this.coded = properties.coded; + this.charProcIRQueues = properties.charProcIRQueues; this.resources = properties.resources; this.sizes = []; @@ -702,6 +756,9 @@ var Font = (function Font() { names = names.split(/[-,_]/g)[0]; this.serif = serifFonts[names] || (name.search(/serif/gi) != -1); + var type = properties.type; + this.type = type; + // If the font is to be ignored, register it like an already loaded font // to avoid the cost of waiting for it be be loaded by the platform. if (properties.ignore) { @@ -709,12 +766,20 @@ var Font = (function Font() { this.loading = false; return; } + + this.differences = properties.differences; + this.widths = properties.widths; + this.defaultWidth = properties.defaultWidth; + this.composite = properties.composite; + this.toUnicode = properties.toUnicode; + this.hasEncoding = properties.hasEncoding; + this.fontMatrix = properties.fontMatrix; if (properties.type == 'Type3') return; - // Trying to fix encoding using glyph widths and CIDSystemInfo. - this.fixWidths(properties); + // Trying to fix encoding using glyph CIDSystemInfo. + this.loadCidToUnicode(properties); if (!file) { // The file data is not specified. Trying to fix the font name @@ -730,15 +795,14 @@ var Font = (function Font() { // name ArialBlack for example will be replaced by Helvetica. this.black = (name.search(/Black/g) != -1); - this.defaultWidth = properties.defaultWidth; + this.encoding = properties.baseEncoding; + this.noUnicodeAdaptation = true; this.loadedName = fontName.split('-')[0]; - this.composite = properties.composite; this.loading = false; return; } var data; - var type = properties.type; switch (type) { case 'Type1': case 'CIDFontType0': @@ -767,11 +831,10 @@ var Font = (function Font() { } this.data = data; - this.type = type; this.fontMatrix = properties.fontMatrix; - this.defaultWidth = properties.defaultWidth; + this.encoding = properties.baseEncoding; + this.hasShortCmap = properties.hasShortCmap; this.loadedName = getUniqueName(); - this.composite = properties.composite; this.loading = true; }; @@ -987,7 +1050,7 @@ var Font = (function Font() { format314); }; - function createOS2Table(properties, override) { + function createOS2Table(properties, charstrings, override) { override = override || { unitsPerEm: 0, yMax: 0, @@ -1004,26 +1067,31 @@ var Font = (function Font() { var firstCharIndex = null; var lastCharIndex = 0; - var encoding = properties.encoding; - for (var index in encoding) { - var code = encoding[index].unicode; - if (firstCharIndex > code || !firstCharIndex) - firstCharIndex = code; - if (lastCharIndex < code) - lastCharIndex = code; + if (charstrings) { + for (var i = 0; i < charstrings.length; ++i) { + var code = charstrings[i].unicode; + if (firstCharIndex > code || !firstCharIndex) + firstCharIndex = code; + if (lastCharIndex < code) + lastCharIndex = code; - var position = getUnicodeRangeFor(code); - if (position < 32) { - ulUnicodeRange1 |= 1 << position; - } else if (position < 64) { - ulUnicodeRange2 |= 1 << position - 32; - } else if (position < 96) { - ulUnicodeRange3 |= 1 << position - 64; - } else if (position < 123) { - ulUnicodeRange4 |= 1 << position - 96; - } else { - error('Unicode ranges Bits > 123 are reserved for internal usage'); + var position = getUnicodeRangeFor(code); + if (position < 32) { + ulUnicodeRange1 |= 1 << position; + } else if (position < 64) { + ulUnicodeRange2 |= 1 << position - 32; + } else if (position < 96) { + ulUnicodeRange3 |= 1 << position - 64; + } else if (position < 123) { + ulUnicodeRange4 |= 1 << position - 96; + } else { + error('Unicode ranges Bits > 123 are reserved for internal usage'); + } } + } else { + // TODO + firstCharIndex = 0; + lastCharIndex = 255; } var unitsPerEm = override.unitsPerEm || kPDFGlyphSpaceUnits; @@ -1208,6 +1276,29 @@ var Font = (function Font() { }; }; + function createGlyphNameMap(glyphs, ids, properties) { + var glyphNames = properties.glyphNames; + if (!glyphNames) { + properties.glyphNameMap = {}; + return; + } + var glyphsLength = glyphs.length; + var glyphNameMap = {}; + var encoding = []; + for (var i = 0; i < glyphsLength; ++i) { + var glyphName = glyphNames[ids[i]]; + if (!glyphName) + continue; + var unicode = glyphs[i].unicode; + glyphNameMap[glyphName] = unicode; + var code = glyphs[i].code; + encoding[code] = glyphName; + } + properties.glyphNameMap = glyphNameMap; + if (!properties.hasEncoding) + properties.baseEncoding = encoding; + } + function replaceCMapTable(cmap, font, properties) { var start = (font.start ? font.start : 0) + cmap.offset; font.pos = start; @@ -1262,7 +1353,6 @@ var Font = (function Font() { cmap.data[i] = data.charCodeAt(i); } - var encoding = properties.encoding; for (var i = 0; i < numRecords; i++) { var table = tables[i]; font.pos = start + table.offset; @@ -1271,29 +1361,88 @@ var Font = (function Font() { var length = int16(font.getBytes(2)); var language = int16(font.getBytes(2)); - if (format == 4) { - return cmap.data; - } else if (format == 0) { + if (format == 0) { // Characters below 0x20 are controls characters that are hardcoded // into the platform so if some characters in the font are assigned // under this limit they will not be displayed so let's rewrite the // CMap. var glyphs = []; - var deltas = []; + var ids = []; for (var j = 0; j < 256; j++) { var index = font.getByte(); if (index) { - deltas.push(index); - - var unicode = j + kCmapGlyphOffset; - var mapping = encoding[j] || {}; - mapping.unicode = unicode; - encoding[j] = mapping; - glyphs.push({ unicode: unicode }); + var unicode = adaptUnicode(j); + glyphs.push({ unicode: unicode, code: j }); + ids.push(index); } } - return cmap.data = createCMapTable(glyphs, deltas); + properties.hasShortCmap = true; + + createGlyphNameMap(glyphs, ids, properties); + return cmap.data = createCMapTable(glyphs, ids); + } else if (format == 4) { + // re-creating the table in format 4 since the encoding + // might be changed + var segCount = (int16(font.getBytes(2)) >> 1); + font.getBytes(6); // skipping range fields + var segIndex, segments = []; + for (segIndex = 0; segIndex < segCount; segIndex++) { + segments.push({ end: int16(font.getBytes(2)) }); + } + font.getBytes(2); + for (segIndex = 0; segIndex < segCount; segIndex++) { + segments[segIndex].start = int16(font.getBytes(2)); + } + + for (segIndex = 0; segIndex < segCount; segIndex++) { + segments[segIndex].delta = int16(font.getBytes(2)); + } + + var offsetsCount = 0; + for (segIndex = 0; segIndex < segCount; segIndex++) { + var segment = segments[segIndex]; + var rangeOffset = int16(font.getBytes(2)); + if (!rangeOffset) { + segment.offsetIndex = -1; + continue; + } + + var offsetIndex = (rangeOffset >> 1) - (segCount - segIndex); + segment.offsetIndex = offsetIndex; + offsetsCount = Math.max(offsetsCount, offsetIndex + + segment.end - segment.start + 1); + } + + var offsets = []; + for (var j = 0; j < offsetsCount; j++) + offsets.push(int16(font.getBytes(2))); + + var glyphs = [], ids = []; + + for (segIndex = 0; segIndex < segCount; segIndex++) { + var segment = segments[segIndex]; + var start = segment.start, end = segment.end; + var delta = segment.delta, offsetIndex = segment.offsetIndex; + + for (var j = start; j <= end; j++) { + if (j == 0xFFFF) + continue; + + var glyphCode = offsetIndex < 0 ? j : + offsets[offsetIndex + j - start]; + glyphCode = (glyphCode + delta) & 0xFFFF; + if (glyphCode == 0 || isAdaptedUnicode(j)) + continue; + + var unicode = adaptUnicode(j); + glyphs.push({ unicode: unicode, code: j }); + ids.push(glyphCode); + } + } + + createGlyphNameMap(glyphs, ids, properties); + return cmap.data = createCMapTable(glyphs, ids); } else if (format == 6) { // Format 6 is a 2-bytes dense mapping, which means the font data // lives glue together even if they are pretty far in the unicode @@ -1305,15 +1454,18 @@ var Font = (function Font() { var glyphs = []; var ids = []; - for (var j = 0; j < firstCode + entryCount; j++) { - var code = (j >= firstCode) ? int16(font.getBytes(2)) : j; - glyphs.push({ unicode: j + kCmapGlyphOffset }); - ids.push(code); + for (var j = 0; j < entryCount; j++) { + var glyphCode = int16(font.getBytes(2)); + var code = firstCode + j; + if (isAdaptedUnicode(glyphCode)) + continue; - var mapping = encoding[j] || {}; - mapping.unicode = glyphs[j].unicode; - encoding[j] = mapping; + var unicode = adaptUnicode(code); + glyphs.push({ unicode: unicode, code: code }); + ids.push(glyphCode); } + + createGlyphNameMap(glyphs, ids, properties); return cmap.data = createCMapTable(glyphs, ids); } } @@ -1396,6 +1548,52 @@ var Font = (function Font() { } } + function readGlyphNameMap(post, properties) { + var start = (font.start ? font.start : 0) + post.offset; + font.pos = start; + + var length = post.length, end = start + length; + var version = int32(font.getBytes(4)); + // skip rest to the tables + font.getBytes(28); + + var glyphNames; + switch (version) { + case 0x00010000: + glyphNames = MacStandardGlyphOrdering; + break; + case 0x00020000: + var numGlyphs = int16(font.getBytes(2)); + var glyphNameIndexes = []; + for (var i = 0; i < numGlyphs; ++i) + glyphNameIndexes.push(int16(font.getBytes(2))); + var customNames = []; + while (font.pos < end) { + var stringLength = font.getByte(); + var string = ''; + for (var i = 0; i < stringLength; ++i) + string += font.getChar(); + customNames.push(string); + } + glyphNames = []; + for (var i = 0; i < numGlyphs; ++i) { + var j = glyphNameIndexes[i]; + if (j < 258) { + glyphNames.push(MacStandardGlyphOrdering[j]); + continue; + } + glyphNames.push(customNames[j - 258]); + } + break; + case 0x00030000: + break; + default: + warn('Unknown/unsupported post table version ' + version); + break; + } + properties.glyphNames = glyphNames; + } + // Check that required tables are present var requiredTables = ['OS/2', 'cmap', 'head', 'hhea', 'hmtx', 'maxp', 'name', 'post']; @@ -1403,7 +1601,7 @@ var Font = (function Font() { var header = readOpenTypeHeader(font); var numTables = header.numTables; - var cmap, maxp, hhea, hmtx, vhea, vmtx, head, loca, glyf; + var cmap, post, maxp, hhea, hmtx, vhea, vmtx, head, loca, glyf; var tables = []; for (var i = 0; i < numTables; i++) { var table = readTableEntry(font); @@ -1411,6 +1609,8 @@ var Font = (function Font() { if (index != -1) { if (table.tag == 'cmap') cmap = table; + else if (table.tag == 'post') + post = table; else if (table.tag == 'maxp') maxp = table; else if (table.tag == 'hhea') @@ -1461,7 +1661,7 @@ var Font = (function Font() { tables.push({ tag: 'OS/2', - data: stringToArray(createOS2Table(properties, override)) + data: stringToArray(createOS2Table(properties, null, override)) }); } @@ -1486,6 +1686,11 @@ var Font = (function Font() { hhea.data[11] = 0xFF; } + // The 'post' table has glyphs names. + if (post) { + readGlyphNameMap(post, properties); + } + // Replace the old CMAP table with a shiny new one if (properties.type == 'CIDFontType2') { // Type2 composite fonts map characters directly to glyphs so the cmap @@ -1503,28 +1708,17 @@ var Font = (function Font() { tables.push(cmap); } - var encoding = properties.encoding, i; - - // offsetting glyphs to avoid problematic unicode ranges - for (i in encoding) { - if (encoding.hasOwnProperty(i)) { - var unicode = encoding[i].unicode; - if (unicode <= 0x1f || - (unicode >= 127 && unicode < kSizeOfGlyphArea)) - encoding[i].unicode += kCmapGlyphOffset; - } - } - var glyphs = []; for (i = 1; i < numGlyphs; i++) { - glyphs.push({ - unicode: i <= 0x1f || (i >= 127 && i < kSizeOfGlyphArea) ? - i + kCmapGlyphOffset : i - }); + if (isAdaptedUnicode(i)) + continue; + + glyphs.push({ unicode: adaptUnicode(i) }); } cmap.data = createCMapTable(glyphs); } else { replaceCMapTable(cmap, font, properties); + this.glyphNameMap = properties.glyphNameMap; } // Rewrite the 'post' table if needed @@ -1598,12 +1792,29 @@ var Font = (function Font() { var charstrings = font.charstrings; properties.fixedPitch = isFixedPitch(charstrings); + var glyphNameMap = {}; + for (var i = 0; i < charstrings.length; ++i) { + var charstring = charstrings[i]; + glyphNameMap[charstring.glyph] = charstring.unicode; + } + this.glyphNameMap = glyphNameMap; + + if (!properties.hasEncoding && (properties.subtype == 'Type1C' || + properties.subtype == 'CIDFontType0C')) { + var encoding = []; + for (var i = 0; i < charstrings.length; ++i) { + var charstring = charstrings[i]; + encoding[charstring.code] = charstring.glyph; + } + properties.baseEncoding = encoding; + } + var fields = { // PostScript Font Program 'CFF ': font.data, // OS/2 and Windows Specific metrics - 'OS/2': stringToArray(createOS2Table(properties)), + 'OS/2': stringToArray(createOS2Table(properties, charstrings)), // Character to glyphs mapping 'cmap': createCMapTable(charstrings.slice(), @@ -1657,9 +1868,8 @@ var Font = (function Font() { // Horizontal metrics 'hmtx': (function fontFieldsHmtx() { var hmtx = '\x00\x00\x00\x00'; // Fake .notdef - for (var i = 0, ii = charstrings.length; i < ii; i++) { + for (var i = 0, ii = charstrings.length; i < ii; i++) hmtx += string16(charstrings[i].width) + string16(0); - } return stringToArray(hmtx); })(), @@ -1688,82 +1898,48 @@ var Font = (function Font() { return stringToArray(otf.file); }, - fixWidths: function font_fixWidths(properties) { - if (properties.type !== 'CIDFontType0' && - properties.type !== 'CIDFontType2') - return; - - var encoding = properties.encoding; - if (encoding[0]) + loadCidToUnicode: function font_loadCidToUnicode(properties) { + if (properties.cidToGidMap) { + this.cidToUnicode = properties.cidToGidMap; return; - var glyphsWidths = properties.widths; - if (!glyphsWidths) + } + + if (!properties.cidSystemInfo) return; - var defaultWidth = properties.defaultWidth; + var cidToUnicodeMap = []; + this.cidToUnicode = cidToUnicodeMap; + var cidSystemInfo = properties.cidSystemInfo; var cidToUnicode; if (cidSystemInfo) { cidToUnicode = CIDToUnicodeMaps[ cidSystemInfo.registry + '-' + cidSystemInfo.ordering]; } - if (!cidToUnicode) { - // the font is directly characters to glyphs with no encoding - // so create an identity encoding - for (i = 0; i < 0xD800; i++) { - var width = glyphsWidths[i]; - encoding[i] = { - unicode: i, - width: isNum(width) ? width : defaultWidth - }; - } - // skipping surrogates + 256-user defined - for (i = 0xE100; i <= 0xFFFF; i++) { - var width = glyphsWidths[i]; - encoding[i] = { - unicode: i, - width: isNum(width) ? width : defaultWidth - }; - } - return; - } - encoding[0] = { unicode: 0, width: 0 }; - var glyph = 1, i, j, k, cidLength, ii; + if (!cidToUnicode) + return; // identity encoding + + var glyph = 1, i, j, k, ii; for (i = 0, ii = cidToUnicode.length; i < ii; ++i) { var unicode = cidToUnicode[i]; - var width; if (isArray(unicode)) { var length = unicode.length; - width = glyphsWidths[glyph]; - for (j = 0; j < length; j++) { - k = unicode[j]; - encoding[k] = { - unicode: k, - width: isNum(width) ? width : defaultWidth - }; - } + for (j = 0; j < length; j++) + cidToUnicodeMap[unicode[j]] = glyph; glyph++; } else if (typeof unicode === 'object') { var fillLength = unicode.f; if (fillLength) { k = unicode.c; for (j = 0; j < fillLength; ++j) { - width = glyphsWidths[glyph++]; - encoding[k] = { - unicode: k, - width: isNum(width) ? width : defaultWidth - }; + cidToUnicodeMap[k] = glyph++; k++; } } else glyph += unicode.s; } else if (unicode) { - width = glyphsWidths[glyph++]; - encoding[unicode] = { - unicode: unicode, - width: isNum(width) ? width : defaultWidth - }; + cidToUnicodeMap[unicode] = glyph++; } else glyph++; } @@ -1797,6 +1973,79 @@ var Font = (function Font() { return rule; }, + charToGlyph: function fonts_charToGlyph(charcode) { + var unicode, width, codeIRQueue; + + var width = this.widths[charcode]; + + switch (this.type) { + case 'CIDFontType0': + if (this.noUnicodeAdaptation) { + width = this.widths[this.cidToUnicode[charcode]]; + unicode = charcode; + break; + } + unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode); + break; + case 'CIDFontType2': + if (this.noUnicodeAdaptation) { + width = this.widths[this.cidToUnicode[charcode]]; + unicode = charcode; + break; + } + unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode); + break; + case 'Type1': + var glyphName = this.differences[charcode] || this.encoding[charcode]; + if (this.noUnicodeAdaptation) { + if (!isNum(width)) + width = this.widths[glyphName]; + unicode = GlyphsUnicode[glyphName] || charcode; + break; + } + unicode = this.glyphNameMap[glyphName] || + adaptUnicode(GlyphsUnicode[glyphName] || charcode); + break; + case 'Type3': + var glyphName = this.differences[charcode] || this.encoding[charcode]; + codeIRQueue = this.charProcIRQueues[glyphName]; + unicode = charcode; + break; + case 'TrueType': + var glyphName = this.differences[charcode] || this.encoding[charcode]; + if (!glyphName) + glyphName = Encodings.StandardEncoding[charcode]; + if (!isNum(width)) + width = this.widths[glyphName]; + if (this.noUnicodeAdaptation) { + unicode = GlyphsUnicode[glyphName] || charcode; + break; + } + if (!this.hasEncoding) { + unicode = adaptUnicode(charcode); + break; + } + if (this.hasShortCmap) { + var j = Encodings.MacRomanEncoding.indexOf(glyphName); + unicode = j >= 0 && !isSpecialUnicode(j) ? j : + this.glyphNameMap[glyphName]; + } else { + unicode = glyphName in GlyphsUnicode ? + adaptUnicode(GlyphsUnicode[glyphName]) : + this.glyphNameMap[glyphName]; + } + break; + default: + warn('Unsupported font type: ' + this.type); + break; + } + return { + unicode: unicode, + width: isNum(width) ? width : this.defaultWidth, + codeIRQueue: codeIRQueue + }; + }, + charsToGlyphs: function fonts_chars2Glyphs(chars) { var charsCache = this.charsCache; var glyphs; @@ -1812,11 +2061,6 @@ var Font = (function Font() { if (!charsCache) charsCache = this.charsCache = Object.create(null); - // translate the string using the font's encoding - var encoding = this.encoding; - if (!encoding) - return chars; - glyphs = []; if (this.composite) { @@ -1828,14 +2072,7 @@ var Font = (function Font() { // loop should never end on the last byte for (var i = 0; i < length; i++) { var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]); - var glyph = encoding[charcode]; - if ('undefined' == typeof(glyph)) { - warn('Unencoded charcode ' + charcode); - glyph = { - unicode: charcode, - width: this.defaultWidth - }; - } + var glyph = this.charToGlyph(charcode); glyphs.push(glyph); // placing null after each word break charcode (ASCII SPACE) if (charcode == 0x20) @@ -1845,14 +2082,7 @@ var Font = (function Font() { else { for (var i = 0, ii = chars.length; i < ii; ++i) { var charcode = chars.charCodeAt(i); - var glyph = encoding[charcode]; - if ('undefined' == typeof(glyph)) { - warn('Unencoded charcode ' + charcode); - glyph = { - unicode: charcode, - width: this.defaultWidth - }; - } + var glyph = this.charToGlyph(charcode); glyphs.push(glyph); if (charcode == 0x20) glyphs.push(null); @@ -2106,6 +2336,17 @@ var Type1Parser = function type1Parser() { warn('Support for Type1 command ' + value + ' (' + escape + ') is not implemented in charstring: ' + charstring); + if (value == 12) { + // we know how to ignore only some the Type1 commands + switch (escape) { + case 7: + charstring.push('drop', 'drop', 'drop', 'drop'); + continue; + case 8: + charstring.push('drop'); + continue; + } + } } value = command; @@ -2326,24 +2567,30 @@ var Type1Parser = function type1Parser() { properties.fontMatrix = matrix; break; case '/Encoding': - var size = parseInt(getToken(), 10); - getToken(); // read in 'array' + var encodingArg = getToken(); + var encoding; + if (!/^\d+$/.test(encodingArg)) { + // encoding name is specified + encoding = Encodings[encodingArg]; + } else { + encoding = []; + var size = parseInt(encodingArg, 10); + getToken(); // read in 'array' - for (var j = 0; j < size; j++) { - var token = getToken(); - if (token == 'dup') { - var index = parseInt(getToken(), 10); - var glyph = getToken(); - - if ('undefined' == typeof(properties.differences[index])) { - var mapping = properties.encoding[index] || {}; - mapping.unicode = GlyphsUnicode[glyph] || index; - properties.glyphs[glyph] = properties.encoding[index] = - mapping; + for (var j = 0; j < size; j++) { + var token = getToken(); + if (token == 'dup') { + var index = parseInt(getToken(), 10); + var glyph = getToken(); + encoding[index] = glyph; + getToken(); // read the in 'put' } - getToken(); // read the in 'put' } } + if (!properties.hasEncoding && encoding) { + properties.baseEncoding = encoding; + break; + } break; } token = ''; @@ -2486,45 +2733,51 @@ CFF.prototype = { }, encodeNumber: function cff_encodeNumber(value) { + // some of the fonts has ouf-of-range values + // they are just arithmetic overflows + // make sanitizer happy + value |= 0; if (value >= -32768 && value <= 32767) { return '\x1c' + String.fromCharCode((value >> 8) & 0xFF) + String.fromCharCode(value & 0xFF); - } else if (value >= (-2147483648) && value <= 2147483647) { + } else { return '\x1d' + String.fromCharCode((value >> 24) & 0xFF) + String.fromCharCode((value >> 16) & 0xFF) + String.fromCharCode((value >> 8) & 0xFF) + String.fromCharCode(value & 0xFF); } - error('Value: ' + value + ' is not allowed'); - return null; }, getOrderedCharStrings: function cff_getOrderedCharStrings(glyphs, properties) { var charstrings = []; - var missings = []; - - for (var i = 0, ii = glyphs.length; i < ii; i++) { - var glyph = glyphs[i]; - var mapping = properties.glyphs[glyph.glyph]; - if (!mapping) { - if (glyph.glyph != '.notdef') - missings.push(glyph.glyph); - } else { - charstrings.push({ - glyph: glyph.glyph, - unicode: mapping.unicode, - charstring: glyph.data, - width: glyph.width, - lsb: glyph.lsb - }); - } + var reverseMapping = {}; + var encoding = properties.baseEncoding; + var i, length, glyphName; + for (i = 0, length = encoding.length; i < length; ++i) { + glyphName = encoding[i]; + if (!glyphName || isSpecialUnicode(i)) + continue; + reverseMapping[glyphName] = i; + } + reverseMapping['.notdef'] = 0; + var unusedUnicode = kCmapGlyphOffset; + for (i = 0, length = glyphs.length; i < length; i++) { + var item = glyphs[i]; + var glyphName = item.glyph; + var unicode = glyphName in reverseMapping ? + reverseMapping[glyphName] : unusedUnicode++; + charstrings.push({ + glyph: glyphName, + unicode: unicode, + gid: i, + charstring: item.data, + width: item.width, + lsb: item.lsb + }); } - - if (missings.length) - warn(missings + ' does not have unicode in the glyphs dictionary'); charstrings.sort(function charstrings_sort(a, b) { return a.unicode - b.unicode; @@ -2807,6 +3060,20 @@ var Type2CFF = (function type2CFF() { var encoding = this.parseEncoding(topDict.Encoding, properties, strings, charset); + var charset, encoding; + var isCIDFont = properties.subtype == 'CIDFontType0C'; + if (isCIDFont) { + charset = []; + charset.length = charStrings.length; + encoding = this.parseCidMap(topDict.charset, + charStrings.length); + } else { + charset = this.parseCharsets(topDict.charset, + charStrings.length, strings); + encoding = this.parseEncoding(topDict.Encoding, properties, + strings, charset); + } + // The font sanitizer does not support CFF encoding with a // supplement, since the encoding is not really use to map // between gid to glyph, let's overwrite what is declared in @@ -2863,80 +3130,46 @@ var Type2CFF = (function type2CFF() { getCharStrings: function cff_charstrings(charsets, encoding, privateDict, properties) { - var defaultWidth = privateDict['defaultWidthX']; var charstrings = []; - var firstChar = properties.firstChar; - var glyphMap = {}; + var unicodeUsed = []; + var unassignedUnicodeItems = []; for (var i = 0, ii = charsets.length; i < ii; i++) { var glyph = charsets[i]; + var encodingFound = false; for (var charcode in encoding) { - if (encoding[charcode] == i) - glyphMap[glyph] = charcode | 0; + if (encoding[charcode] == i) { + var code = charcode | 0; + charstrings.push({ + unicode: adaptUnicode(code), + code: code, + gid: i, + glyph: glyph + }); + unicodeUsed[code] = true; + encodingFound = true; + break; + } + } + if (!encodingFound) { + unassignedUnicodeItems.push(i); } } - var differences = properties.differences; - for (var i = 0, ii = differences.length; i < ii; ++i) { - var glyph = differences[i]; - if (!glyph) - continue; - var oldGlyph = charsets[i]; - if (oldGlyph) - delete glyphMap[oldGlyph]; - glyphMap[differences[i]] = i; - } - - var glyphs = properties.glyphs; - for (var i = 1, ii = charsets.length; i < ii; i++) { - var glyph = charsets[i]; - var code = glyphMap[glyph] || 0; - - var mapping = glyphs[code] || glyphs[glyph] || { width: defaultWidth }; - var unicode = mapping.unicode; - - if (unicode <= 0x1f || (unicode >= 127 && unicode <= 255)) - unicode += kCmapGlyphOffset; - - var width = (mapping.hasOwnProperty('width') && isNum(mapping.width)) ? - mapping.width : defaultWidth; - properties.encoding[code] = { - unicode: unicode, - width: width - }; - + var nextUnusedUnicode = 0x21; + for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; ++j) { + var i = unassignedUnicodeItems[j]; + // giving unicode value anyway + while (unicodeUsed[nextUnusedUnicode]) + nextUnusedUnicode++; + var code = nextUnusedUnicode++; charstrings.push({ - unicode: unicode, - width: width, + unicode: adaptUnicode(code), code: code, - gid: i + gid: i, + glyph: charsets[i] }); } - // sort the array by the unicode value - charstrings.sort(function type2CFFGetCharStringsSort(a, b) { - return a.unicode - b.unicode; - }); - - // remove duplicates -- they might appear during selection: - // properties.glyphs[code] || properties.glyphs[glyph] - var nextUnusedUnicode = kCmapGlyphOffset + 0x0020; - var lastUnicode = charstrings[0].unicode, wasModified = false; - for (var i = 1, ii = charstrings.length; i < ii; ++i) { - if (lastUnicode != charstrings[i].unicode) { - lastUnicode = charstrings[i].unicode; - continue; - } - // duplicate found -- keeping the item that has - // different code and unicode, that one created - // as result of modification of the base encoding - var duplicateIndex = - charstrings[i].unicode == charstrings[i].code ? i : i - 1; - charstrings[duplicateIndex].unicode = nextUnusedUnicode++; - wasModified = true; - } - if (!wasModified) - return charstrings; - // sort the array by the unicode value (again) charstrings.sort(function type2CFFGetCharStringsSort(a, b) { return a.unicode - b.unicode; @@ -2964,8 +3197,8 @@ var Type2CFF = (function type2CFF() { if (pos == 0 || pos == 1) { var gid = 1; - var baseEncoding = pos ? Encodings.ExpertEncoding.slice() : - Encodings.StandardEncoding.slice(); + var baseEncoding = pos ? Encodings.ExpertEncoding : + Encodings.StandardEncoding; for (var i = 0, ii = charset.length; i < ii; i++) { var index = baseEncoding.indexOf(charset[i]); if (index != -1) @@ -2985,8 +3218,8 @@ var Type2CFF = (function type2CFF() { var gid = 1; for (var i = 0; i < rangesCount; i++) { var start = bytes[pos++]; - var count = bytes[pos++]; - for (var j = start; j <= start + count; j++) + var left = bytes[pos++]; + for (var j = start; j <= start + left; j++) encoding[j] = gid++; } break; @@ -3047,6 +3280,46 @@ var Type2CFF = (function type2CFF() { } return charset; }, + + parseCidMap: function cff_parsecharsets(pos, length) { + var bytes = this.bytes; + var format = bytes[pos++]; + + var encoding = {}; + var map = {encoding: encoding}; + + encoding[0] = 0; + + var gid = 1; + switch (format) { + case 0: + while (gid < length) { + var cid = (bytes[pos++] << 8) | bytes[pos++]; + encoding[cid] = gid++; + } + break; + case 1: + while (gid < length) { + var cid = (bytes[pos++] << 8) | bytes[pos++]; + var count = bytes[pos++]; + for (var i = 0; i <= count; i++) + encoding[cid++] = gid++; + } + break; + case 2: + while (gid < length) { + var cid = (bytes[pos++] << 8) | bytes[pos++]; + var count = (bytes[pos++] << 8) | bytes[pos++]; + for (var i = 0; i <= count; i++) + encoding[cid++] = gid++; + } + break; + default: + error('Unknown charset format'); + } + return map; + }, + getPrivDict: function cff_getprivdict(baseDict, strings) { var dict = {}; @@ -3108,6 +3381,17 @@ var Type2CFF = (function type2CFF() { case 18: dict['Private'] = value; break; + case 3102: + case 3103: + case 3104: + case 3105: + case 3106: + case 3107: + case 3108: + case 3109: + case 3110: + dict['cidOperatorPresent'] = true; + break; default: TODO('interpret top dict key'); } @@ -3220,6 +3504,15 @@ var Type2CFF = (function type2CFF() { } var b = (b << 8) | op; } + if (!operands.length && b == 8 && + dict[pos + 1] == 9) { + // no operands for FamilyBlues, removing the key + // and next one is FamilyOtherBlues - skipping them + // also replacing FamilyBlues to pass sanitizer + dict[pos] = 139; + pos += 2; + continue; + } entries.push([b, operands]); operands = []; ++pos; diff --git a/src/pdf.js b/src/pdf.js index 51f606548..1042a651b 100644 --- a/src/pdf.js +++ b/src/pdf.js @@ -7,8 +7,9 @@ var PDFJS = {}; // Use strict in our context only - users might not want it 'use strict'; + PDFJS.build = 'PDFJSSCRIPT_BUNDLE_VER'; + // Files are inserted below - see Makefile /* PDFJSSCRIPT_INCLUDE_ALL */ }).call((typeof window === 'undefined') ? this : window); - diff --git a/test/driver.js b/test/driver.js index e84b7c8e0..c11cecf56 100644 --- a/test/driver.js +++ b/test/driver.js @@ -56,23 +56,29 @@ function load() { } function cleanup() { - var styleSheet = document.styleSheets[0]; - if (styleSheet) { + // Clear out all the stylesheets since a new one is created for each font. + while (document.styleSheets.length > 0) { + var styleSheet = document.styleSheets[0]; while (styleSheet.cssRules.length > 0) styleSheet.deleteRule(0); + var ownerNode = styleSheet.ownerNode; + ownerNode.parentNode.removeChild(ownerNode); } var guard = document.getElementById('content-end'); var body = document.body; while (body.lastChild !== guard) body.removeChild(body.lastChild); + + // Wipe out the link to the pdfdoc so it can be GC'ed. + for (var i = 0; i < manifest.length; i++) { + if (manifest[i].pdfDoc) { + manifest[i].pdfDoc.destroy(); + delete manifest[i].pdfDoc; + } + } } function nextTask() { - // If there is a pdfDoc on the last task executed, destroy it to free memory. - if (task && task.pdfDoc) { - task.pdfDoc.destroy(); - delete task.pdfDoc; - } cleanup(); if (currentTaskIdx == manifest.length) { diff --git a/test/test.py b/test/test.py index 65def5d8e..256200587 100644 --- a/test/test.py +++ b/test/test.py @@ -323,18 +323,18 @@ def verifyPDFs(manifestList): if os.access(f, os.R_OK): fileMd5 = hashlib.md5(open(f, 'rb').read()).hexdigest() if 'md5' not in item: - print 'ERROR: Missing md5 for file "' + f + '".', + print 'WARNING: Missing md5 for file "' + f + '".', print 'Hash for current file is "' + fileMd5 + '"' error = True continue md5 = item['md5'] if fileMd5 != md5: - print 'ERROR: MD5 of file "' + f + '" does not match file.', + print 'WARNING: MD5 of file "' + f + '" does not match file.', print 'Expected "' + md5 + '" computed "' + fileMd5 + '"' error = True continue else: - print 'ERROR: Unable to open file for reading "' + f + '".' + print 'WARNING: Unable to open file for reading "' + f + '".' error = True return not error @@ -365,7 +365,8 @@ def setUp(options): downloadLinkedPDFs(manifestList) if not verifyPDFs(manifestList): - raise Exception('ERROR: failed to verify pdfs.') + print 'Unable to verify the checksum for the files that are used for testing.' + print 'Please re-download the files, or adjust the MD5 checksum in the manifest for the files listed above.\n' for b in testBrowsers: State.taskResults[b.name] = { } diff --git a/test/test_manifest.json b/test/test_manifest.json index 8085506a2..0bac41d34 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -19,7 +19,7 @@ }, { "id": "intelisa-load", "file": "pdfs/intelisa.pdf", - "md5": "f3ed5487d1afa34d8b77c0c734a95c79", + "md5": "f5712097d29287a97f1278839814f682", "link": true, "rounds": 1, "type": "load" @@ -194,7 +194,7 @@ }, { "id": "f1040", "file": "pdfs/f1040.pdf", - "md5": "7323b50c6d28d959b8b4b92c469b2469", + "md5": "b59272ce19b4a0c5808c8861441b0741", "link": true, "rounds": 1, "type": "load"