From f32e65b19f601fa2b2a97c11c215387c07ffc017 Mon Sep 17 00:00:00 2001 From: Brendan Dahl Date: Wed, 25 Sep 2013 10:32:04 -0700 Subject: [PATCH] Read multi-byte character codes based on codespace ranges. --- make.js | 3 +- src/core/cmap.js | 460 +++++++++++++++++++++++++++++++++++++++ src/core/evaluator.js | 128 ++--------- src/core/fonts.js | 29 ++- src/worker_loader.js | 1 + test/pdfs/bug898853.pdf | Bin 0 -> 15861 bytes test/test_manifest.json | 7 + test/unit/cmap_spec.js | 86 ++++++++ test/unit/unit_test.html | 2 + 9 files changed, 592 insertions(+), 124 deletions(-) create mode 100644 src/core/cmap.js create mode 100644 test/pdfs/bug898853.pdf create mode 100644 test/unit/cmap_spec.js diff --git a/make.js b/make.js index 45d14519b..02142b2b7 100644 --- a/make.js +++ b/make.js @@ -306,7 +306,8 @@ target.bundle = function(args) { 'core/worker.js', 'core/jpx.js', 'core/jbig2.js', - 'core/bidi.js' + 'core/bidi.js', + 'core/cmap.js' ]; var EXT_SRC_FILES = [ diff --git a/src/core/cmap.js b/src/core/cmap.js new file mode 100644 index 000000000..aa76128f7 --- /dev/null +++ b/src/core/cmap.js @@ -0,0 +1,460 @@ +/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ +/* Copyright 2012 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* globals Util, isString, isInt, warn, error, isCmd, isEOF, isName, Lexer, + isStream */ + +'use strict'; + +var CMAP_CODESPACES = { + 'Adobe-CNS1-0': [[], [0, 14335]], + 'Adobe-CNS1-1': [[], [0, 17407]], + 'Adobe-CNS1-2': [[], [0, 17663]], + 'Adobe-CNS1-3': [[], [0, 18943]], + 'Adobe-CNS1-4': [[], [0, 19199]], + 'Adobe-CNS1-5': [[], [0, 19199]], + 'Adobe-CNS1-6': [[], [0, 19199]], + 'Adobe-CNS1-UCS2': [[], [0, 65535]], + 'B5-H': [[0, 128], [41280, 65278]], + 'B5-V': [[0, 128], [41280, 65278]], + 'B5pc-H': [[0, 128, 253, 255], [41280, 64766]], + 'B5pc-V': [[0, 128, 253, 255], [41280, 64766]], + 'CNS-EUC-H': [[0, 128], [41377, 65278], [], + [2392957345, 2392981246, 2393022881, 2393046782, 2393088417, 2393112318]], + 'CNS-EUC-V': [[0, 128], [41377, 65278], [], + [2392957345, 2392981246, 2393022881, 2393046782, 2393088417, 2393112318]], + 'CNS1-H': [[], [8481, 32382]], + 'CNS1-V': [[], [8481, 32382]], + 'CNS2-H': [[], [8481, 32382]], + 'CNS2-V': [[], [8481, 32382]], + 'ETen-B5-H': [[0, 128], [41280, 65278]], + 'ETen-B5-V': [[0, 128], [41280, 65278]], + 'ETenms-B5-H': [[0, 128], [41280, 65278]], + 'ETenms-B5-V': [[0, 128], [41280, 65278]], + 'ETHK-B5-H': [[0, 128], [34624, 65278]], + 'ETHK-B5-V': [[0, 128], [34624, 65278]], + 'HKdla-B5-H': [[0, 128], [41280, 65278]], + 'HKdla-B5-V': [[0, 128], [41280, 65278]], + 'HKdlb-B5-H': [[0, 128], [36416, 65278]], + 'HKdlb-B5-V': [[0, 128], [36416, 65278]], + 'HKgccs-B5-H': [[0, 128], [35392, 65278]], + 'HKgccs-B5-V': [[0, 128], [35392, 65278]], + 'HKm314-B5-H': [[0, 128], [41280, 65278]], + 'HKm314-B5-V': [[0, 128], [41280, 65278]], + 'HKm471-B5-H': [[0, 128], [41280, 65278]], + 'HKm471-B5-V': [[0, 128], [41280, 65278]], + 'HKscs-B5-H': [[0, 128], [34624, 65278]], + 'HKscs-B5-V': [[0, 128], [34624, 65278]], + 'UniCNS-UCS2-H': [[], [0, 55295, 57344, 65535]], + 'UniCNS-UCS2-V': [[], [0, 55295, 57344, 65535]], + 'UniCNS-UTF16-H': [[], [0, 55295, 57344, 65535], [], + [3623934976, 3690979327]], + 'UniCNS-UTF16-V': [[], [0, 55295, 57344, 65535], [], + [3623934976, 3690979327]], + 'Adobe-GB1-0': [[], [0, 7935]], + 'Adobe-GB1-1': [[], [0, 9983]], + 'Adobe-GB1-2': [[], [0, 22271]], + 'Adobe-GB1-3': [[], [0, 22527]], + 'Adobe-GB1-4': [[], [0, 29183]], + 'Adobe-GB1-5': [[], [0, 30463]], + 'Adobe-GB1-UCS2': [[], [0, 65535]], + 'GB-EUC-H': [[0, 128], [41377, 65278]], + 'GB-EUC-V': [[0, 128], [41377, 65278]], + 'GB-H': [[], [8481, 32382]], + 'GB-V': [[], [8481, 32382]], + 'GBK-EUC-H': [[0, 128], [33088, 65278]], + 'GBK-EUC-V': [[0, 128], [33088, 65278]], + 'GBK2K-H': [[0, 127], [33088, 65278], [], [2167439664, 4265213497]], + 'GBK2K-V': [[0, 127], [33088, 65278], [], [2167439664, 4265213497]], + 'GBKp-EUC-H': [[0, 128], [33088, 65278]], + 'GBKp-EUC-V': [[0, 128], [33088, 65278]], + 'GBpc-EUC-H': [[0, 128, 253, 255], [41377, 64766]], + 'GBpc-EUC-V': [[0, 128, 253, 255], [41377, 64766]], + 'GBT-EUC-H': [[0, 128], [41377, 65278]], + 'GBT-EUC-V': [[0, 128], [41377, 65278]], + 'GBT-H': [[], [8481, 32382]], + 'GBT-V': [[], [8481, 32382]], + 'GBTpc-EUC-H': [[0, 128, 253, 255], [41377, 64766]], + 'GBTpc-EUC-V': [[0, 128, 253, 255], [41377, 64766]], + 'UniGB-UCS2-H': [[], [0, 55295, 57344, 65535]], + 'UniGB-UCS2-V': [[], [0, 55295, 57344, 65535]], + 'UniGB-UTF16-H': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]], + 'UniGB-UTF16-V': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]], + '78-EUC-H': [[0, 128], [36512, 36575, 41377, 65278]], + '78-EUC-V': [[0, 128], [36512, 36575, 41377, 65278]], + '78-H': [[], [8481, 32382]], + '78-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + '78-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + '78-V': [[], [8481, 32382]], + '78ms-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + '78ms-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + '83pv-RKSJ-H': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]], + '90ms-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + '90ms-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + '90msp-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + '90msp-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + '90pv-RKSJ-H': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]], + '90pv-RKSJ-V': [[0, 128, 160, 223, 253, 255], [33088, 40956, 57408, 64764]], + 'Add-H': [[], [8481, 32382]], + 'Add-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + 'Add-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + 'Add-V': [[], [8481, 32382]], + 'Adobe-Japan1-0': [[], [0, 8447]], + 'Adobe-Japan1-1': [[], [0, 8447]], + 'Adobe-Japan1-2': [[], [0, 8959]], + 'Adobe-Japan1-3': [[], [0, 9471]], + 'Adobe-Japan1-4': [[], [0, 15615]], + 'Adobe-Japan1-5': [[], [0, 20479]], + 'Adobe-Japan1-6': [[], [0, 23295]], + 'Adobe-Japan1-UCS2': [[], [0, 65535]], + 'Adobe-Japan2-0': [[], [0, 6143]], + 'EUC-H': [[0, 128], [36512, 36575, 41377, 65278]], + 'EUC-V': [[0, 128], [36512, 36575, 41377, 65278]], + 'Ext-H': [[], [8481, 32382]], + 'Ext-RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + 'Ext-RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + 'Ext-V': [[], [8481, 32382]], + 'H': [[], [8481, 32382]], + 'Hankaku': [[0, 255], []], + 'Hiragana': [[0, 255], []], + 'Hojo-EUC-H': [[], [], [9413025, 9436926], []], + 'Hojo-EUC-V': [[], [], [9413025, 9436926], []], + 'Hojo-H': [[], [8481, 32382]], + 'Hojo-V': [[], [8481, 32382]], + 'Katakana': [[0, 255], []], + 'NWP-H': [[], [8481, 32382]], + 'NWP-V': [[], [8481, 32382]], + 'RKSJ-H': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + 'RKSJ-V': [[0, 128, 160, 223], [33088, 40956, 57408, 64764]], + 'Roman': [[0, 255], []], + 'UniHojo-UCS2-H': [[], [0, 55295, 57344, 65535]], + 'UniHojo-UCS2-V': [[], [0, 55295, 57344, 65535]], + 'UniHojo-UTF16-H': [[], [0, 55295, 57344, 65535], [], + [3623934976, 3690979327]], + 'UniHojo-UTF16-V': [[], [0, 55295, 57344, 65535], [], + [3623934976, 3690979327]], + 'UniJIS-UCS2-H': [[], [0, 55295, 57344, 65535]], + 'UniJIS-UCS2-HW-H': [[], [0, 55295, 57344, 65535]], + 'UniJIS-UCS2-HW-V': [[], [0, 55295, 57344, 65535]], + 'UniJIS-UCS2-V': [[], [0, 55295, 57344, 65535]], + 'UniJIS-UTF16-H': [[], [0, 55295, 57344, 65535], [], + [3623934976, 3690979327]], + 'UniJIS-UTF16-V': [[], [0, 55295, 57344, 65535], [], + [3623934976, 3690979327]], + 'UniJISPro-UCS2-HW-V': [[], [0, 55295, 57344, 65535]], + 'UniJISPro-UCS2-V': [[], [0, 55295, 57344, 65535]], + 'V': [[], [8481, 32382]], + 'WP-Symbol': [[0, 255], []], + 'Adobe-Korea1-0': [[], [0, 9471]], + 'Adobe-Korea1-1': [[], [0, 18175]], + 'Adobe-Korea1-2': [[], [0, 18431]], + 'Adobe-Korea1-UCS2': [[], [0, 65535]], + 'KSC-EUC-H': [[0, 128], [41377, 65278]], + 'KSC-EUC-V': [[0, 128], [41377, 65278]], + 'KSC-H': [[], [8481, 32382]], + 'KSC-Johab-H': [[0, 128], [33857, 54270, 55345, 57086, 57393, 63998]], + 'KSC-Johab-V': [[0, 128], [33857, 54270, 55345, 57086, 57393, 63998]], + 'KSC-V': [[], [8481, 32382]], + 'KSCms-UHC-H': [[0, 128], [33089, 65278]], + 'KSCms-UHC-HW-H': [[0, 128], [33089, 65278]], + 'KSCms-UHC-HW-V': [[0, 128], [33089, 65278]], + 'KSCms-UHC-V': [[0, 128], [33089, 65278]], + 'KSCpc-EUC-H': [[0, 132, 254, 255], [41281, 65022]], + 'KSCpc-EUC-V': [[0, 132, 254, 255], [41281, 65022]], + 'UniKS-UCS2-H': [[], [0, 55295, 57344, 65535]], + 'UniKS-UCS2-V': [[], [0, 55295, 57344, 65535]], + 'UniKS-UTF16-H': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]], + 'UniKS-UTF16-V': [[], [0, 55295, 57344, 65535], [], [3623934976, 3690979327]] +}; + +// CMap, not to be confused with TrueType's cmap. +var CMap = (function CMapClosure() { + function CMap() { + // Codespace ranges are stored as follows: + // [[1BytePairs], [2BytePairs], [3BytePairs], [4BytePairs]] + // where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...] + this.codespaceRanges = [[], [], [], []]; + this.map = []; + this.vertical = false; + } + CMap.prototype = { + addCodespaceRange: function(n, low, high) { + this.codespaceRanges[n - 1].push(low, high); + }, + + mapRange: function(low, high, dstLow) { + var lastByte = dstLow.length - 1; + while (low <= high) { + this.map[low] = dstLow; + // Only the last byte has to be incremented. + dstLow = dstLow.substr(0, lastByte) + + String.fromCharCode(dstLow.charCodeAt(lastByte) + 1); + ++low; + } + }, + + mapRangeToArray: function(low, high, array) { + var i = 0; + while (low <= high) { + this.map[low] = array[i++]; + ++low; + } + }, + + mapOne: function(src, dst) { + this.map[src] = dst; + }, + + lookup: function(code) { + return this.map[code]; + }, + + readCharCode: function(str, offset) { + var c = 0; + var codespaceRanges = this.codespaceRanges; + var codespaceRangesLen = this.codespaceRanges.length; + // 9.7.6.2 CMap Mapping + // The code length is at most 4. + for (var n = 0; n < codespaceRangesLen; n++) { + c = ((c << 8) | str.charCodeAt(offset + n)) >>> 0; + // Check each codespace range to see if it falls within. + var codespaceRange = codespaceRanges[n]; + for (var k = 0, kk = codespaceRange.length; k < kk;) { + var low = codespaceRange[k++]; + var high = codespaceRange[k++]; + if (c >= low && c <= high) { + return [c, n + 1]; + } + } + } + + return [0, 1]; + } + + }; + return CMap; +})(); + +var IdentityCMap = (function IdentityCMapClosure() { + function IdentityCMap(vertical, n) { + CMap.call(this); + this.vertical = vertical; + this.addCodespaceRange(n, 0, 0xffff); + this.mapRange(0, 0xffff, '\u0000'); + } + Util.inherit(IdentityCMap, CMap, {}); + + return IdentityCMap; +})(); + +var CMapFactory = (function CMapFactoryClosure() { + function strToInt(str) { + var a = 0; + for (var i = 0; i < str.length; i++) { + a = (a << 8) | str.charCodeAt(i); + } + return a >>> 0; + } + + function expectString(obj) { + if (!isString(obj)) { + error('Malformed CMap: expected string.'); + } + } + + function expectInt(obj) { + if (!isInt(obj)) { + error('Malformed CMap: expected int.'); + } + } + + function parseBfChar(cMap, lexer) { + while (true) { + var obj = lexer.getObj(); + if (isEOF(obj)) { + break; + } + if (isCmd(obj, 'endbfchar')) { + return; + } + expectString(obj); + var src = strToInt(obj); + obj = lexer.getObj(); + // TODO are /dstName used? + expectString(obj); + var dst = obj; + cMap.mapOne(src, dst); + } + } + + function parseBfRange(cMap, lexer) { + while (true) { + var obj = lexer.getObj(); + if (isEOF(obj)) { + break; + } + if (isCmd(obj, 'endbfrange')) { + return; + } + expectString(obj); + var low = strToInt(obj); + obj = lexer.getObj(); + expectString(obj); + var high = strToInt(obj); + obj = lexer.getObj(); + if (isInt(obj) || isString(obj)) { + var dstLow = isInt(obj) ? String.fromCharCode(obj) : obj; + cMap.mapRange(low, high, dstLow); + } else if (isCmd(obj, '[')) { + obj = lexer.getObj(); + var array = []; + while (!isCmd(obj, ']') && !isEOF(obj)) { + array.push(obj); + obj = lexer.getObj(); + } + cMap.mapRangeToArray(low, high, array); + } else { + break; + } + } + error('Invalid bf range.'); + } + + function parseCidChar(cMap, lexer) { + while (true) { + var obj = lexer.getObj(); + if (isEOF(obj)) { + break; + } + if (isCmd(obj, 'endcidchar')) { + return; + } + expectString(obj); + var src = strToInt(obj); + obj = lexer.getObj(); + expectInt(obj); + var dst = String.fromCharCode(obj); + cMap.mapOne(src, dst); + } + } + + function parseCidRange(cMap, lexer) { + while (true) { + var obj = lexer.getObj(); + if (isEOF(obj)) { + break; + } + if (isCmd(obj, 'endcidrange')) { + return; + } + expectString(obj); + var low = strToInt(obj); + obj = lexer.getObj(); + expectString(obj); + var high = strToInt(obj); + obj = lexer.getObj(); + expectInt(obj); + var dstLow = String.fromCharCode(obj); + cMap.mapRange(low, high, dstLow); + } + } + + function parseCodespaceRange(cMap, lexer) { + while (true) { + var obj = lexer.getObj(); + if (isEOF(obj)) { + break; + } + if (isCmd(obj, 'endcodespacerange')) { + return; + } + if (!isString(obj)) { + break; + } + var low = strToInt(obj); + obj = lexer.getObj(); + if (!isString(obj)) { + break; + } + var high = strToInt(obj); + cMap.addCodespaceRange(obj.length, low, high); + } + error('Invalid codespace range.'); + } + + function parseCmap(cMap, lexer) { + objLoop: while (true) { + var obj = lexer.getObj(); + if (isEOF(obj)) { + break; + } else if (isCmd(obj)) { + switch (obj.cmd) { + case 'endcMap': + break objLoop; + case 'usecMap': + // TODO + break; + case 'begincodespacerange': + parseCodespaceRange(cMap, lexer); + break; + case 'beginbfchar': + parseBfChar(cMap, lexer); + break; + case 'begincidchar': + parseCidChar(cMap, lexer); + break; + case 'beginbfrange': + parseBfRange(cMap, lexer); + break; + case 'begincidrange': + parseCidRange(cMap, lexer); + break; + } + } + } + } + return { + create: function (encoding) { + if (isName(encoding)) { + switch (encoding.name) { + case 'Identity-H': + return new IdentityCMap(false, 2); + case 'Identity-V': + return new IdentityCMap(true, 2); + default: + if (encoding.name in CMAP_CODESPACES) { + // XXX: Temporary hack so the correct amount of bytes are read in + // CMap.readCharCode. + var cMap = new CMap(); + cMap.codespaceRanges = CMAP_CODESPACES[encoding.name]; + return cMap; + } + return null; + } + } else if (isStream(encoding)) { + var cMap = new CMap(); + var lexer = new Lexer(encoding); + try { + parseCmap(cMap, lexer); + } catch (e) { + warn('Invalid CMap data. ' + e); + } + return cMap; + } + error('Encoding required.'); + } + }; +})(); diff --git a/src/core/evaluator.js b/src/core/evaluator.js index dafdddb50..c11713323 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -20,7 +20,7 @@ isStream, isString, JpegStream, Lexer, Metrics, Name, Parser, Pattern, PDFImage, PDFJS, serifFonts, stdFontMap, symbolsFonts, TilingPattern, TODO, warn, Util, Promise, - RefSetCache, isRef, TextRenderingMode */ + RefSetCache, isRef, TextRenderingMode, CMapFactory */ 'use strict'; @@ -1010,119 +1010,24 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if (!isIdentityMap) error('ToUnicode file cmap translation not implemented'); } else if (isStream(cmapObj)) { - var tokens = []; - var token = ''; - var beginArrayToken = {}; - - var cmap = cmapObj.getBytes(cmapObj.length); - for (var i = 0, ii = cmap.length; i < ii; i++) { - var octet = cmap[i]; - if (octet == 0x20 || octet == 0x0D || octet == 0x0A || - octet == 0x3C || octet == 0x5B || octet == 0x5D) { - switch (token) { - case 'usecmap': - error('usecmap is not implemented'); - break; - - case 'beginbfchar': - case 'beginbfrange': - case 'begincidchar': - case 'begincidrange': - token = ''; - tokens = []; - break; - - case 'endcidrange': - case 'endbfrange': - for (var j = 0, jj = tokens.length; j < jj; j += 3) { - var startRange = tokens[j]; - var endRange = tokens[j + 1]; - var code = tokens[j + 2]; - if (code == 0xFFFF) { - // CMap is broken, assuming code == startRange - code = startRange; - } - if (isArray(code)) { - var codeindex = 0; - while (startRange <= endRange) { - charToUnicode[startRange] = code[codeindex++]; - ++startRange; - } - } else { - while (startRange <= endRange) { - charToUnicode[startRange] = code++; - ++startRange; - } - } - } - break; - - case 'endcidchar': - case 'endbfchar': - for (var j = 0, jj = tokens.length; j < jj; j += 2) { - var index = tokens[j]; - var code = tokens[j + 1]; - charToUnicode[index] = code; - } - break; - - case '': - break; - - default: - if (token[0] >= '0' && token[0] <= '9') - token = parseInt(token, 10); // a number - tokens.push(token); - token = ''; + var cmap = CMapFactory.create(cmapObj).map; + // Convert UTF-16BE + for (var i in cmap) { + var token = cmap[i]; + var str = []; + for (var k = 0; k < token.length; k += 2) { + var w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1); + if ((w1 & 0xF800) !== 0xD800) { // w1 < 0xD800 || w1 > 0xDFFF + str.push(w1); + continue; } - switch (octet) { - case 0x5B: - // begin list parsing - tokens.push(beginArrayToken); - break; - case 0x5D: - // collect array items - var items = [], item; - while (tokens.length && - (item = tokens.pop()) != beginArrayToken) - items.unshift(item); - tokens.push(items); - break; - } - } else if (octet == 0x3E) { - if (token.length) { - // Heuristic: guessing chars size by checking numbers sizes - // in the CMap entries. - if (token.length == 2 && properties.composite) - properties.wideChars = false; - - if (token.length <= 4) { - // parsing hex number - tokens.push(parseInt(token, 16)); - token = ''; - } else { - // parsing hex UTF-16BE numbers - var str = []; - for (var k = 0, kk = token.length; k < kk; k += 4) { - var b = parseInt(token.substr(k, 4), 16); - if (b <= 0x10) { - k += 4; - b = (b << 16) | parseInt(token.substr(k, 4), 16); - b -= 0x10000; - str.push(0xD800 | (b >> 10)); - str.push(0xDC00 | (b & 0x3FF)); - break; - } - str.push(b); - } - tokens.push(String.fromCharCode.apply(String, str)); - token = ''; - } - } - } else { - token += String.fromCharCode(octet); + k += 2; + var w2 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1); + str.push(((w1 & 0x3ff) << 10) + (w2 & 0x3ff) + 0x10000); } + cmap[i] = String.fromCharCode.apply(String, str); } + return cmap; } return charToUnicode; }, @@ -1409,6 +1314,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { properties.cidEncoding = cidEncoding.name; properties.vertical = /-V$/.test(cidEncoding.name); } + properties.cmap = CMapFactory.create(cidEncoding); } this.extractWidths(dict, xref, descriptor, properties); this.extractDataStructures(dict, baseDict, xref, properties); diff --git a/src/core/fonts.js b/src/core/fonts.js index 6cc4badd6..154c3f087 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -18,7 +18,7 @@ ExpertSubsetCharset, FileReaderSync, GlyphsUnicode, info, isArray, isNum, ISOAdobeCharset, Stream, stringToBytes, TextDecoder, TODO, warn, Lexer, Util, - FONT_IDENTITY_MATRIX, FontRendererFactory, shadow */ + FONT_IDENTITY_MATRIX, FontRendererFactory, shadow, isString */ 'use strict'; @@ -2182,6 +2182,7 @@ var Font = (function FontClosure() { this.composite = properties.composite; this.wideChars = properties.wideChars; this.hasEncoding = properties.hasEncoding; + this.cmap = properties.cmap; this.fontMatrix = properties.fontMatrix; if (properties.type == 'Type3') { @@ -3701,7 +3702,7 @@ var Font = (function FontClosure() { var dupFirstEntry = false; if (properties.type == 'CIDFontType2' && properties.toUnicode && - properties.toUnicode[0] > 0) { + properties.toUnicode[0] > '\u0000') { // oracle's defect (see 3427), duplicating first entry dupFirstEntry = true; numGlyphs++; @@ -4250,8 +4251,12 @@ var Font = (function FontClosure() { var unicode = toUnicode[i]; var fontCharCode = typeof unicode === 'object' ? unusedUnicode++ : unicode; - if (typeof unicode !== 'undefined') + if (typeof unicode !== 'undefined') { + if (isString(fontCharCode) && fontCharCode.length === 1) { + fontCharCode = fontCharCode.charCodeAt(0); + } result[i] = fontCharCode; + } } return result; }, @@ -4264,7 +4269,7 @@ var Font = (function FontClosure() { var isIdentityMap = toUnicode.length === 0; for (var i = firstChar, ii = lastChar; i <= ii; i++) { // TODO missing map the character according font's CMap - map[i] = isIdentityMap ? i : toUnicode[i]; + map[i] = isIdentityMap ? String.fromCharCode(i) : toUnicode[i]; } } else { for (var i = firstChar, ii = lastChar; i <= ii; i++) { @@ -4272,7 +4277,7 @@ var Font = (function FontClosure() { if (!glyph) glyph = properties.baseEncoding[i]; if (!!glyph && (glyph in GlyphsUnicode)) - map[i] = GlyphsUnicode[glyph]; + map[i] = String.fromCharCode(GlyphsUnicode[glyph]); } } this.toUnicode = map; @@ -4535,15 +4540,15 @@ var Font = (function FontClosure() { warn('Unsupported CMap: ' + cidEncoding); } } - if (!converter && this.wideChars) { + if (!converter && this.cmap) { + var i = 0; // composite fonts have multi-byte strings convert the string from // single-byte to multi-byte - // XXX assuming CIDFonts are two-byte - later need to extract the - // correct byte encoding according to the PDF spec - var length = chars.length - 1; // looping over two bytes at a time so - // loop should never end on the last byte - for (var i = 0; i < length; i++) { - var charcode = int16([chars.charCodeAt(i++), chars.charCodeAt(i)]); + while (i < chars.length) { + var c = this.cmap.readCharCode(chars, i); + var charcode = c[0]; + var length = c[1]; + i += length; var glyph = this.charToGlyph(charcode); glyphs.push(glyph); // placing null after each word break charcode (ASCII SPACE) diff --git a/src/worker_loader.js b/src/worker_loader.js index e59299e2b..736c6bf96 100644 --- a/src/worker_loader.js +++ b/src/worker_loader.js @@ -34,6 +34,7 @@ var files = [ 'core/cidmaps.js', 'core/crypto.js', 'core/evaluator.js', + 'core/cmap.js', 'core/fonts.js', 'core/font_renderer.js', 'core/glyphlist.js', diff --git a/test/pdfs/bug898853.pdf b/test/pdfs/bug898853.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ab3bbc8c56836f810ab593ac8fdcd985b79fecbc GIT binary patch literal 15861 zcmajG19TPcr6v7C?yH9ZY06bmbW4q$6w0ma1yMXTg)XAGbf)OXUivNeUGmD4vhb_B4n zeMl%k@$f(y+ZcUH{w4fxmA{fuw9>{#=K2D*ZUD^>6*>R|9UXv~4xkN1D`@L%;{>3G zqLnl^a{S=3uz%Y8;QW`>PtyNw^%I4I@rUxiV9+WUJK8!s7=8fzL@Z+a!TNhb4`BWD z`4{M?*-xi7P9K_%09N{cGyWC9U&;UN>(dYvt)jDm)88C?W`YiiR>;`V@Iy!6#_5y) ziJXD?U&v_%^c{`=x~CO!aCR~`HFl6vQgkw+mNGXr`!FSJV`yt+Zet3d6*u}oVeaHk zE%uRNTV)&b4~8*-f${$d>J#O^K}i|gm^zsOm{>nUaddJp*0+WdP=ccWOe#Q0=p%6Y zzh@>;WI#kf0zk4KzrsKgpJ(I%B?~BFrO&(dFU5aF{TKSb9sF~v|A|^i74Q+Optz8t zyQ7n_wYZIm?cba#7@L}Zm~aP>@qgTSV{#~3SqCFyhtHtM{vFf@QPJ7X&dS*O^Cr># zlllKip7yruU=jmrg9n9^VY#jg$EPpfb|CdPBzt!|vQ4FkX0BryRfEmEd%mQHf zpwrWRlo-oL4Qm6~J}&-B(9?e~J_WP^^b7!c#(#)RA4En*#!n(MfS%=C zGq%4p26g}=(_fQ}%>QGDkp;lW`VW!qllbA3iHQlo%E0iK$nbIew9WG2`jgJa#{QT7 z$^Vr7d;ANC`Gdgtm&W`F=o9&;68ndeHh`HGz|8jdn)y@zuh@scKf!$3{Y3g{@1HnW z833#wwmxXAA5fS9pD0*AlkzF@dBOHKTvm1f+lSW=F*e2z=BF;>2fokaC-H9_fAji5 zWd8?0`$yX7nEpu--AA_Q*#C;s|E<0cke~MHKGOA(#!nOf{S^PL>VMamh`E)Mu>*is z#7f`ESjhOZ?4fA?RoU$H^#4`bZduwYV>Y`i@a@l(PHWhxgbqxIkmw+wnUQer02v;? z*kV^dHO9Ch@ON)D!cp%?Oxf z9h#HC8^MmG=ePdN>kJ#XP@R{TE0?BUB@No4LqoqWO?@9-HhDs5d4E|p=7#iLXf~Yv z==`Sp_I9_Ho4aY-A&je?pW5)Fh+!@kF4w zeGEQ$Ip92^ib7mYIX;b;FNi=!sbbxH8nw~KpnmlPZ8;y``Gw_${`Ce}B)phnFxW7D z_psP7d)6d$vQDpaItRTan|q!st!@6ewKfTOu&cSTQ6-RdEZH()L?80d;uNJ)?eUHkbNRXN1IEK;N0x94_%h zKOS0jc9=R1W~7?l@4oag{QA~&Vtl@FN01Ht97yd@i4J+xrOxRORM;!M`RFSG@Vs|e zF21!kalBfrVx|P7&aIFjNQH@nn5A>sTe8#c!^5$&=3C)e)UHC-x7++aNpD~|Y~kP? zVY9k*L#1yf<05O%(0JSDjQjWsU52swx2g{quDNyhWVwvYr%y9&X1yciuP`}<>!ydY z99S4x;*PU&nRo2^m5<}oafQ4HzYy&Di@M@PpD2ILj*KpOt{fmeOV6rLrxIlcH_WQn z-cx0NZBXY}o}+^Kx?ysKc1nNocFD5r-HrS6fzrw3O7#-$nsd^02A8rvxkL1_u~yWk zN5fcNO3M=C7VS^(aDfi`V^NQrvUniBZ}Jq_WH|=iVNF};wC=!WUROjHzb}d}h;P+w ztXCp0%#-y6q00_n(pT6q!R|tw$ z{A0giZpd$AYGwS{h!j71kjmeVq;Dr?{Mk+aQBwQY1vCAJ(8ro$>iDsIGtjfL{o`Cl z-}+#H20CvqTLULrO{h=-qv7fXzpTn-Wwc0e7WSAyM}_ZOEMG{=XO zXTe%+w7zT9wp`n}_qP0{S>3b{z_w${zeJmtYGz4TsR2WojSVWj6A_58; z3o8j316fABE_Nds4;h05-Y>Ve**r6xMF@ECXuBoy-vTM z(6tj22=ez44SM5jpuQ7ukr7ed@gkJ{qMjL&qzZi@lv6lHpM8B7tDHRSLfcDnH|l=R z3*t&VA~wi}w3H`!0`%^!z>?0r5jzM>;AvnB!>Mu!xc%KRi0- zn+8D#)jSeNbX<;@zF48ob|OTe850Ey7nKO2qk+&Gm$#P|p|*ZxfngYR0zIN3!S;5T z1XdL@KG`)d8L?OXyW-b4eZr!E>AdE=_Zq~vMA$f`M0k7{MC6LZloTes8IdR&hA^=N zCCM?;{dZAznoX4Jt|nz+4GIbzTyax&VY#YMVY>&>zQCAj$-TE&`+iD zrui~E+U&2MmYA4N$go^Ydnw#>?{j9VvUF9D7h+6Y1G@!R0Qir;!xW( zmfBqXDAF8@r*>`dT98<#C{{MFaNr!IiLvf0u3NT3@Jy~U(QVw<@NCFg@$fmRTwK+} z(qOHB%PweX)Kaw}YKp8l*s%9q9XmZcTV3&7ifw|c)iujC>Kb^9c%^<0VaZ;xvQV6F zR9-D?L^&I5y0L7kT%S6QKPLF2{Rit$O-4_Knsm&5$bRY_&T6a~Z`gXD!aY zbJ&HQB;{P=jELEv70@%1GZpLjJ6zid!cOANY}eoG5BTl0yxF=#w%bkD?l0tTQ14+i z2b-6u*$vgvlj;XWSS6aJlE18fy`Jjr(M|XC;ScKv_i2POg$BYNVAF8EoL*KFF4S3z zYKx}EByxf|X)W4|4!z<)bEdd@&6H`AN|(T9=W}|Wa#c7Are0|qv}K(stnGRKYSBJv zdvsl0pIAEkxnnkDAG3qwjZMlf<046ow3#|G(%_g*u%IDDFI!(Y9T!t$??z@-v_&25 z`~U?$8%eBa{v8A>SD%dJqTVe%F{k#nxc8Ux5^safONJTvq%-c9)o!S`q)4zCPO8u{ zi41G>@X%ert0YmVd@(r5*C@mt8~o|5QcW@C7(QDdQjkNWy~k~b0kr4x3W(DqvC5L+ zIo*&PCs;OlUrY}jk)@m$vo2aEE|>h;-V0jHFW;~mZLRp9N^ivnA0hWA2mge-!1U(s zU3_f>D(@av9;b~-`+f(JzSAbf2_s}P8{VCmx^ORXP1$Is%f*#*-hA%*==zi; zzNWkRP7$-ar1g^bsx-1@9QT~9V^GMtYLQa-Ya&eT6rF18k~fQg4dj!A>K%h7ElLv#PkhO;KPERDv1@UUgzcTaAzn*9fqo|mM_@u@g}%7cB%1+$NlXE zOTN)@3kKKohI@5ttV}d>-X-vD<x0(c@ zr$I$v&QwI)8xZo=tc$?{YyCcrqP4K40j*uZUjmc=yaQ{~jD%Y`74aG8z{#5Y*p&JPRYai8?u77P`r2+Umw3C8*>7>!Vh^b z|3FvmlWP#>Rt(r3Q_aJIdLH7!H+Yg=w#7rl#Udmw;2nxlorTFPlZ_t-&!F2 zQ=azvNX_0-F;AaV3`Sj*yvCW%U~P|ugw-3x3DIA;wbwaX*l(J7arwpjdm#y}@C%$s z9dG@>&PlXESj3yt0~KpmZz0n~y&jnjOM@y@6QePw{r?E+Z9ehZO>WI4!LSdOgE807 zCmZY$Vu+D)pD|-PxQhdBdefIINuw=8Gsky772AWM(Z=12*;TsBZ2w{sR*=^$ zFD_>q#4h3<4CD$SPMW}eIDbi54=%fQEO1e`F@tn)jb(P|hT_qihJ3ahS)q!%)3jFn zqqmQ-kI8L8Z@XuI+u!sOjxJ}Drg3j0RrYY!a`ciW+w|>ad%}4Rk4t~m$}~wADU^cG z@;xj>>nDzJ{kK2I;tL9U)IC6teIKS)bjqKb{ z#VlseZ)CmW|H#d#8c4oGQ}u4DeNvg=lCAmfzY(voSzu$DTaG16FPacy^do`ndfW&5 ze0K%9X((r!;&Os|F8(B#YesQsq?r^qu^J{lxq&f4mor4e`&jAP9H!Dk;N@8h9UY2y z>*|F($FT)C7iTE^s)3-vvHCI6#XxIvu7w23{ z8iZ`mV#6B?oG)tmDXu7AVqM$+3CIy2$N#yqzmYIm?Y!%Z2K;P z?WGC)yoC)uvB9)P#g)ruuw}N^DjL_BXAy|{79^d;n!P26u^r%OYEHL_#i{3cUr~^= zHo<9s3XfoUxkwm(!2DG1?f*ns?XbBf~Z&>4uhVgm-5mM32yH zRzWZ#Z6#j{A%VX4*xJ2K=X{OZdVA@aK)U{;0i;fa;T0l z;q`s`f!Nd^5ko)Oj+RZbXm??z(*EWmtXg?k=Z4Okq{1yLbS=RjA7e*UKcEfpfc1C- z9m$0T|^eBXL#p%HoJ&c-jKNUy%N-c1W7GMCe%;?ll%)1)x zBXakJEASiPq#x6gVLwbgRK2*!Fmo?{!Ami~10hvTd4hQIUW3&Xqr5e}eY)GWLmVe- znb-1o@*Pm9vSk0l1LwPC%(GRl#GNX_+8ZtOC<*+AicE`T`iR9#SnW*I>Hi*r)t`#A zsYyw;h+x5x5H&M03_A#MN-j1fOi1l{6i<&7LLy>nL_PCk1}W*ZjG29C5pUX0TD%WU zCpmWa0(5|r;gl3pXGeCPb0r_1+zn=m3~hVs(i+MpR;hfF(DT8lpLD{-h^IM4<04zH z5#>||Vw=!k`=wWhZnb!(_BoT+j+G(p@{{Hh00jAhp&NzWkC7Yl#C_-#T75vhrz*=zX2P1%Z4g%yp%^+33 z4qZhp_YlqCdLCh30)<&J7|3c%JQNr28La|`GMm+j@n`UOY27MC(fV?r8as&$n%@MpE~7G1&cZ1)>@&_CmYKw`c9hsXt#t z+ao-PfMeGlB*onm8CQ$Qb*<2%x=O$Q$)jv8LX9O^<%CY zhyHP*FOqtMvkBbN+LdfgiyK8-7qKF5YbR`R5h#Z~G~cmo=mituh}}?0Gs?HD zP#VCGE2Xq01^SuxjGIAOmg!Ix*_!i4G-X{L56awTF_%X`Vghx;u>Og`Zi*3*FGv*K zZ9#O&X7W(wqo%O5RV`pY;cYI zn(41icCaMV{)@q0FM)oB@^29CbEnVZSV_u^rl!#xQFP82uAohts&KqXWIfrcdO_MH zQX4;cmD(mVH5{8Qu5e)7)`|S83+i_wZ@$&C$-ni3hO*$ZNp*4Du&TW`+z^g09==KzrdEo$ghscZ33eqa*3judXRuMsmOFJWB!FsR|zVAq1o)JkF4iQ$Y^FHC)`Ljlhr z#b#mFE?&EqS$0D*H9X!<=P{eJr!l4L1mhC}=DC!d&fz}CTKRH|c%lJ8AzN38L1?8GWnjq)WKSWodJI*Ko60ZO8dhLNn;VdNJxc4^7y#sU{w^H$ z?n0p$0oedBN&k{&FpHKJd-~t_8NxR|?t`yRvraGLPVFWRTNnejAA8M}^v}O5nlg)C zr&6&`{A8Du$yN7~a4e~7I+U*&x)Dcl$~`JFCE2Y&j+1NaKT?+-RX}F-t#xW_3aO^U=;qSUW$q ztH_7L%gH;(gghc<`3HB5X{^7-X{+ygXT!4PiZzSE>Ey!E7%QpKQmt|*u0nL}StYPS z)Br>X)afecU48|EZO~7;VL$Nfg3VJfcC*pzL=|JpzFc4cjGnqQGJ5web6htx0V&%tZG|a6H3&U2GYOO|iH; z)6Hy8-BcH`mW-N;`n836({`^mPJ1S(pNvO#IP<``;B23N5Sy__*;rV!l`37dc)?Zg z(gjuteIXFb^~Up(`Gpg)>bD7)WAk_P4#f7aYUEL^0~-di5$F5Au{|v{cjs}Cm;&2C z4O@KQUh>s=9AA%_ku`BJ5fD#9MSG%=nu}J(96P6W$yXgp%o9l!A}On@JBwSst|U%B zSv;g~Y2`3yvumdLf^c0Xo0%1KUQspOw(CEZV_U1Xhqod7lFX=f=&4J2wX@*+M@3nW zmBn??i$c%u4IJEA|1vRwC-!EFn25UO0`7#0Al&NC&Qx?y&d@Yc^H5e)kJq+KVU_B% zo2&_-U~3w(Q?^!fan^}nltXaqv1Lfc+ikM_VN!mv9j3Q&f1K4lO0J|f{bNLNp>uf- z3Z#L~8TXg-Yn(@1nQx8v6}jN^qrUEtP2UsyrBN4Hx;TStoVYuEvQ89dgkEO7XC^!_ zj(pZ_UTf@D7CK5y9#lYYk<+;JjJgll7vx$EiZ>m*6y&T9cC<1Z0yAAaTI(`+V*|^6 zn>!NvM7Hf;(VoaY@6r>-1dSn9^{$1{jU*89xQxTN4tZrmmSx}*i*Ya_;Wu4XDH>gG zC4Wu;yI?2ZqH94G)g#HlB%!FK+2hl8MYdg76P8U(3I;2Cgh0kwUlbkjNF26;Ni*~a zzOioorsKu>{kRmV68xQezp>xaG@VacWVn}M(ntpEPm@vNBce=kUG=7$)#j0@tJ!45 zT3C;70&jq9geY_A!47`c>)q~^Z5NEs?9YR^9_|K(lu$ju07zvF@pZoi!4#_6gmwWQ zKk^=bAazW{tLSuU(J88o1rby~h)-0kKNJTC&vlJncR70WX%ta2l6%)^jwXr4w_wV*ieS6)D*v{YczWQBBp+>D_NfNgPQTUl$%dGXiglW+`Wn*YmOUY z8nM6+t=r0+p2){1m5>8kS#maW}8RVYmxG8e| z@eu4Jr*IhoFD-Uiy-lP;>{FS_TPe?l%Pe=r#<>JFpSOhY*gwVU&+necbKUQatyw4gp{$TbB%lJo9?8Y#6-F{C=9 z&8rvgulE#g+}yPKeBES!#v>ymqKBq%SBjOH>%L%(JLZR>Vp3n7IE)~6(XhT)x*I~6 zfG&y|pz#J~H;45PZxe{^U6One(knRRyVDxWx3z8&W%BppLC)&ox%|Z}lyCUuCoCp) zRT4$Jpp|Aj18`OZ9@;7n6?l^aBtsvE{7~`?Ik>_-j#8kAI(LnkoZLhC0{x2mXvdJO z5vtj_QPAM^*e|kY{rX9NQ_0tu&|kTnsfx9lnIbj5Jx6msr|I8eBG>pn+ne<>dA@A}$-v?}h+iSGZo;m^!+Sc~*}a<=v~@mbfj zus8|~u6dm<*4@?x-SB#Cp+-=1NZusaRW&w+--_%eNvqi-@XB30nVY@k(yR^VI%Hia zLx-osr;q#x^AO^4@3Dcq4ZBbhrNaPp)=3c_f1NnAN+!($k78HGBao)`tieEWx*&Mz z*qLokhsVKW`pAw*unZnIN8xF83f~6!VLs3*=r9s;K1`6~uGm{BV9yFdTIf2k{y+F=bTs$7*>bp0ut}Hxef5Q}N*gaCR8=B-Z zIzZbxO-5yBbquCfGK`;e03BtFD{tz9Lmn<@%%;> z)~YHx!K8l?`yfA}L1# z`WQ(*cw?Um(kAe1`Rz!EFl2r&pIwoiBR@ElZ7|?+~P~FHzZ$FHhm~vZ6A!06G4fQ))Y3+4j&fDsoq4ja+S)XEbIq z?sG%TCcIzZjv59`L`;ZP6cB#b{CSrjrQ{3=DwgnM>`7*zR+6@cfTu%31R_x&&8a{mT)R<^v0*f7 z<;<515T9M}=^0dhgR6aoE2U9LFqz?PsVyuq> z+$l%E_Jbgd0nm_hH-I)0w^NAA_*o+&(Igv45I;j$?LVao4bzi3RBit08MNd^=5HH$@Dp4roTa_=GPw1k9 z+Se*gRxv!KMuN+)sy3WIoN6~@fI?FrnIXX;v|bebc#{dK$v1PAmNZY*zxWehZ#$&v zR{N#z(`YRW(Any~FDn*5ExsEQmRc^T^>QCA}FAOddI}*qS?Fp_I%Edky}hnFfsX06pycsaCcIevZA{y zdDaZ;elziMd@>v%VlL_E6x3x#p35WQZqQp07WVMD(j*0LDtF;JSS#1k`(g87GDLN3 zHdxw$)dw5NqtK zDMN{pK6G^Gg3F~J3|tvpu^}^al2>G9u!G}i{Nk46HJ~06*NJ`S5(D7$PxzSYWPzq& z4-;~v;akZ7SYoG9i)jHOSj-XW2XmL&rsAnxQd{06-iHMz9;aE@4KZJ}-}WuBU`@~8 zNgIc(1YON;u5+%MUfYY5(qfgo<1T7wO5u9Xd~iZY3QC9onVR8C2kQvW`h*l;IZYMV z;Z)iZM;#IEY6BM#9rP|>1*|&24c?c+mfB>txD%P&T+qbRFEM8y|A@?^SV20u3*<>o zpx?f*a~R%MA@ic2QG6w1tizi{nBp1o&+CV!vuUpU7Z86urHd zM{}6qa5l<=x0m5ec$RT3qXWkm!6ZFl^oaEB}RqfD;h@ito4MC=zZ5eAP-H zrf)~`;vHF5oud+<68zrZ8sR|7fk7>!gJ(cYwJ3W&9_5QrMsL@|o_r~9Z{Joc^tG_? zG<)d-i-F{lG0w=xpK3>T-o#hbw7l|G;gsg`+P1DL!_`6~BzLnEE_)QSm0rrEvnni5 zI-HV%?BR;*;(a^OBz7vzfaPu1#k0t!JG4K{)1Q4Oz3mm`Bv;8rwn-nJ>20a9p?o|H2>{;wRkbwoLY&g4!m~yrO8L5Liu0V3hD70i&JUZJL=-i~ zRHN+iFLF=z@F-@0lqgI%3NF&CDG|9z`%|pWEI=FfV-T5bi7hN%XQJmFuIw9Lg2e56 zb-Wzn2h;a$6z-@_G6{`Wdi^sG6TI2q%~kZ*7uu9cORdYc#=G6xn|Ko^53_U80lghoYHR1tW!@ zw`^@`w&-Au(y=e+pdW}SC2^z$6_oO#@J0XEUKbLV34^D@GQT1NNKvfcFmT)Cfp4Db zSGhk{WVgRd2#y)@vhra~l$GbA=58ss!Wf&FZXfUc(%|a^rNJg)%wj z2<H&?<*cRFFqf{)(G-xVg1gp zw$0WOX(6I~zLi7rZlK{hCBMz-zT5|rZ5x^kzIs>Do-ub$RnlOR{>snXd34rPXtYu8 zQ!@5gDR!!A-iL3}-+4l=A`X{Xj*B1cGcs23SWKbTn2Qd{*JbWn43%o`F|C%?{i#s} zCDlT;k|eld{K$^PRjJ^`)aC#llf&yIcyojnb@`B77LKKQjdj?*hjj60Uaz8>yB4XD z#Q>8H{VCuy&At|c6&&oR;LD##)VSJbP2KZQb2#3Bm!7c@Q&my0^WXNND2DboHkc^G z6OG9Z(<=Pp$-Spmje8Vb4R>EpY;VfPZ}JMSij}DCT5Zm?^sFB1$Z>?pXt&k8#S6Pq+5y6nUvulmJq^OgdND85mLoIqZCUDXOOWx%XS zVLtm*Xy7%u$d4wtU>*H={`l|t*nmZyu;!Y_VD)qc;g$ttJ_Y3EE%39!79uRM-ye*& zYiNTr_r`f_DAVR#(oJ+ayOp4DpOx~m&R^V2-a|2GCoZP9bFEMXS5UmWI_A^T6FcC_ zw(esxRuj?LiUyS&%Jzw-C*_iZ&)R^xv#dOG!yyYH%dnklZHVh@=v=Nv+FhwXWSx1n z`6|<;m$J>SN2+%TIG4#kDb<0GCrE6@WbHMPn5_hN0}9jBc$l1uVgoZ!>}s-?;O>=O zbyzs<<6R!h2P{)85L0a?)Fw#3DvYegR`L{*Q!ROW@#cCmlpq>eGy-d>AhI17`lLGV ziRHLlOlDDgGI8ElYPKb-QUiCE=9LD$ERrs1BN8s%Z$sGwShewRkSZY~*Auy8O?fXC zr??pecd+2?;ZU?@cMqsFJ+fkHxEm|Q`Z~MpTbobm@S-^#81oGb0+{saJ%18DXm5Is zw}nDUGLhb-P_s|cQgF9csyXgBA(+eVJ5z!U8PrNA>7oWA8;H`UM=tGzT9dbPLnIu@ zFU9xb@=r&?9%gl&QqUovQVyEma@!T^Db?M&+l?$bUXLOMr#*&Hs%o?b@+KD}w*}$} zZxrvaSu`qSg;z8?rfji~4h|ZKg>Pr54M`JDTq;?B90}R%T66TvA0%g9n$Eh#%(Goa zEbm3M3uJwVZ|dfRokCt!m^0&AokcZKUy<8mazZ_0U&F~&Z*!1-c`9@vzYRGX@l?GS z;?66jW)sj?xl8HDbZwF>U{o<7vC=jn#Cg?>RG8*V3QBNEBR%bAbnV{8VcIo*Ygiz# zk$)=@z+!9MyQoU>J*!>Vk6LNm-<+_58SY2VU}j{tgr%>71b0a_Z4 zm3x|Zns(|xPLMjrISf8ZI$SbhIx4sqeqD5p>|Xp5_fqx}{US@LTam6JuVSylu5wUS ztp=$Aqf%3~L5YpLjsk*GfN~V^9N{r=0cFL#v@*uA7!a z8T6S5`yt(^;A05p?UX+-?`}vpM98ukD3l17<6S&*A(Jt#A0Aj$-Lq36%4s${;mXox z-Y=9wTv`RecElCws(zPfa#U-@h`os7-w&rBFEELSZ6b%Ndw8$ME7#7a2b|uG7UZol zJ5V_s6&2`0aY(;nw}LF{b>Zx;SY^xa*3nxb@XhAPoY znc)-u0!vwajNLJr{irsTz;rsM6l1hmX5NZ}OA%;3C%(wl0~4lrANts2vj2QqKe4}0 zMss0Q6p>FAUIWd6o<#M&&+iUTaQ8Gqo|3`o=1gOVhIkVlm+v&QlSXogMZJYgqL+N>%7G|W#DSDpl9&AGT(ZG+)YDf;qGiS!<=)1YQED+ zV}XgM);VTeZ!A3X@v}02>tmUYw7L!y$70Tj!LiX_7PPI3z|sn`Gi7M7D0#(c6&daD zDYg`2di688jJ3%TOYXG%Rji${wobWiu$_oH3A3ErkRS_NqR^N^%~R6i=D7N2`>_ST zLd9mOIm1+EsSk~i2L@a(6yd^;iOs72!mJoF`(7Xlc`<%?FSxrChhPNZAe~eUPi9E- zh21OJq&dR_E<-(B$Hzg~gNq#41RW8ve_SCyh%_29Oo-!qp527Hbir&9*k_v4hfhmalJ@#g*Apunk!Zf#x^22smebHE01{5KU_d`bQlto|~u zV~OqHsS>KR+|}zg`9DC4tzxFPP}fEEEOBE4kBgcESL`27ful>}8l?Pu=jXSkCp-n} zPhQHBOJu9M^2%O|^4=`^zg{@;NV_`^r3X0tDfm7lGI6ij=HSoLm%Eazn&vE#=LvgOBvKZ56G^f?Zl4+`S;4(1!hQR7G7ci1Jve`r3 zBG=ler1*S=JnZ%WY)aIjxY=6!uF?~{FeYo!q$KAF^4ch5xwh8VdcB0ea@seskXSr< zpH6Ri&`I}WdM(!>F^nIl9xX2^1jj&iG>~NaLKPdhOMy2IQBpNxX3I|W)StBI>RY(> zM&eSD&9|qV9}O9|jOpq2l!CPrKr~jd zzM*rNF3h^GRLzxne|=n?vY14ng1XJ+(H~XX=m}Ra*IU$`hS^U{e^Mg=NP|7!P z$oUq^MoB;BNdw|I+@5addk95~dC2D&W?Ry96_DU`P!;?y?zz-|_rnVT8G;gP3dg7Nqt9p4^u zG^`kUkcid5=M6bS={D6NS73}Lv_m^Cc5OGvH@c;1uBT_$q*p+&;Lf42i+CbF0to?$VWftfZ>4X-4&HC{Eky*kU~(2eT8e4(V2UIR;3}+ds&E1 z0E`V(y`;>60#iNhMrvEl=J!Il$Hq4X)cOWniS7+bk1mTCQL0b(EkLE;*)73$)*IfQ zSfQmP1>at$i{=fYK0GX*cUdR}4SOjXJ&fqf)QoFM`) z>f8P6QrsVmwee+kWLClq3slHk@G1)w4npNqQDW0M*W8_7IwdMW!8Ncm@PvO{h=gFk zr(}(qu=>pSTw^bnkvyLLn9qB}9(}h9%m*0&8|_Ll+5L}Dr^r*vr~qA!0d^4sjSN!c zj1KRy-K9t$5$OEix&|=D^==P9WYi3jFavR74Mo6M=5z@S2DNI>{NadL)azvXZX+)x zV+bk1unUZk`;g(^CjE%`bwDv92*kDlg54PP<%>ETcVdjw%P|ODn7b7K;W~+zQT_A_rv0G6&5&_rXFozqBkAqn~ZocUyN8IB6K|iDWR9N2$hh z&-!iW;@J;C8p;h%XOXAgCyA~W=_IP+OqPhI0keL|2j(K<@KhK=r=0LNTq9pXE6T>eef)$$=2~|Z^C)R5r zrm%!-H@M3}PEvEG+u<8-1b5dUv|vwyK~7s*v99)cq^B&LG3KHMB_(mk;T_@hvRmj^ zf*FWB>O69`Si}ZoGz?mh6nBJ<_m}uk7ZXx)CZHuDLFE`>34+jcqFX$zf|b~^1N@AbM5~n8M#Tf*6@WXT!nZLL69U9xnN3MuR0$x`dvDt&TBodAK+8; zG$$44ga~vZf&Ro6SDvwHflf$6q`AvNxH0G#5n@cY%WZ`(Swv5HF3EU}_N5Ry0`2Y# zK2QaI1V%(bDP{(_=(f~$gcQ8+VjK=I)v8o<)2}avVGK+3r>h{+Czc9CzEm+Bj)!Cg zsj&F%sPO@iLJnY4Lu+tuU>9R{qDRmd3fzirUl!TG8Xcl?F8M2EeB{UF=*mRu28We0 z^91ycL1#4d5YPCG4K5$rL*1i{xrDt?Scj`(v^v(Mf_63d<;TM8gH);$$k0XT-l20s z9+LmBF(fw!V-u*4sUoP4vCn^f04&TOJ6V7U4AjTyAwA5$IP~-%qo02z05%_-j~T{) zbLc*&7yrXy`45MIj{Re{_w(~thxNZWAEQ72;e3o_{>K{K$86;P=zNSQ{}+elzx;g6 zJO776&-5`(^>05Pqsaf^Fnmmf{)a>VUmPa7|3cv8p#L$<H literal 0 HcmV?d00001 diff --git a/test/test_manifest.json b/test/test_manifest.json index 9b5146877..6b245d729 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -869,6 +869,13 @@ "link": true, "type": "eq" }, + { "id": "bug898853.pdf", + "file": "pdfs/bug898853.pdf", + "md5": "37c37702bf98d33f9f74e2380c4d1a3f", + "rounds": 1, + "type": "eq", + "about": "Has a multi-byte char codes." + }, { "id": "issue1912", "file": "pdfs/issue1912.pdf", "md5": "15305b7c2cba971e7423de3f6ad38fef", diff --git a/test/unit/cmap_spec.js b/test/unit/cmap_spec.js new file mode 100644 index 000000000..be0e41268 --- /dev/null +++ b/test/unit/cmap_spec.js @@ -0,0 +1,86 @@ +/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ +/* globals expect, it, describe, StringStream, Lexer, CMapFactory */ + +'use strict'; + +describe('cmap', function() { + it('parses beginbfchar', function() { + var str = '2 beginbfchar\n' + + '<03> <00>\n' + + '<04> <01>\n' + + 'endbfchar\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream); + expect(cmap.lookup(0x03)).toEqual(String.fromCharCode(0x00)); + expect(cmap.lookup(0x04)).toEqual(String.fromCharCode(0x01)); + expect(cmap.lookup(0x05)).toBeUndefined(); + }); + it('parses beginbfrange with range', function() { + var str = '1 beginbfrange\n' + + '<06> <0B> 0\n' + + 'endbfrange\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream); + expect(cmap.lookup(0x05)).toBeUndefined(); + expect(cmap.lookup(0x06)).toEqual(String.fromCharCode(0x00)); + expect(cmap.lookup(0x0B)).toEqual(String.fromCharCode(0x05)); + expect(cmap.lookup(0x0C)).toBeUndefined(); + }); + it('parses beginbfrange with array', function() { + var str = '1 beginbfrange\n' + + '<0D> <12> [ 0 1 2 3 4 5 ]\n' + + 'endbfrange\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream); + expect(cmap.lookup(0x0C)).toBeUndefined(); + expect(cmap.lookup(0x0D)).toEqual(0x00); + expect(cmap.lookup(0x12)).toEqual(0x05); + expect(cmap.lookup(0x13)).toBeUndefined(); + }); + it('parses begincidchar', function() { + var str = '1 begincidchar\n' + + '<14> 0\n' + + 'endcidchar\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream); + expect(cmap.lookup(0x14)).toEqual(String.fromCharCode(0x00)); + expect(cmap.lookup(0x15)).toBeUndefined(); + }); + it('parses begincidrange', function() { + var str = '1 begincidrange\n' + + '<0016> <001B> 0\n' + + 'endcidrange\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream); + expect(cmap.lookup(0x15)).toBeUndefined(); + expect(cmap.lookup(0x16)).toEqual(String.fromCharCode(0x00)); + expect(cmap.lookup(0x1B)).toEqual(String.fromCharCode(0x05)); + expect(cmap.lookup(0x1C)).toBeUndefined(); + }); + it('decodes codespace ranges', function() { + var str = '1 begincodespacerange\n' + + '<01> <02>\n' + + '<00000003> <00000004>\n' + + 'endcodespacerange\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream); + var c = cmap.readCharCode(String.fromCharCode(1), 0); + expect(c[0]).toEqual(1); + expect(c[1]).toEqual(1); + c = cmap.readCharCode(String.fromCharCode(0, 0, 0, 3), 0); + expect(c[0]).toEqual(3); + expect(c[1]).toEqual(4); + }); + it('decodes 4 byte codespace ranges', function() { + var str = '1 begincodespacerange\n' + + '<8EA1A1A1> <8EA1FEFE>\n' + + 'endcodespacerange\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream); + var c = cmap.readCharCode(String.fromCharCode(0x8E, 0xA1, 0xA1, 0xA1), 0); + expect(c[0]).toEqual(0x8EA1A1A1); + expect(c[1]).toEqual(4); + }); +}); + diff --git a/test/unit/unit_test.html b/test/unit/unit_test.html index 225e873fa..df640b66c 100644 --- a/test/unit/unit_test.html +++ b/test/unit/unit_test.html @@ -26,6 +26,7 @@ + @@ -49,6 +50,7 @@ +