2013-09-26 02:32:04 +09:00
|
|
|
/* Copyright 2012 Mozilla Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
'use strict';
|
|
|
|
|
2015-11-22 01:32:47 +09:00
|
|
|
(function (root, factory) {
|
|
|
|
if (typeof define === 'function' && define.amd) {
|
|
|
|
define('pdfjs/core/cmap', ['exports', 'pdfjs/shared/util',
|
|
|
|
'pdfjs/core/primitives', 'pdfjs/core/stream', 'pdfjs/core/parser'],
|
|
|
|
factory);
|
|
|
|
} else if (typeof exports !== 'undefined') {
|
|
|
|
factory(exports, require('../shared/util.js'), require('./primitives.js'),
|
|
|
|
require('./stream.js'), require('./parser.js'));
|
|
|
|
} else {
|
|
|
|
factory((root.pdfjsCoreCMap = {}), root.pdfjsSharedUtil,
|
|
|
|
root.pdfjsCorePrimitives, root.pdfjsCoreStream, root.pdfjsCoreParser);
|
|
|
|
}
|
|
|
|
}(this, function (exports, sharedUtil, corePrimitives, coreStream, coreParser) {
|
|
|
|
|
|
|
|
var Util = sharedUtil.Util;
|
|
|
|
var assert = sharedUtil.assert;
|
Catch errors and continue parsing in `parseCMap` (issue 7492)
After PR 7039, the PDF file in issue 7492 no longer renders at all, but note that text selection wasn't working correctly previously.
The problem with the PDF file in issue 7492 is that the `cMap`, in the `toUnicode` entry in the font, contains an invalid name:
```
/CMapName /-usr-share-fonts-truetype-Panton-Panton Family-Fontfabric - Panton.otf,000-UTF16 def
```
When we parse that line, things obviously break because there are spaces present in the wrong places.
To avoid that issue, the patch simply lets `parseCMap` continue when errors are encountered, to try and recover usable data. Note that by not aborting immediately when an error is encountered, we are also able to fix the text selection.
Obviously, it could be argued that we should just immediately reject a corrupt `cMap`. But given that they usually are correct, it seems that trying to recover as much data as possible from a corrupt one can only be a good thing for both glyph mapping and text selection.
Fixes 7492.
2016-07-18 23:01:02 +09:00
|
|
|
var warn = sharedUtil.warn;
|
2015-11-22 01:32:47 +09:00
|
|
|
var error = sharedUtil.error;
|
|
|
|
var isInt = sharedUtil.isInt;
|
|
|
|
var isString = sharedUtil.isString;
|
Catch errors and continue parsing in `parseCMap` (issue 7492)
After PR 7039, the PDF file in issue 7492 no longer renders at all, but note that text selection wasn't working correctly previously.
The problem with the PDF file in issue 7492 is that the `cMap`, in the `toUnicode` entry in the font, contains an invalid name:
```
/CMapName /-usr-share-fonts-truetype-Panton-Panton Family-Fontfabric - Panton.otf,000-UTF16 def
```
When we parse that line, things obviously break because there are spaces present in the wrong places.
To avoid that issue, the patch simply lets `parseCMap` continue when errors are encountered, to try and recover usable data. Note that by not aborting immediately when an error is encountered, we are also able to fix the text selection.
Obviously, it could be argued that we should just immediately reject a corrupt `cMap`. But given that they usually are correct, it seems that trying to recover as much data as possible from a corrupt one can only be a good thing for both glyph mapping and text selection.
Fixes 7492.
2016-07-18 23:01:02 +09:00
|
|
|
var MissingDataException = sharedUtil.MissingDataException;
|
2017-02-12 23:54:41 +09:00
|
|
|
var CMapCompressionType = sharedUtil.CMapCompressionType;
|
2017-01-27 21:34:37 +09:00
|
|
|
var isEOF = corePrimitives.isEOF;
|
2015-11-22 01:32:47 +09:00
|
|
|
var isName = corePrimitives.isName;
|
|
|
|
var isCmd = corePrimitives.isCmd;
|
|
|
|
var isStream = corePrimitives.isStream;
|
2017-02-12 23:54:41 +09:00
|
|
|
var Stream = coreStream.Stream;
|
2015-11-22 01:32:47 +09:00
|
|
|
var Lexer = coreParser.Lexer;
|
|
|
|
|
2014-02-12 03:27:09 +09:00
|
|
|
// Names of the CMaps treated as built-in, in their original order.
var BUILT_IN_CMAPS = [
  // << Start unicode maps.
  'Adobe-GB1-UCS2', 'Adobe-CNS1-UCS2', 'Adobe-Japan1-UCS2',
  'Adobe-Korea1-UCS2',
  // >> End unicode maps.
  '78-EUC-H', '78-EUC-V', '78-H', '78-RKSJ-H', '78-RKSJ-V', '78-V',
  '78ms-RKSJ-H', '78ms-RKSJ-V', '83pv-RKSJ-H',
  '90ms-RKSJ-H', '90ms-RKSJ-V', '90msp-RKSJ-H', '90msp-RKSJ-V',
  '90pv-RKSJ-H', '90pv-RKSJ-V',
  'Add-H', 'Add-RKSJ-H', 'Add-RKSJ-V', 'Add-V',
  'Adobe-CNS1-0', 'Adobe-CNS1-1', 'Adobe-CNS1-2', 'Adobe-CNS1-3',
  'Adobe-CNS1-4', 'Adobe-CNS1-5', 'Adobe-CNS1-6',
  'Adobe-GB1-0', 'Adobe-GB1-1', 'Adobe-GB1-2', 'Adobe-GB1-3',
  'Adobe-GB1-4', 'Adobe-GB1-5',
  'Adobe-Japan1-0', 'Adobe-Japan1-1', 'Adobe-Japan1-2', 'Adobe-Japan1-3',
  'Adobe-Japan1-4', 'Adobe-Japan1-5', 'Adobe-Japan1-6',
  'Adobe-Korea1-0', 'Adobe-Korea1-1', 'Adobe-Korea1-2',
  'B5-H', 'B5-V', 'B5pc-H', 'B5pc-V',
  'CNS-EUC-H', 'CNS-EUC-V', 'CNS1-H', 'CNS1-V', 'CNS2-H', 'CNS2-V',
  'ETHK-B5-H', 'ETHK-B5-V', 'ETen-B5-H', 'ETen-B5-V',
  'ETenms-B5-H', 'ETenms-B5-V',
  'EUC-H', 'EUC-V', 'Ext-H', 'Ext-RKSJ-H', 'Ext-RKSJ-V', 'Ext-V',
  'GB-EUC-H', 'GB-EUC-V', 'GB-H', 'GB-V', 'GBK-EUC-H', 'GBK-EUC-V',
  'GBK2K-H', 'GBK2K-V', 'GBKp-EUC-H', 'GBKp-EUC-V',
  'GBT-EUC-H', 'GBT-EUC-V', 'GBT-H', 'GBT-V',
  'GBTpc-EUC-H', 'GBTpc-EUC-V', 'GBpc-EUC-H', 'GBpc-EUC-V',
  'H',
  'HKdla-B5-H', 'HKdla-B5-V', 'HKdlb-B5-H', 'HKdlb-B5-V',
  'HKgccs-B5-H', 'HKgccs-B5-V', 'HKm314-B5-H', 'HKm314-B5-V',
  'HKm471-B5-H', 'HKm471-B5-V', 'HKscs-B5-H', 'HKscs-B5-V',
  'Hankaku', 'Hiragana',
  'KSC-EUC-H', 'KSC-EUC-V', 'KSC-H', 'KSC-Johab-H', 'KSC-Johab-V', 'KSC-V',
  'KSCms-UHC-H', 'KSCms-UHC-HW-H', 'KSCms-UHC-HW-V', 'KSCms-UHC-V',
  'KSCpc-EUC-H', 'KSCpc-EUC-V',
  'Katakana',
  'NWP-H', 'NWP-V', 'RKSJ-H', 'RKSJ-V', 'Roman',
  'UniCNS-UCS2-H', 'UniCNS-UCS2-V', 'UniCNS-UTF16-H', 'UniCNS-UTF16-V',
  'UniCNS-UTF32-H', 'UniCNS-UTF32-V', 'UniCNS-UTF8-H', 'UniCNS-UTF8-V',
  'UniGB-UCS2-H', 'UniGB-UCS2-V', 'UniGB-UTF16-H', 'UniGB-UTF16-V',
  'UniGB-UTF32-H', 'UniGB-UTF32-V', 'UniGB-UTF8-H', 'UniGB-UTF8-V',
  'UniJIS-UCS2-H', 'UniJIS-UCS2-HW-H', 'UniJIS-UCS2-HW-V', 'UniJIS-UCS2-V',
  'UniJIS-UTF16-H', 'UniJIS-UTF16-V', 'UniJIS-UTF32-H', 'UniJIS-UTF32-V',
  'UniJIS-UTF8-H', 'UniJIS-UTF8-V',
  'UniJIS2004-UTF16-H', 'UniJIS2004-UTF16-V',
  'UniJIS2004-UTF32-H', 'UniJIS2004-UTF32-V',
  'UniJIS2004-UTF8-H', 'UniJIS2004-UTF8-V',
  'UniJISPro-UCS2-HW-V', 'UniJISPro-UCS2-V', 'UniJISPro-UTF8-V',
  'UniJISX0213-UTF32-H', 'UniJISX0213-UTF32-V',
  'UniJISX02132004-UTF32-H', 'UniJISX02132004-UTF32-V',
  'UniKS-UCS2-H', 'UniKS-UCS2-V', 'UniKS-UTF16-H', 'UniKS-UTF16-V',
  'UniKS-UTF32-H', 'UniKS-UTF32-V', 'UniKS-UTF8-H', 'UniKS-UTF8-V',
  'V',
  'WP-Symbol',
];
|
2013-09-26 02:32:04 +09:00
|
|
|
|
|
|
|
// CMap, not to be confused with TrueType's cmap.
|
|
|
|
var CMap = (function CMapClosure() {
  /**
   * Mapping from character codes to CIDs (integers) or byte strings, together
   * with the codespace ranges used to split a string into character codes.
   *
   * @param {*} builtInCMap - Stored unchanged on the instance as
   *   `builtInCMap`; this class itself never reads it.
   */
  function CMap(builtInCMap) {
    // Codespace ranges are stored as follows:
    // [[1BytePairs], [2BytePairs], [3BytePairs], [4BytePairs]]
    // where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...]
    this.codespaceRanges = [[], [], [], []];
    this.numCodespaceRanges = 0;
    // Map entries have one of two forms.
    // - cid chars are 16-bit unsigned integers, stored as integers.
    // - bf chars are variable-length byte sequences, stored as strings, with
    //   one byte per character.
    this._map = [];
    this.name = '';
    this.vertical = false;
    this.useCMap = null;
    this.builtInCMap = builtInCMap;
  }

  CMap.prototype = {
    /**
     * Registers the inclusive range [low, high] for codes that are `n` bytes
     * long (`n` is 1-based and selects the bucket `codespaceRanges[n - 1]`).
     */
    addCodespaceRange(n, low, high) {
      this.codespaceRanges[n - 1].push(low, high);
      this.numCodespaceRanges++;
    },

    /**
     * Maps each code in [low, high] to consecutive integers starting at
     * `dstLow`.
     */
    mapCidRange(low, high, dstLow) {
      while (low <= high) {
        this._map[low++] = dstLow++;
      }
    },

    /**
     * Maps each code in [low, high] to a string, starting with `dstLow` and
     * bumping the char code of its final character by one per step.
     */
    mapBfRange(low, high, dstLow) {
      var lastByte = dstLow.length - 1;
      while (low <= high) {
        this._map[low++] = dstLow;
        // Only the last byte has to be incremented.
        dstLow = dstLow.substr(0, lastByte) +
                 String.fromCharCode(dstLow.charCodeAt(lastByte) + 1);
      }
    },

    /**
     * Maps codes starting at `low` to successive elements of `array`,
     * stopping at `high` or when `array` is exhausted, whichever comes first.
     */
    mapBfRangeToArray(low, high, array) {
      var i = 0, ii = array.length;
      while (low <= high && i < ii) {
        this._map[low] = array[i++];
        ++low;
      }
    },

    // This is used for both bf and cid chars.
    mapOne(src, dst) {
      this._map[src] = dst;
    },

    /** Returns the mapped value for `code`, or undefined when unmapped. */
    lookup(code) {
      return this._map[code];
    },

    /** Returns true when `code` has a mapping. */
    contains(code) {
      return this._map[code] !== undefined;
    },

    /**
     * Invokes `callback(code, value)` for every mapped code.
     */
    forEach(callback) {
      // Most maps have fewer than 65536 entries, and for those we use normal
      // array iteration. But really sparse tables are possible -- e.g. with
      // indices in the *billions*. For such tables we use for..in, which isn't
      // ideal because it stringifies the indices for all present elements, but
      // it does avoid iterating over every undefined entry.
      // NOTE(review): in the sparse branch the callback receives the index as
      // a string, whereas the dense branch passes a number.
      let map = this._map;
      let length = map.length;
      if (length <= 0x10000) {
        for (let i = 0; i < length; i++) {
          if (map[i] !== undefined) {
            callback(i, map[i]);
          }
        }
      } else {
        for (let i in map) {
          callback(i, map[i]);
        }
      }
    },

    /**
     * Returns the first code whose mapped value is strictly equal to `value`,
     * or -1 when there is none.
     */
    charCodeOf(value) {
      // `Array.prototype.indexOf` is *extremely* inefficient for arrays which
      // are both very sparse and very large (see issue8372.pdf).
      let map = this._map;
      if (map.length <= 0x10000) {
        return map.indexOf(value);
      }
      for (let charCode in map) {
        if (map[charCode] === value) {
          // Convert the stringified index with an *unsigned* shift: `| 0`
          // would wrap indices >= 2^31 around to negative numbers.
          return charCode >>> 0;
        }
      }
      return -1;
    },

    /** Returns the underlying (possibly sparse) array; not a copy. */
    getMap() {
      return this._map;
    },

    /**
     * Reads one character code from `str` starting at `offset`, trying 1-byte
     * codes first and extending by one byte at a time. The result is written
     * to `out.charcode` and `out.length` (number of bytes consumed). When no
     * codespace range matches, `out` is set to charcode 0 and length 1.
     */
    readCharCode(str, offset, out) {
      var c = 0;
      var codespaceRanges = this.codespaceRanges;
      var codespaceRangesLen = this.codespaceRanges.length;
      // 9.7.6.2 CMap Mapping
      // The code length is at most 4.
      for (var n = 0; n < codespaceRangesLen; n++) {
        c = ((c << 8) | str.charCodeAt(offset + n)) >>> 0;
        // Check each codespace range to see if it falls within.
        var codespaceRange = codespaceRanges[n];
        for (var k = 0, kk = codespaceRange.length; k < kk;) {
          var low = codespaceRange[k++];
          var high = codespaceRange[k++];
          if (c >= low && c <= high) {
            out.charcode = c;
            out.length = n + 1;
            return;
          }
        }
      }
      out.charcode = 0;
      out.length = 1;
    },

    /** Length of the underlying map array (highest mapped code + 1). */
    get length() {
      return this._map.length;
    },

    /**
     * True only when the map is named 'Identity-H' or 'Identity-V', has
     * exactly 0x10000 entries, and every entry equals its own index.
     */
    get isIdentityCMap() {
      if (!(this.name === 'Identity-H' || this.name === 'Identity-V')) {
        return false;
      }
      if (this._map.length !== 0x10000) {
        return false;
      }
      for (var i = 0; i < 0x10000; i++) {
        if (this._map[i] !== i) {
          return false;
        }
      }
      return true;
    }
  };
  return CMap;
})();
|
|
|
|
|
2014-08-05 15:38:43 +09:00
|
|
|
// A special case of CMap, where the _map array implicitly has a length of
|
2015-03-06 23:01:26 +09:00
|
|
|
// 65536 and each element is equal to its index.
|
2013-09-26 02:32:04 +09:00
|
|
|
var IdentityCMap = (function IdentityCMapClosure() {
  /**
   * CMap whose mapping is implicit: every code in 0..0xffff maps to itself,
   * so no backing array is stored.
   *
   * @param {boolean} vertical - Stored as `this.vertical`.
   * @param {number} n - Byte length passed to `addCodespaceRange` for the
   *   single codespace range [0, 0xffff].
   */
  function IdentityCMap(vertical, n) {
    CMap.call(this);
    this.vertical = vertical;
    this.addCodespaceRange(n, 0, 0xffff);
  }
  // NOTE(review): the prototype object assigned right below replaces whatever
  // this call installs on `IdentityCMap.prototype` -- confirm whether it is
  // still needed.
  Util.inherit(IdentityCMap, CMap, {});

  IdentityCMap.prototype = {
    addCodespaceRange: CMap.prototype.addCodespaceRange,

    // The mutators below are not supported on an identity map; each one
    // reports misuse through `error`.
    mapCidRange(low, high, dstLow) {
      error('should not call mapCidRange');
    },

    mapBfRange(low, high, dstLow) {
      error('should not call mapBfRange');
    },

    mapBfRangeToArray(low, high, array) {
      error('should not call mapBfRangeToArray');
    },

    mapOne(src, dst) {
      // The message previously named a non-existent `mapCidOne` method.
      error('should not call mapOne');
    },

    /** Returns `code` itself for integers in 0..0xffff, else undefined. */
    lookup(code) {
      // Negative integers are rejected too: they are not indices of the
      // implicit 65536-entry map.
      return (isInt(code) && code >= 0 && code <= 0xffff) ? code : undefined;
    },

    /** Returns true for integers in 0..0xffff. */
    contains(code) {
      return isInt(code) && code >= 0 && code <= 0xffff;
    },

    /** Invokes `callback(i, i)` for every i in 0..0xffff. */
    forEach(callback) {
      for (var i = 0; i <= 0xffff; i++) {
        callback(i, i);
      }
    },

    /** Returns `value` itself for integers in 0..0xffff, else -1. */
    charCodeOf(value) {
      return (isInt(value) && value >= 0 && value <= 0xffff) ? value : -1;
    },

    /** Builds and returns a fresh 65536-entry array with map[i] === i. */
    getMap() {
      // Sometimes identity maps must be instantiated, but it's rare.
      var map = new Array(0x10000);
      for (var i = 0; i <= 0xffff; i++) {
        map[i] = i;
      }
      return map;
    },

    readCharCode: CMap.prototype.readCharCode,

    get length() {
      return 0x10000;
    },

    get isIdentityCMap() {
      error('should not access .isIdentityCMap');
    }
  };

  return IdentityCMap;
})();
|
|
|
|
|
2014-03-15 03:22:02 +09:00
|
|
|
var BinaryCMapReader = (function BinaryCMapReaderClosure() {
  // Throughout this closure a `size` argument is the index of the *last*
  // byte of a number, i.e. its byte length minus one.

  // Interprets bytes a[0..size] as a big-endian unsigned integer.
  function hexToInt(a, size) {
    var n = 0;
    for (var i = 0; i <= size; i++) {
      n = (n << 8) | a[i];
    }
    return n >>> 0;
  }

  // Converts bytes a[0..size] to a string with one char per byte.
  function hexToStr(a, size) {
    // This code is hot. Special-case some common values to avoid creating an
    // object with subarray().
    if (size === 1) {
      return String.fromCharCode(a[0], a[1]);
    }
    if (size === 3) {
      return String.fromCharCode(a[0], a[1], a[2], a[3]);
    }
    return String.fromCharCode.apply(null, a.subarray(0, size + 1));
  }

  // In-place big-endian addition: a[0..size] += b[0..size]. Any carry out of
  // the most significant byte is dropped.
  function addHex(a, b, size) {
    var c = 0;
    for (var i = size; i >= 0; i--) {
      c += a[i] + b[i];
      a[i] = c & 255;
      c >>= 8;
    }
  }

  // In-place big-endian increment of a[0..size]; stops as soon as there is
  // no carry left.
  function incHex(a, size) {
    var c = 1;
    for (var i = size; i >= 0 && c > 0; i--) {
      c += a[i];
      a[i] = c & 255;
      c >>= 8;
    }
  }

  var MAX_NUM_SIZE = 16;
  // Each encoded byte carries 7 payload bits, hence 8/7 of the raw size.
  var MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 8 / 7)

  /**
   * Cursor over the bytes of a binary CMap.
   * @param {Uint8Array} data - `readHex` calls `subarray` on it, so it must
   *   provide that method.
   */
  function BinaryCMapStream(data) {
    this.buffer = data;
    this.pos = 0;
    this.end = data.length;
    // Scratch space for the 7-bit groups collected by readHexNumber.
    this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
  }

  BinaryCMapStream.prototype = {
    // Returns the next byte, or -1 at end of data.
    readByte() {
      if (this.pos >= this.end) {
        return -1;
      }
      return this.buffer[this.pos++];
    },
    // Reads a variable-length unsigned number: 7 payload bits per byte, most
    // significant group first; a set 0x80 bit means another byte follows.
    readNumber() {
      var n = 0;
      var last;
      do {
        var b = this.readByte();
        if (b < 0) {
          error('unexpected EOF in bcmap');
        }
        last = !(b & 0x80);
        n = (n << 7) | (b & 0x7F);
      } while (!last);
      return n;
    },
    // Reads a number whose lowest bit is a sign flag: when set, the result is
    // the bitwise complement of the remaining bits.
    readSigned() {
      var n = this.readNumber();
      return (n & 1) ? ~(n >>> 1) : n >>> 1;
    },
    // Copies size + 1 raw bytes from the stream into num[0..size].
    readHex(num, size) {
      num.set(this.buffer.subarray(this.pos,
        this.pos + size + 1));
      this.pos += size + 1;
    },
    // Reads a variable-length number (same 7-bit encoding as readNumber) and
    // stores it big-endian in num[0..size].
    readHexNumber(num, size) {
      var last;
      var stack = this.tmpBuf, sp = 0;
      do {
        var b = this.readByte();
        if (b < 0) {
          error('unexpected EOF in bcmap');
        }
        last = !(b & 0x80);
        stack[sp++] = b & 0x7F;
      } while (!last);
      var i = size, buffer = 0, bufferSize = 0;
      while (i >= 0) {
        // Refill only while collected groups remain. The scratch buffer has a
        // fixed length, so testing `stack.length` would never stop the loop
        // and would read before the start of the buffer.
        while (bufferSize < 8 && sp > 0) {
          buffer = (stack[--sp] << bufferSize) | buffer;
          bufferSize += 7;
        }
        num[i] = buffer & 255;
        i--;
        buffer >>= 8;
        bufferSize -= 8;
      }
    },
    // Like readHexNumber, but the lowest bit is a sign flag: the value is
    // shifted right by one bit and, when the flag was set, every byte is
    // inverted.
    readHexSigned(num, size) {
      this.readHexNumber(num, size);
      var sign = num[size] & 1 ? 255 : 0;
      var c = 0;
      for (var i = 0; i <= size; i++) {
        c = ((c & 1) << 8) | num[i];
        num[i] = (c >> 1) ^ sign;
      }
    },
    // Reads a length-prefixed string; the length and every char code are
    // encoded with readNumber.
    readString() {
      var len = this.readNumber();
      var s = '';
      for (var i = 0; i < len; i++) {
        s += String.fromCharCode(this.readNumber());
      }
      return s;
    }
  };

  /**
   * Decodes a binary CMap into `cMap`.
   *
   * The first byte is a header whose lowest bit sets `cMap.vertical`. Every
   * following record starts with a byte holding the record type (top three
   * bits), a "sequence" flag (0x10) and the data size minus one (low four
   * bits). Type 7 records carry metadata (a comment or a usecmap name).
   *
   * @param {Uint8Array} data - Raw binary CMap bytes.
   * @param {Object} cMap - Receives `vertical` and the `addCodespaceRange`,
   *   `mapOne`, `mapCidRange` and `mapBfRange` calls.
   * @param {function(string)} extend - Called with the usecmap name when the
   *   data contains one; its result is used to resolve the promise.
   * @returns {Promise} Resolves with `extend(useCMap)` or with `cMap`;
   *   rejects on an unknown record type. Exceptions raised while decoding
   *   happen inside the promise executor.
   */
  function processBinaryCMap(data, cMap, extend) {
    return new Promise(function (resolve, reject) {
      var stream = new BinaryCMapStream(data);
      var header = stream.readByte();
      cMap.vertical = !!(header & 1);

      var useCMap = null;
      // Scratch numbers, reused across records.
      var start = new Uint8Array(MAX_NUM_SIZE);
      var end = new Uint8Array(MAX_NUM_SIZE);
      var char = new Uint8Array(MAX_NUM_SIZE);
      var charCode = new Uint8Array(MAX_NUM_SIZE);
      var tmp = new Uint8Array(MAX_NUM_SIZE);
      var code;

      var b;
      while ((b = stream.readByte()) >= 0) {
        var type = b >> 5;
        if (type === 7) { // metadata, e.g. comment or usecmap
          switch (b & 0x1F) {
            case 0:
              stream.readString(); // skipping comment
              break;
            case 1:
              useCMap = stream.readString();
              break;
          }
          continue;
        }
        var sequence = !!(b & 0x10);
        var dataSize = b & 15;

        assert(dataSize + 1 <= MAX_NUM_SIZE);

        var ucs2DataSize = 1;
        var subitemsCount = stream.readNumber();
        var i;
        // In every record the first item is stored in full; later items are
        // stored as deltas relative to the previous one.
        switch (type) {
          case 0: // codespacerange
            stream.readHex(start, dataSize);
            stream.readHexNumber(end, dataSize);
            addHex(end, start, dataSize);
            cMap.addCodespaceRange(dataSize + 1, hexToInt(start, dataSize),
                                   hexToInt(end, dataSize));
            for (i = 1; i < subitemsCount; i++) {
              incHex(end, dataSize);
              stream.readHexNumber(start, dataSize);
              addHex(start, end, dataSize);
              stream.readHexNumber(end, dataSize);
              addHex(end, start, dataSize);
              cMap.addCodespaceRange(dataSize + 1, hexToInt(start, dataSize),
                                     hexToInt(end, dataSize));
            }
            break;
          case 1: // notdefrange
            stream.readHex(start, dataSize);
            stream.readHexNumber(end, dataSize);
            addHex(end, start, dataSize);
            code = stream.readNumber();
            // undefined range, skipping
            for (i = 1; i < subitemsCount; i++) {
              incHex(end, dataSize);
              stream.readHexNumber(start, dataSize);
              addHex(start, end, dataSize);
              stream.readHexNumber(end, dataSize);
              addHex(end, start, dataSize);
              code = stream.readNumber();
              // nop
            }
            break;
          case 2: // cidchar
            stream.readHex(char, dataSize);
            code = stream.readNumber();
            cMap.mapOne(hexToInt(char, dataSize), code);
            for (i = 1; i < subitemsCount; i++) {
              incHex(char, dataSize);
              if (!sequence) {
                stream.readHexNumber(tmp, dataSize);
                addHex(char, tmp, dataSize);
              }
              code = stream.readSigned() + (code + 1);
              cMap.mapOne(hexToInt(char, dataSize), code);
            }
            break;
          case 3: // cidrange
            stream.readHex(start, dataSize);
            stream.readHexNumber(end, dataSize);
            addHex(end, start, dataSize);
            code = stream.readNumber();
            cMap.mapCidRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
                             code);
            for (i = 1; i < subitemsCount; i++) {
              incHex(end, dataSize);
              if (!sequence) {
                stream.readHexNumber(start, dataSize);
                addHex(start, end, dataSize);
              } else {
                start.set(end);
              }
              stream.readHexNumber(end, dataSize);
              addHex(end, start, dataSize);
              code = stream.readNumber();
              cMap.mapCidRange(hexToInt(start, dataSize),
                               hexToInt(end, dataSize), code);
            }
            break;
          case 4: // bfchar
            stream.readHex(char, ucs2DataSize);
            stream.readHex(charCode, dataSize);
            cMap.mapOne(hexToInt(char, ucs2DataSize),
                        hexToStr(charCode, dataSize));
            for (i = 1; i < subitemsCount; i++) {
              incHex(char, ucs2DataSize);
              if (!sequence) {
                stream.readHexNumber(tmp, ucs2DataSize);
                addHex(char, tmp, ucs2DataSize);
              }
              incHex(charCode, dataSize);
              stream.readHexSigned(tmp, dataSize);
              addHex(charCode, tmp, dataSize);
              cMap.mapOne(hexToInt(char, ucs2DataSize),
                          hexToStr(charCode, dataSize));
            }
            break;
          case 5: // bfrange
            stream.readHex(start, ucs2DataSize);
            stream.readHexNumber(end, ucs2DataSize);
            addHex(end, start, ucs2DataSize);
            stream.readHex(charCode, dataSize);
            cMap.mapBfRange(hexToInt(start, ucs2DataSize),
                            hexToInt(end, ucs2DataSize),
                            hexToStr(charCode, dataSize));
            for (i = 1; i < subitemsCount; i++) {
              incHex(end, ucs2DataSize);
              if (!sequence) {
                stream.readHexNumber(start, ucs2DataSize);
                addHex(start, end, ucs2DataSize);
              } else {
                start.set(end);
              }
              stream.readHexNumber(end, ucs2DataSize);
              addHex(end, start, ucs2DataSize);
              stream.readHex(charCode, dataSize);
              cMap.mapBfRange(hexToInt(start, ucs2DataSize),
                              hexToInt(end, ucs2DataSize),
                              hexToStr(charCode, dataSize));
            }
            break;
          default:
            reject(new Error('processBinaryCMap: Unknown type: ' + type));
            return;
        }
      }

      if (useCMap) {
        resolve(extend(useCMap));
        return;
      }
      resolve(cMap);
    });
  }

  function BinaryCMapReader() {}

  BinaryCMapReader.prototype = {
    process: processBinaryCMap,
  };

  return BinaryCMapReader;
})();
|
|
|
|
|
2013-09-26 02:32:04 +09:00
|
|
|
var CMapFactory = (function CMapFactoryClosure() {
|
|
|
|
/**
 * Packs the char codes of `str` into one unsigned 32-bit integer, with the
 * first character ending up in the most significant position.
 */
function strToInt(str) {
  let packed = 0;
  let pos = 0;
  while (pos < str.length) {
    packed = (packed << 8) | str.charCodeAt(pos);
    pos += 1;
  }
  return packed >>> 0;
}
|
|
|
|
|
|
|
|
/**
 * Reports a malformed CMap through `error` unless `obj` passes `isString`.
 */
function expectString(obj) {
  if (isString(obj)) {
    return;
  }
  error('Malformed CMap: expected string.');
}
|
|
|
|
|
|
|
|
/**
 * Reports a malformed CMap through `error` unless `obj` passes `isInt`.
 */
function expectInt(obj) {
  if (isInt(obj)) {
    return;
  }
  error('Malformed CMap: expected int.');
}
|
|
|
|
|
|
|
|
/**
 * Consumes `<src> <dst>` string pairs from `lexer` and records each with
 * `cMap.mapOne`, until the `endbfchar` command or EOF is reached.
 */
function parseBfChar(cMap, lexer) {
  for (;;) {
    let token = lexer.getObj();
    if (isEOF(token) || isCmd(token, 'endbfchar')) {
      return;
    }
    expectString(token);
    const src = strToInt(token);
    token = lexer.getObj();
    // TODO are /dstName used?
    expectString(token);
    cMap.mapOne(src, token);
  }
}
|
|
|
|
|
|
|
|
/**
 * Consumes `<low> <high> <dst>` triples from `lexer` until `endbfrange`.
 * `<dst>` may be an integer or a string (recorded via `cMap.mapBfRange`) or a
 * `[ ... ]` list (recorded via `cMap.mapBfRangeToArray`). Reaching EOF, or a
 * destination of any other kind, is reported through `error`.
 */
function parseBfRange(cMap, lexer) {
  for (;;) {
    let token = lexer.getObj();
    if (isEOF(token)) {
      break;
    }
    if (isCmd(token, 'endbfrange')) {
      return;
    }
    expectString(token);
    const low = strToInt(token);

    token = lexer.getObj();
    expectString(token);
    const high = strToInt(token);

    token = lexer.getObj();
    if (isInt(token)) {
      cMap.mapBfRange(low, high, String.fromCharCode(token));
      continue;
    }
    if (isString(token)) {
      cMap.mapBfRange(low, high, token);
      continue;
    }
    if (!isCmd(token, '[')) {
      break;
    }
    // Collect everything up to the closing bracket (or EOF).
    const items = [];
    for (token = lexer.getObj();
         !isCmd(token, ']') && !isEOF(token);
         token = lexer.getObj()) {
      items.push(token);
    }
    cMap.mapBfRangeToArray(low, high, items);
  }
  error('Invalid bf range.');
}
|
|
|
|
|
|
|
|
/**
 * Consumes `<src> cid` pairs (string, then integer) from `lexer` and records
 * each with `cMap.mapOne`, until the `endcidchar` command or EOF is reached.
 */
function parseCidChar(cMap, lexer) {
  for (;;) {
    const srcToken = lexer.getObj();
    if (isEOF(srcToken) || isCmd(srcToken, 'endcidchar')) {
      return;
    }
    expectString(srcToken);
    const src = strToInt(srcToken);

    const cid = lexer.getObj();
    expectInt(cid);
    cMap.mapOne(src, cid);
  }
}
|
|
|
|
|
|
|
|
/**
 * Consumes `<low> <high> cid` triples (two strings, then an integer) from
 * `lexer` and records each with `cMap.mapCidRange`, until the `endcidrange`
 * command or EOF is reached.
 */
function parseCidRange(cMap, lexer) {
  for (;;) {
    const lowToken = lexer.getObj();
    if (isEOF(lowToken) || isCmd(lowToken, 'endcidrange')) {
      return;
    }
    expectString(lowToken);
    const low = strToInt(lowToken);

    const highToken = lexer.getObj();
    expectString(highToken);
    const high = strToInt(highToken);

    const firstCid = lexer.getObj();
    expectInt(firstCid);
    cMap.mapCidRange(low, high, firstCid);
  }
}
|
|
|
|
|
|
|
|
/**
 * Consumes `<low> <high>` string pairs from `lexer` until
 * `endcodespacerange`, registering each via `cMap.addCodespaceRange` with the
 * length of the `<high>` string as the byte count. Reaching EOF or a
 * non-string token is reported through `error`.
 */
function parseCodespaceRange(cMap, lexer) {
  for (;;) {
    const lowToken = lexer.getObj();
    if (isEOF(lowToken)) {
      break;
    }
    if (isCmd(lowToken, 'endcodespacerange')) {
      return;
    }
    if (!isString(lowToken)) {
      break;
    }
    const low = strToInt(lowToken);

    const highToken = lexer.getObj();
    if (!isString(highToken)) {
      break;
    }
    const high = strToInt(highToken);
    cMap.addCodespaceRange(highToken.length, low, high);
  }
  error('Invalid codespace range.');
}
|
|
|
|
|
2014-02-12 03:27:09 +09:00
|
|
|
/**
 * Reads the next object from `lexer`; when it is an integer, sets
 * `cMap.vertical` to true for a non-zero value and to false for zero.
 * Anything else leaves `cMap` untouched.
 */
function parseWMode(cMap, lexer) {
  const mode = lexer.getObj();
  if (!isInt(mode)) {
    return;
  }
  cMap.vertical = mode !== 0;
}
|
|
|
|
|
2015-03-06 23:01:26 +09:00
|
|
|
/**
 * Reads the next object from `lexer`; when it is a name whose `name` property
 * is a string, copies that string to `cMap.name`. Anything else leaves `cMap`
 * untouched.
 */
function parseCMapName(cMap, lexer) {
  const token = lexer.getObj();
  if (!isName(token) || !isString(token.name)) {
    return;
  }
  cMap.name = token.name;
}
|
|
|
|
|
2017-02-12 23:54:41 +09:00
|
|
|
/**
 * Walks the token stream of a CMap and fills in `cMap`.
 *
 * - The names `WMode` and `CMapName` hand the lexer to their dedicated
 *   parsers. Every name token is remembered, so that a later `usecmap`
 *   command can pick up the most recently seen name.
 * - The `begin...` commands hand the lexer to the matching section parser.
 * - Parsing ends at end-of-stream or at the `endcmap` command.
 *
 * An exception raised while handling a token is reported through `warn`
 * and parsing carries on with the next token, so that usable data can still
 * be recovered from a corrupt CMap. A `MissingDataException` is re-thrown
 * unchanged.
 *
 * @param {Object} cMap - The CMap being populated.
 * @param {Object} lexer - Token source; only `getObj()` is used here.
 * @param {Function} fetchBuiltInCMap - Forwarded to `extendCMap`.
 * @param {string} useCMap - Name of the CMap to inherit from. When falsy,
 *   the name recorded at a `usecmap` command (if any) is used instead.
 * @returns {Promise} The promise from `extendCMap` when there is a CMap to
 *   inherit from, otherwise a promise already resolved with `cMap`.
 */
function parseCMap(cMap, lexer, fetchBuiltInCMap, useCMap) {
  var lastName;
  var embeddedUseCMap;
  var finished = false;

  while (!finished) {
    try {
      var token = lexer.getObj();
      if (isEOF(token)) {
        finished = true;
      } else if (isName(token)) {
        switch (token.name) {
          case 'WMode':
            parseWMode(cMap, lexer);
            break;
          case 'CMapName':
            parseCMapName(cMap, lexer);
            break;
        }
        lastName = token;
      } else if (isCmd(token)) {
        switch (token.cmd) {
          case 'endcmap':
            finished = true;
            break;
          case 'usecmap':
            if (isName(lastName)) {
              embeddedUseCMap = lastName.name;
            }
            break;
          case 'begincodespacerange':
            parseCodespaceRange(cMap, lexer);
            break;
          case 'beginbfchar':
            parseBfChar(cMap, lexer);
            break;
          case 'begincidchar':
            parseCidChar(cMap, lexer);
            break;
          case 'beginbfrange':
            parseBfRange(cMap, lexer);
            break;
          case 'begincidrange':
            parseCidRange(cMap, lexer);
            break;
        }
      }
    } catch (ex) {
      if (ex instanceof MissingDataException) {
        throw ex;
      }
      // Keep going: the remaining tokens may still hold valid mappings.
      warn('Invalid cMap data: ' + ex);
    }
  }

  // An explicitly supplied `useCMap` wins over the one named in the data.
  var parentName = useCMap || embeddedUseCMap;
  if (parentName) {
    return extendCMap(cMap, fetchBuiltInCMap, parentName);
  }
  return Promise.resolve(cMap);
}
|
|
|
|
|
2017-02-12 23:54:41 +09:00
|
|
|
/**
 * Loads the CMap named `useCMap` through `createBuiltInCMap` and merges it
 * into `cMap`:
 *
 * - the loaded CMap is stored as `cMap.useCMap`;
 * - when `cMap.numCodespaceRanges` is 0, each of the parent's codespace
 *   range lists is copied with `slice()` and the parent's count is adopted;
 * - every code for which `cMap.contains` is false is mapped to the parent's
 *   `lookup` result, so entries already present are never overridden.
 *
 * @param {Object} cMap - The CMap to extend (modified in place).
 * @param {Function} fetchBuiltInCMap - Forwarded to `createBuiltInCMap`.
 * @param {string} useCMap - Name of the CMap to inherit from.
 * @returns {Promise} Resolves with `cMap` once the merge is done.
 */
function extendCMap(cMap, fetchBuiltInCMap, useCMap) {
  function inheritFrom(parentCMap) {
    cMap.useCMap = parentCMap;

    // Without code space ranges of its own, clone all of the parent's.
    if (cMap.numCodespaceRanges === 0) {
      var parentRanges = parentCMap.codespaceRanges;
      for (var n = 0; n < parentRanges.length; n += 1) {
        cMap.codespaceRanges[n] = parentRanges[n].slice();
      }
      cMap.numCodespaceRanges = parentCMap.numCodespaceRanges;
    }

    // Fill the gaps only; mappings defined on `cMap` take precedence.
    parentCMap.forEach(function (code) {
      if (cMap.contains(code)) {
        return;
      }
      cMap.mapOne(code, parentCMap.lookup(code));
    });

    return cMap;
  }

  return createBuiltInCMap(useCMap, fetchBuiltInCMap).then(inheritFrom);
}
|
|
|
|
|
2017-02-12 23:54:41 +09:00
|
|
|
/**
 * Creates a CMap from its predefined name.
 *
 * `Identity-H` and `Identity-V` resolve immediately with an `IdentityCMap`
 * constructed with `(false, 2)` and `(true, 2)` respectively. A name that is
 * not listed in `BUILT_IN_CMAPS` yields a rejected promise. Any other name
 * is fetched through `fetchBuiltInCMap` and decoded according to the
 * reported compression type: `BinaryCMapReader` for BINARY, `parseCMap`
 * for NONE.
 *
 * Note that a missing `fetchBuiltInCMap` is reported by `assert`, i.e.
 * synchronously rather than through the returned promise.
 *
 * @param {string} name - Name of the predefined CMap.
 * @param {Function} fetchBuiltInCMap - Called with `name`; must return a
 *   promise for an object with `cMapData` and `compressionType`.
 * @returns {Promise} Resolves with the created CMap.
 */
function createBuiltInCMap(name, fetchBuiltInCMap) {
  switch (name) {
    case 'Identity-H':
      return Promise.resolve(new IdentityCMap(false, 2));
    case 'Identity-V':
      return Promise.resolve(new IdentityCMap(true, 2));
  }
  if (BUILT_IN_CMAPS.indexOf(name) < 0) {
    return Promise.reject(new Error('Unknown CMap name: ' + name));
  }
  assert(fetchBuiltInCMap, 'Built-in CMap parameters are not provided.');

  function buildCMap(data) {
    var rawData = data.cMapData;
    var compression = data.compressionType;
    var builtInCMap = new CMap(true);

    if (compression === CMapCompressionType.BINARY) {
      var reader = new BinaryCMapReader();
      // The callback is how the reader requests the parent CMap merge.
      return reader.process(rawData, builtInCMap, function (parentName) {
        return extendCMap(builtInCMap, fetchBuiltInCMap, parentName);
      });
    }
    assert(compression === CMapCompressionType.NONE,
           'TODO: Only BINARY/NONE CMap compression is currently supported.');

    // Uncompressed CMap.
    var lexer = new Lexer(new Stream(rawData));
    return parseCMap(builtInCMap, lexer, fetchBuiltInCMap, null);
  }

  return fetchBuiltInCMap(name).then(buildCMap);
}
|
2014-02-12 03:27:09 +09:00
|
|
|
|
2013-09-26 02:32:04 +09:00
|
|
|
return {
|
2017-04-27 19:58:44 +09:00
|
|
|
create(params) {
|
2017-02-12 23:54:41 +09:00
|
|
|
var encoding = params.encoding;
|
|
|
|
var fetchBuiltInCMap = params.fetchBuiltInCMap;
|
|
|
|
var useCMap = params.useCMap;
|
|
|
|
|
2013-09-26 02:32:04 +09:00
|
|
|
if (isName(encoding)) {
|
2017-02-12 23:54:41 +09:00
|
|
|
return createBuiltInCMap(encoding.name, fetchBuiltInCMap);
|
2013-09-26 02:32:04 +09:00
|
|
|
} else if (isStream(encoding)) {
|
|
|
|
var cMap = new CMap();
|
|
|
|
var lexer = new Lexer(encoding);
|
2017-02-12 23:54:41 +09:00
|
|
|
return parseCMap(cMap, lexer, fetchBuiltInCMap, useCMap).then(
|
2016-02-29 01:20:29 +09:00
|
|
|
function (parsedCMap) {
|
|
|
|
if (parsedCMap.isIdentityCMap) {
|
2017-02-12 23:54:41 +09:00
|
|
|
return createBuiltInCMap(parsedCMap.name, fetchBuiltInCMap);
|
2016-02-29 01:20:29 +09:00
|
|
|
}
|
|
|
|
return parsedCMap;
|
|
|
|
});
|
2013-09-26 02:32:04 +09:00
|
|
|
}
|
2016-02-29 01:20:29 +09:00
|
|
|
return Promise.reject(new Error('Encoding required.'));
|
2013-09-26 02:32:04 +09:00
|
|
|
}
|
|
|
|
};
|
|
|
|
})();
|
2015-11-22 01:32:47 +09:00
|
|
|
|
|
|
|
exports.CMap = CMap;
|
|
|
|
exports.CMapFactory = CMapFactory;
|
|
|
|
exports.IdentityCMap = IdentityCMap;
|
|
|
|
}));
|