diff --git a/src/core/binary_cmap.js b/src/core/binary_cmap.js
new file mode 100644
index 000000000..0a160eed9
--- /dev/null
+++ b/src/core/binary_cmap.js
@@ -0,0 +1,373 @@
+/* Copyright 2012 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { FormatError } from "../shared/util.js";
+
+/**
+ * Interprets the bytes `a[0..size]` (i.e. `size + 1` bytes) as a big-endian
+ * unsigned integer. The result is truncated to 32 bits.
+ */
+function hexToInt(a, size) {
+  let n = 0;
+  for (let i = 0; i <= size; i++) {
+    n = (n << 8) | a[i];
+  }
+  return n >>> 0;
+}
+
+/**
+ * Converts the bytes `a[0..size]` to a string, one char code per byte.
+ */
+function hexToStr(a, size) {
+  // This code is hot. Special-case some common values to avoid creating an
+  // object with subarray().
+  if (size === 1) {
+    return String.fromCharCode(a[0], a[1]);
+  }
+  if (size === 3) {
+    return String.fromCharCode(a[0], a[1], a[2], a[3]);
+  }
+  return String.fromCharCode.apply(null, a.subarray(0, size + 1));
+}
+
+/**
+ * Adds the big-endian number in `b[0..size]` to `a[0..size]`, in place; the
+ * carry out of the most significant byte is dropped.
+ */
+function addHex(a, b, size) {
+  let c = 0;
+  for (let i = size; i >= 0; i--) {
+    c += a[i] + b[i];
+    a[i] = c & 255;
+    c >>= 8;
+  }
+}
+
+/**
+ * Increments the big-endian number in `a[0..size]` by one, in place.
+ */
+function incHex(a, size) {
+  let c = 1;
+  for (let i = size; i >= 0 && c > 0; i--) {
+    c += a[i];
+    a[i] = c & 255;
+    c >>= 8;
+  }
+}
+
+const MAX_NUM_SIZE = 16;
+const MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 8 / 7)
+
+/**
+ * Sequential reader over the bytes of a binary CMap. Multi-byte numbers use 7
+ * payload bits per byte; a set high bit means that more bytes follow.
+ */
+class BinaryCMapStream {
+  constructor(data) {
+    this.buffer = data;
+    this.pos = 0;
+    this.end = data.length;
+    this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
+  }
+
+  // Returns the next byte, or -1 once the end of the data is reached.
+  readByte() {
+    if (this.pos >= this.end) {
+      return -1;
+    }
+    return this.buffer[this.pos++];
+  }
+
+  // Reads a variable-length unsigned number, most significant group first.
+  readNumber() {
+    let n = 0;
+    let last;
+    do {
+      const b = this.readByte();
+      if (b < 0) {
+        throw new FormatError("unexpected EOF in bcmap");
+      }
+      last = !(b & 0x80);
+      n = (n << 7) | (b & 0x7f);
+    } while (!last);
+    return n;
+  }
+
+  // Reads a variable-length number whose lowest bit is the sign flag.
+  readSigned() {
+    const n = this.readNumber();
+    return n & 1 ? ~(n >>> 1) : n >>> 1;
+  }
+
+  // Copies `size + 1` raw bytes from the stream into `num`.
+  readHex(num, size) {
+    const next = this.pos + size + 1;
+    if (next > this.end) {
+      // subarray() would silently return fewer bytes, leaving stale data in
+      // `num`; fail like the other readers do on truncated input.
+      throw new FormatError("unexpected EOF in bcmap");
+    }
+    num.set(this.buffer.subarray(this.pos, next));
+    this.pos = next;
+  }
+
+  // Reads a variable-length number and stores its low `size + 1` bytes,
+  // big-endian, into `num[0..size]`.
+  readHexNumber(num, size) {
+    let last;
+    const stack = this.tmpBuf;
+    let sp = 0;
+    do {
+      const b = this.readByte();
+      if (b < 0) {
+        throw new FormatError("unexpected EOF in bcmap");
+      }
+      last = !(b & 0x80);
+      stack[sp++] = b & 0x7f;
+    } while (!last);
+    let i = size,
+      buffer = 0,
+      bufferSize = 0;
+    while (i >= 0) {
+      // Refill only while 7-bit groups remain; `stack` is a fixed-size typed
+      // array, so its `length` says nothing about how many were pushed.
+      while (bufferSize < 8 && sp > 0) {
+        buffer |= stack[--sp] << bufferSize;
+        bufferSize += 7;
+      }
+      num[i] = buffer & 255;
+      i--;
+      buffer >>= 8;
+      bufferSize -= 8;
+    }
+  }
+
+  // Like readHexNumber(), but the lowest bit of the decoded value is a sign
+  // flag: the remaining bits are shifted right by one and, when the flag is
+  // set, inverted.
+  readHexSigned(num, size) {
+    this.readHexNumber(num, size);
+    const sign = num[size] & 1 ? 255 : 0;
+    let c = 0;
+    for (let i = 0; i <= size; i++) {
+      c = ((c & 1) << 8) | num[i];
+      num[i] = (c >> 1) ^ sign;
+    }
+  }
+
+  // Reads a length-prefixed string; each character is a readNumber() value.
+  readString() {
+    const len = this.readNumber();
+    let s = "";
+    for (let i = 0; i < len; i++) {
+      s += String.fromCharCode(this.readNumber());
+    }
+    return s;
+  }
+}
+
+class BinaryCMapReader {
+  /**
+   * Decodes the binary CMap in `data` and records the result on `cMap`.
+   *
+   * @param {TypedArray} data - Binary CMap bytes (presumably a Uint8Array).
+   * @param {Object} cMap - Receives `vertical`, codespace ranges and mappings.
+   * @param {function} extend - Called with the referenced CMap name when the
+   *   data contains a "usecmap" entry; its result is then returned.
+   * @returns {Promise} Resolves with `cMap`, or with what `extend` returned.
+   */
+  async process(data, cMap, extend) {
+    const stream = new BinaryCMapStream(data);
+    const header = stream.readByte();
+    cMap.vertical = !!(header & 1);
+
+    let useCMap = null;
+    const start = new Uint8Array(MAX_NUM_SIZE);
+    const end = new Uint8Array(MAX_NUM_SIZE);
+    const char = new Uint8Array(MAX_NUM_SIZE);
+    const charCode = new Uint8Array(MAX_NUM_SIZE);
+    const tmp = new Uint8Array(MAX_NUM_SIZE);
+    let code;
+
+    let b;
+    while ((b = stream.readByte()) >= 0) {
+      // Record header: bits 7-5 hold the type, bit 4 the "sequence" flag and
+      // bits 3-0 the data size minus one.
+      const type = b >> 5;
+      if (type === 7) {
+        // metadata, e.g. comment or usecmap
+        switch (b & 0x1f) {
+          case 0:
+            stream.readString(); // skipping comment
+            break;
+          case 1:
+            useCMap = stream.readString();
+            break;
+        }
+        continue;
+      }
+      const sequence = !!(b & 0x10);
+      const dataSize = b & 15;
+
+      if (dataSize + 1 > MAX_NUM_SIZE) {
+        throw new Error("BinaryCMapReader.process: Invalid dataSize.");
+      }
+
+      const ucs2DataSize = 1;
+      const subitemsCount = stream.readNumber();
+      switch (type) {
+        case 0: // codespacerange
+          stream.readHex(start, dataSize);
+          stream.readHexNumber(end, dataSize);
+          addHex(end, start, dataSize);
+          cMap.addCodespaceRange(
+            dataSize + 1,
+            hexToInt(start, dataSize),
+            hexToInt(end, dataSize)
+          );
+          for (let i = 1; i < subitemsCount; i++) {
+            incHex(end, dataSize);
+            stream.readHexNumber(start, dataSize);
+            addHex(start, end, dataSize);
+            stream.readHexNumber(end, dataSize);
+            addHex(end, start, dataSize);
+            cMap.addCodespaceRange(
+              dataSize + 1,
+              hexToInt(start, dataSize),
+              hexToInt(end, dataSize)
+            );
+          }
+          break;
+        case 1: // notdefrange
+          stream.readHex(start, dataSize);
+          stream.readHexNumber(end, dataSize);
+          addHex(end, start, dataSize);
+          stream.readNumber(); // code
+          // undefined range, skipping
+          for (let i = 1; i < subitemsCount; i++) {
+            incHex(end, dataSize);
+            stream.readHexNumber(start, dataSize);
+            addHex(start, end, dataSize);
+            stream.readHexNumber(end, dataSize);
+            addHex(end, start, dataSize);
+            stream.readNumber(); // code
+            // nop
+          }
+          break;
+        case 2: // cidchar
+          stream.readHex(char, dataSize);
+          code = stream.readNumber();
+          cMap.mapOne(hexToInt(char, dataSize), code);
+          for (let i = 1; i < subitemsCount; i++) {
+            incHex(char, dataSize);
+            if (!sequence) {
+              stream.readHexNumber(tmp, dataSize);
+              addHex(char, tmp, dataSize);
+            }
+            code = stream.readSigned() + (code + 1);
+            cMap.mapOne(hexToInt(char, dataSize), code);
+          }
+          break;
+        case 3: // cidrange
+          stream.readHex(start, dataSize);
+          stream.readHexNumber(end, dataSize);
+          addHex(end, start, dataSize);
+          code = stream.readNumber();
+          cMap.mapCidRange(
+            hexToInt(start, dataSize),
+            hexToInt(end, dataSize),
+            code
+          );
+          for (let i = 1; i < subitemsCount; i++) {
+            incHex(end, dataSize);
+            if (!sequence) {
+              stream.readHexNumber(start, dataSize);
+              addHex(start, end, dataSize);
+            } else {
+              start.set(end);
+            }
+            stream.readHexNumber(end, dataSize);
+            addHex(end, start, dataSize);
+            code = stream.readNumber();
+            cMap.mapCidRange(
+              hexToInt(start, dataSize),
+              hexToInt(end, dataSize),
+              code
+            );
+          }
+          break;
+        case 4: // bfchar
+          stream.readHex(char, ucs2DataSize);
+          stream.readHex(charCode, dataSize);
+          cMap.mapOne(
+            hexToInt(char, ucs2DataSize),
+            hexToStr(charCode, dataSize)
+          );
+          for (let i = 1; i < subitemsCount; i++) {
+            incHex(char, ucs2DataSize);
+            if (!sequence) {
+              stream.readHexNumber(tmp, ucs2DataSize);
+              addHex(char, tmp, ucs2DataSize);
+            }
+            incHex(charCode, dataSize);
+            stream.readHexSigned(tmp, dataSize);
+            addHex(charCode, tmp, dataSize);
+            cMap.mapOne(
+              hexToInt(char, ucs2DataSize),
+              hexToStr(charCode, dataSize)
+            );
+          }
+          break;
+        case 5: // bfrange
+          stream.readHex(start, ucs2DataSize);
+          stream.readHexNumber(end, ucs2DataSize);
+          addHex(end, start, ucs2DataSize);
+          stream.readHex(charCode, dataSize);
+          cMap.mapBfRange(
+            hexToInt(start, ucs2DataSize),
+            hexToInt(end, ucs2DataSize),
+            hexToStr(charCode, dataSize)
+          );
+          for (let i = 1; i < subitemsCount; i++) {
+            incHex(end, ucs2DataSize);
+            if (!sequence) {
+              stream.readHexNumber(start, ucs2DataSize);
+              addHex(start, end, ucs2DataSize);
+            } else {
+              start.set(end);
+            }
+            stream.readHexNumber(end, ucs2DataSize);
+            addHex(end, start, ucs2DataSize);
+            stream.readHex(charCode, dataSize);
+            cMap.mapBfRange(
+              hexToInt(start, ucs2DataSize),
+              hexToInt(end, ucs2DataSize),
+              hexToStr(charCode, dataSize)
+            );
+          }
+          break;
+        default:
+          throw new Error(`BinaryCMapReader.process - unknown type: ${type}`);
+      }
+    }
+
+    if (useCMap) {
+      return extend(useCMap);
+    }
+    return cMap;
+  }
+}
+
+export { BinaryCMapReader };
diff --git a/src/core/cmap.js b/src/core/cmap.js
index 5cde7d29f..1f50a9227 100644
--- a/src/core/cmap.js
+++ b/src/core/cmap.js
@@ -21,6 +21,7 @@ import {
 } from "../shared/util.js";
 import { Cmd, EOF, isCmd, Name } from "./primitives.js";
 import { BaseStream } from "./base_stream.js";
+import { BinaryCMapReader } from "./binary_cmap.js";
 import { Lexer } from "./parser.js";
 import { MissingDataException } from "./core_utils.js";
 import { Stream } from "./stream.js";
@@ -443,319 +444,6 @@ class IdentityCMap extends CMap {
   }
 }
 
-const BinaryCMapReader = (function BinaryCMapReaderClosure() {
-  function hexToInt(a, size) {
-    let n = 0;
-    for (let i = 0; i <= size; i++) {
-      n = (n << 8) | a[i];
-    }
-    return n >>> 0;
-  }
-
-  function hexToStr(a, size) {
-    // This code is hot. Special-case some common values to avoid creating an
-    // object with subarray().
-    if (size === 1) {
-      return String.fromCharCode(a[0], a[1]);
-    }
-    if (size === 3) {
-      return String.fromCharCode(a[0], a[1], a[2], a[3]);
-    }
-    return String.fromCharCode.apply(null, a.subarray(0, size + 1));
-  }
-
-  function addHex(a, b, size) {
-    let c = 0;
-    for (let i = size; i >= 0; i--) {
-      c += a[i] + b[i];
-      a[i] = c & 255;
-      c >>= 8;
-    }
-  }
-
-  function incHex(a, size) {
-    let c = 1;
-    for (let i = size; i >= 0 && c > 0; i--) {
-      c += a[i];
-      a[i] = c & 255;
-      c >>= 8;
-    }
-  }
-
-  const MAX_NUM_SIZE = 16;
-  const MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 7 / 8)
-
-  class BinaryCMapStream {
-    constructor(data) {
-      this.buffer = data;
-      this.pos = 0;
-      this.end = data.length;
-      this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
-    }
-
-    readByte() {
-      if (this.pos >= this.end) {
-        return -1;
-      }
-      return this.buffer[this.pos++];
-    }
-
-    readNumber() {
-      let n = 0;
-      let last;
-      do {
-        const b = this.readByte();
-        if (b < 0) {
-          throw new FormatError("unexpected EOF in bcmap");
-        }
-        last = !(b & 0x80);
-        n = (n << 7) | (b & 0x7f);
-      } while (!last);
-      return n;
-    }
-
-    readSigned() {
-      const n = this.readNumber();
-      return n & 1 ? ~(n >>> 1) : n >>> 1;
-    }
-
-    readHex(num, size) {
-      num.set(this.buffer.subarray(this.pos, this.pos + size + 1));
-      this.pos += size + 1;
-    }
-
-    readHexNumber(num, size) {
-      let last;
-      const stack = this.tmpBuf;
-      let sp = 0;
-      do {
-        const b = this.readByte();
-        if (b < 0) {
-          throw new FormatError("unexpected EOF in bcmap");
-        }
-        last = !(b & 0x80);
-        stack[sp++] = b & 0x7f;
-      } while (!last);
-      let i = size,
-        buffer = 0,
-        bufferSize = 0;
-      while (i >= 0) {
-        while (bufferSize < 8 && stack.length > 0) {
-          buffer |= stack[--sp] << bufferSize;
-          bufferSize += 7;
-        }
-        num[i] = buffer & 255;
-        i--;
-        buffer >>= 8;
-        bufferSize -= 8;
-      }
-    }
-
-    readHexSigned(num, size) {
-      this.readHexNumber(num, size);
-      const sign = num[size] & 1 ? 255 : 0;
-      let c = 0;
-      for (let i = 0; i <= size; i++) {
-        c = ((c & 1) << 8) | num[i];
-        num[i] = (c >> 1) ^ sign;
-      }
-    }
-
-    readString() {
-      const len = this.readNumber();
-      let s = "";
-      for (let i = 0; i < len; i++) {
-        s += String.fromCharCode(this.readNumber());
-      }
-      return s;
-    }
-  }
-
-  // eslint-disable-next-line no-shadow
-  class BinaryCMapReader {
-    async process(data, cMap, extend) {
-      const stream = new BinaryCMapStream(data);
-      const header = stream.readByte();
-      cMap.vertical = !!(header & 1);
-
-      let useCMap = null;
-      const start = new Uint8Array(MAX_NUM_SIZE);
-      const end = new Uint8Array(MAX_NUM_SIZE);
-      const char = new Uint8Array(MAX_NUM_SIZE);
-      const charCode = new Uint8Array(MAX_NUM_SIZE);
-      const tmp = new Uint8Array(MAX_NUM_SIZE);
-      let code;
-
-      let b;
-      while ((b = stream.readByte()) >= 0) {
-        const type = b >> 5;
-        if (type === 7) {
-          // metadata, e.g. comment or usecmap
-          switch (b & 0x1f) {
-            case 0:
-              stream.readString(); // skipping comment
-              break;
-            case 1:
-              useCMap = stream.readString();
-              break;
-          }
-          continue;
-        }
-        const sequence = !!(b & 0x10);
-        const dataSize = b & 15;
-
-        if (dataSize + 1 > MAX_NUM_SIZE) {
-          throw new Error("BinaryCMapReader.process: Invalid dataSize.");
-        }
-
-        const ucs2DataSize = 1;
-        const subitemsCount = stream.readNumber();
-        switch (type) {
-          case 0: // codespacerange
-            stream.readHex(start, dataSize);
-            stream.readHexNumber(end, dataSize);
-            addHex(end, start, dataSize);
-            cMap.addCodespaceRange(
-              dataSize + 1,
-              hexToInt(start, dataSize),
-              hexToInt(end, dataSize)
-            );
-            for (let i = 1; i < subitemsCount; i++) {
-              incHex(end, dataSize);
-              stream.readHexNumber(start, dataSize);
-              addHex(start, end, dataSize);
-              stream.readHexNumber(end, dataSize);
-              addHex(end, start, dataSize);
-              cMap.addCodespaceRange(
-                dataSize + 1,
-                hexToInt(start, dataSize),
-                hexToInt(end, dataSize)
-              );
-            }
-            break;
-          case 1: // notdefrange
-            stream.readHex(start, dataSize);
-            stream.readHexNumber(end, dataSize);
-            addHex(end, start, dataSize);
-            stream.readNumber(); // code
-            // undefined range, skipping
-            for (let i = 1; i < subitemsCount; i++) {
-              incHex(end, dataSize);
-              stream.readHexNumber(start, dataSize);
-              addHex(start, end, dataSize);
-              stream.readHexNumber(end, dataSize);
-              addHex(end, start, dataSize);
-              stream.readNumber(); // code
-              // nop
-            }
-            break;
-          case 2: // cidchar
-            stream.readHex(char, dataSize);
-            code = stream.readNumber();
-            cMap.mapOne(hexToInt(char, dataSize), code);
-            for (let i = 1; i < subitemsCount; i++) {
-              incHex(char, dataSize);
-              if (!sequence) {
-                stream.readHexNumber(tmp, dataSize);
-                addHex(char, tmp, dataSize);
-              }
-              code = stream.readSigned() + (code + 1);
-              cMap.mapOne(hexToInt(char, dataSize), code);
-            }
-            break;
-          case 3: // cidrange
-            stream.readHex(start, dataSize);
-            stream.readHexNumber(end, dataSize);
-            addHex(end, start, dataSize);
-            code = stream.readNumber();
-            cMap.mapCidRange(
-              hexToInt(start, dataSize),
-              hexToInt(end, dataSize),
-              code
-            );
-            for (let i = 1; i < subitemsCount; i++) {
-              incHex(end, dataSize);
-              if (!sequence) {
-                stream.readHexNumber(start, dataSize);
-                addHex(start, end, dataSize);
-              } else {
-                start.set(end);
-              }
-              stream.readHexNumber(end, dataSize);
-              addHex(end, start, dataSize);
-              code = stream.readNumber();
-              cMap.mapCidRange(
-                hexToInt(start, dataSize),
-                hexToInt(end, dataSize),
-                code
-              );
-            }
-            break;
-          case 4: // bfchar
-            stream.readHex(char, ucs2DataSize);
-            stream.readHex(charCode, dataSize);
-            cMap.mapOne(
-              hexToInt(char, ucs2DataSize),
-              hexToStr(charCode, dataSize)
-            );
-            for (let i = 1; i < subitemsCount; i++) {
-              incHex(char, ucs2DataSize);
-              if (!sequence) {
-                stream.readHexNumber(tmp, ucs2DataSize);
-                addHex(char, tmp, ucs2DataSize);
-              }
-              incHex(charCode, dataSize);
-              stream.readHexSigned(tmp, dataSize);
-              addHex(charCode, tmp, dataSize);
-              cMap.mapOne(
-                hexToInt(char, ucs2DataSize),
-                hexToStr(charCode, dataSize)
-              );
-            }
-            break;
-          case 5: // bfrange
-            stream.readHex(start, ucs2DataSize);
-            stream.readHexNumber(end, ucs2DataSize);
-            addHex(end, start, ucs2DataSize);
-            stream.readHex(charCode, dataSize);
-            cMap.mapBfRange(
-              hexToInt(start, ucs2DataSize),
-              hexToInt(end, ucs2DataSize),
-              hexToStr(charCode, dataSize)
-            );
-            for (let i = 1; i < subitemsCount; i++) {
-              incHex(end, ucs2DataSize);
-              if (!sequence) {
-                stream.readHexNumber(start, ucs2DataSize);
-                addHex(start, end, ucs2DataSize);
-              } else {
-                start.set(end);
-              }
-              stream.readHexNumber(end, ucs2DataSize);
-              addHex(end, start, ucs2DataSize);
-              stream.readHex(charCode, dataSize);
-              cMap.mapBfRange(
-                hexToInt(start, ucs2DataSize),
-                hexToInt(end, ucs2DataSize),
-                hexToStr(charCode, dataSize)
-              );
-            }
-            break;
-          default:
-            throw new Error(`BinaryCMapReader.process - unknown type: ${type}`);
-        }
-      }
-
-      if (useCMap) {
-        return extend(useCMap);
-      }
-      return cMap;
-    }
-  }
-
-  return BinaryCMapReader;
-})();
-
 const CMapFactory = (function CMapFactoryClosure() {
   function strToInt(str) {
     let a = 0;