Move the BinaryCMapReader into its own file

The "binary" CMap format is specific to the PDF.js library, and is used to reduce the size of the built-in CMap data files.
Moving this code into its own file lets us remove the closures, which are no longer necessary, and that slightly reduces the size of this code.
This commit is contained in:
Jonas Jenwald 2023-04-21 11:43:11 +02:00
parent 434445973d
commit 244002502b
2 changed files with 327 additions and 313 deletions

326
src/core/binary_cmap.js Normal file
View File

@ -0,0 +1,326 @@
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { FormatError } from "../shared/util.js";
/**
 * Interprets the first `size + 1` bytes of `a` as a big-endian number.
 *
 * The accumulation is done with 32-bit bitwise operators, so only the low
 * 32 bits survive when more than four bytes are consumed.
 *
 * @param {Uint8Array} a - Bytes to read, most significant byte first.
 * @param {number} size - Index of the last byte to include.
 * @returns {number} The value as an unsigned 32-bit integer.
 */
function hexToInt(a, size) {
  let value = 0;
  let index = 0;
  while (index <= size) {
    value = (value << 8) | a[index];
    index += 1;
  }
  // Reinterpret the signed 32-bit intermediate as unsigned.
  return value >>> 0;
}
/**
 * Builds a string from the first `size + 1` bytes of `a`, using each byte
 * as one character code.
 *
 * @param {Uint8Array} a - Source bytes.
 * @param {number} size - Index of the last byte to include.
 * @returns {string} A string of `size + 1` characters.
 */
function hexToStr(a, size) {
  // This code is hot: the two most common widths are spelled out so that no
  // temporary object has to be created with subarray().
  switch (size) {
    case 1:
      return String.fromCharCode(a[0], a[1]);
    case 3:
      return String.fromCharCode(a[0], a[1], a[2], a[3]);
    default: {
      const bytes = a.subarray(0, size + 1);
      return String.fromCharCode.apply(null, bytes);
    }
  }
}
/**
 * Adds the big-endian number in `b` to the one in `a`, in place.
 *
 * Only bytes `0..size` take part; a carry out of byte 0 is discarded, so the
 * sum wraps around.
 *
 * @param {Uint8Array} a - Accumulator, overwritten with the sum.
 * @param {Uint8Array} b - Addend, left untouched.
 * @param {number} size - Index of the least significant byte.
 */
function addHex(a, b, size) {
  let carry = 0;
  let idx = size;
  // Walk from the least significant byte towards the most significant one.
  while (idx >= 0) {
    const sum = carry + a[idx] + b[idx];
    a[idx] = sum & 255;
    carry = sum >> 8;
    idx -= 1;
  }
}
/**
 * Increments the big-endian number stored in bytes `0..size` of `a` by one,
 * in place. The value wraps to zero when every byte was 0xff.
 *
 * @param {Uint8Array} a - Number to increment.
 * @param {number} size - Index of the least significant byte.
 */
function incHex(a, size) {
  let carry = 1;
  let pos = size;
  // Stop as soon as there is nothing left to carry into the next byte.
  while (carry > 0 && pos >= 0) {
    const sum = a[pos] + carry;
    a[pos] = sum & 255;
    carry = sum >> 8;
    pos -= 1;
  }
}
// Largest fixed-width value, in bytes, that the reader's scratch arrays hold.
const MAX_NUM_SIZE = 16;
// Number of 7-bit groups needed to carry MAX_NUM_SIZE bytes of payload.
const MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 8 / 7)
/**
 * Sequential reader over the raw bytes of a binary CMap (bcmap) file.
 *
 * Fixed-width values are handled as big-endian byte arrays of `size + 1`
 * bytes. Variable-length values are stored as groups of 7 bits, most
 * significant group first, where a set high bit (0x80) means that another
 * byte follows.
 */
class BinaryCMapStream {
  /**
   * @param {Uint8Array} data - The bytes to read from.
   */
  constructor(data) {
    this.buffer = data;
    this.pos = 0;
    this.end = data.length;
    // Scratch space for the 7-bit groups collected by `readHexNumber`.
    this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
  }

  /**
   * Reads a single byte.
   *
   * @returns {number} The byte, or -1 once the end of the data is reached.
   */
  readByte() {
    if (this.pos >= this.end) {
      return -1;
    }
    return this.buffer[this.pos++];
  }

  /**
   * Reads a variable-length unsigned number.
   *
   * @returns {number} The decoded value (accumulated with 32-bit operators).
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readNumber() {
    let n = 0;
    let last;
    do {
      const b = this.readByte();
      if (b < 0) {
        throw new FormatError("unexpected EOF in bcmap");
      }
      last = !(b & 0x80);
      n = (n << 7) | (b & 0x7f);
    } while (!last);
    return n;
  }

  /**
   * Reads a variable-length signed number. The lowest bit of the encoded
   * value is the sign flag; the remaining bits hold the magnitude, stored
   * bit-inverted for negative values.
   *
   * @returns {number} The decoded signed value.
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readSigned() {
    const n = this.readNumber();
    return n & 1 ? ~(n >>> 1) : n >>> 1;
  }

  /**
   * Copies `size + 1` raw bytes from the stream into the start of `num`.
   *
   * @param {Uint8Array} num - Destination buffer.
   * @param {number} size - Number of bytes to copy, minus one.
   * @throws {FormatError} If fewer than `size + 1` bytes remain. Without
   *   this check a truncated file would leave stale bytes in `num` and move
   *   `pos` past `end` without any error.
   */
  readHex(num, size) {
    const length = size + 1;
    if (this.pos + length > this.end) {
      throw new FormatError("unexpected EOF in bcmap");
    }
    num.set(this.buffer.subarray(this.pos, this.pos + length));
    this.pos += length;
  }

  /**
   * Reads a variable-length number and stores it in bytes `0..size` of
   * `num`, big-endian. Bytes not covered by the encoded value are set to 0.
   *
   * @param {Uint8Array} num - Destination buffer.
   * @param {number} size - Index of the least significant byte in `num`.
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readHexNumber(num, size) {
    const stack = this.tmpBuf;
    let sp = 0;
    let last;
    // Collect the 7-bit groups; the most significant group arrives first.
    do {
      const b = this.readByte();
      if (b < 0) {
        throw new FormatError("unexpected EOF in bcmap");
      }
      last = !(b & 0x80);
      stack[sp++] = b & 0x7f;
    } while (!last);

    let i = size;
    let buffer = 0;
    let bufferSize = 0;
    // Repack into bytes, filling `num` from its least significant end.
    while (i >= 0) {
      // Pop groups only while some are left: `stack` is a fixed-size scratch
      // array, so its length says nothing about how many groups were read.
      while (bufferSize < 8 && sp > 0) {
        buffer |= stack[--sp] << bufferSize;
        bufferSize += 7;
      }
      num[i] = buffer & 255;
      i--;
      buffer >>= 8;
      bufferSize -= 8;
    }
  }

  /**
   * Reads a variable-length signed number into bytes `0..size` of `num`.
   * The lowest encoded bit is the sign flag; the value is shifted right by
   * one bit across all bytes and bit-inverted when the flag is set.
   *
   * @param {Uint8Array} num - Destination buffer.
   * @param {number} size - Index of the least significant byte in `num`.
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readHexSigned(num, size) {
    this.readHexNumber(num, size);
    const sign = num[size] & 1 ? 255 : 0;
    let c = 0;
    for (let i = 0; i <= size; i++) {
      // Carry the bit shifted out of the previous byte into this one.
      c = ((c & 1) << 8) | num[i];
      num[i] = (c >> 1) ^ sign;
    }
  }

  /**
   * Reads a string stored as a length followed by that many character
   * codes, each encoded as a variable-length number.
   *
   * @returns {string} The decoded string.
   * @throws {FormatError} If the data ends before the string is complete.
   */
  readString() {
    const len = this.readNumber();
    let s = "";
    for (let i = 0; i < len; i++) {
      s += String.fromCharCode(this.readNumber());
    }
    return s;
  }
}
/**
 * Decodes a binary CMap (bcmap) file and feeds its records into a CMap.
 */
class BinaryCMapReader {
/**
 * Parses `data` record by record and applies each record to `cMap`.
 *
 * @param {Uint8Array} data - Raw bcmap bytes (expected to be a Uint8Array or
 *   compatible typed array); handed to a BinaryCMapStream.
 * @param {Object} cMap - Target map. Its `vertical` property is assigned and
 *   its `addCodespaceRange`, `mapOne`, `mapCidRange` and `mapBfRange`
 *   methods are called.
 * @param {function} extend - Called with the name read from a "usecmap"
 *   metadata record, if the file contained one.
 * @returns {Promise} Resolves to the result of `extend(useCMap)` when a
 *   usecmap name was read, otherwise to `cMap`. Rejects with an Error for an
 *   invalid data size or unknown record type, and with whatever the stream
 *   throws on a premature end of data.
 */
async process(data, cMap, extend) {
const stream = new BinaryCMapStream(data);
// Bit 0 of the header byte determines `cMap.vertical`.
const header = stream.readByte();
cMap.vertical = !!(header & 1);
let useCMap = null;
// Scratch buffers, reused across records; values live in bytes
// 0..dataSize, big-endian.
const start = new Uint8Array(MAX_NUM_SIZE);
const end = new Uint8Array(MAX_NUM_SIZE);
const char = new Uint8Array(MAX_NUM_SIZE);
const charCode = new Uint8Array(MAX_NUM_SIZE);
const tmp = new Uint8Array(MAX_NUM_SIZE);
let code;
let b;
// Each record starts with one descriptor byte; readByte() returns a
// negative value at the end of the data, which ends the loop.
while ((b = stream.readByte()) >= 0) {
// The top three bits of the descriptor select the record type.
const type = b >> 5;
if (type === 7) {
// metadata, e.g. comment or usecmap
switch (b & 0x1f) {
case 0:
stream.readString(); // skipping comment
break;
case 1:
useCMap = stream.readString();
break;
}
continue;
}
// Bit 4: when set, records of type 2-5 store no start offset for the
// follow-up items (each item begins right after the previous one).
const sequence = !!(b & 0x10);
// Low four bits: width of the values in bytes, minus one.
const dataSize = b & 15;
// NOTE(review): dataSize is at most 15, so with MAX_NUM_SIZE = 16 this
// guard cannot currently trigger.
if (dataSize + 1 > MAX_NUM_SIZE) {
throw new Error("BinaryCMapReader.process: Invalid dataSize.");
}
// Source codes of bfchar/bfrange records are read with this fixed size
// (i.e. two bytes), independent of dataSize.
const ucs2DataSize = 1;
const subitemsCount = stream.readNumber();
switch (type) {
case 0: // codespacerange
// First item: start is stored verbatim, end as an offset from start.
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(
dataSize + 1,
hexToInt(start, dataSize),
hexToInt(end, dataSize)
);
// Later items: start is an offset from (previous end + 1), end is again
// an offset from start.
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(
dataSize + 1,
hexToInt(start, dataSize),
hexToInt(end, dataSize)
);
}
break;
case 1: // notdefrange
// The values are decoded only to advance the stream; nothing is added
// to cMap for this record type.
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
stream.readNumber(); // code
// undefined range, skipping
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
stream.readNumber(); // code
// nop
}
break;
case 2: // cidchar
stream.readHex(char, dataSize);
code = stream.readNumber();
cMap.mapOne(hexToInt(char, dataSize), code);
for (let i = 1; i < subitemsCount; i++) {
// The next char is (previous char + 1), plus a stored offset unless
// the sequence flag is set.
incHex(char, dataSize);
if (!sequence) {
stream.readHexNumber(tmp, dataSize);
addHex(char, tmp, dataSize);
}
// The code is stored as a signed offset from (previous code + 1).
code = stream.readSigned() + (code + 1);
cMap.mapOne(hexToInt(char, dataSize), code);
}
break;
case 3: // cidrange
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapCidRange(
hexToInt(start, dataSize),
hexToInt(end, dataSize),
code
);
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
if (!sequence) {
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
} else {
// Sequence: the range starts right after the previous one.
start.set(end);
}
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapCidRange(
hexToInt(start, dataSize),
hexToInt(end, dataSize),
code
);
}
break;
case 4: // bfchar
stream.readHex(char, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapOne(
hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
incHex(char, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(tmp, ucs2DataSize);
addHex(char, tmp, ucs2DataSize);
}
// The destination is stored as a signed offset from
// (previous destination + 1).
incHex(charCode, dataSize);
stream.readHexSigned(tmp, dataSize);
addHex(charCode, tmp, dataSize);
cMap.mapOne(
hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize)
);
}
break;
case 5: // bfrange
stream.readHex(start, ucs2DataSize);
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapBfRange(
hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
incHex(end, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(start, ucs2DataSize);
addHex(start, end, ucs2DataSize);
} else {
// Sequence: the range starts right after the previous one.
start.set(end);
}
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
// Unlike bfchar, the destination is stored verbatim for every item.
stream.readHex(charCode, dataSize);
cMap.mapBfRange(
hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)
);
}
break;
default:
throw new Error(`BinaryCMapReader.process - unknown type: ${type}`);
}
}
if (useCMap) {
return extend(useCMap);
}
return cMap;
}
}
export { BinaryCMapReader };

View File

@ -21,6 +21,7 @@ import {
} from "../shared/util.js";
import { Cmd, EOF, isCmd, Name } from "./primitives.js";
import { BaseStream } from "./base_stream.js";
import { BinaryCMapReader } from "./binary_cmap.js";
import { Lexer } from "./parser.js";
import { MissingDataException } from "./core_utils.js";
import { Stream } from "./stream.js";
@ -443,319 +444,6 @@ class IdentityCMap extends CMap {
}
}
const BinaryCMapReader = (function BinaryCMapReaderClosure() {
/**
 * Interprets the first `size + 1` bytes of `a` as a big-endian number.
 *
 * The accumulation is done with 32-bit bitwise operators, so only the low
 * 32 bits survive when more than four bytes are consumed.
 *
 * @param {Uint8Array} a - Bytes to read, most significant byte first.
 * @param {number} size - Index of the last byte to include.
 * @returns {number} The value as an unsigned 32-bit integer.
 */
function hexToInt(a, size) {
  let value = 0;
  let index = 0;
  while (index <= size) {
    value = (value << 8) | a[index];
    index += 1;
  }
  // Reinterpret the signed 32-bit intermediate as unsigned.
  return value >>> 0;
}
/**
 * Builds a string from the first `size + 1` bytes of `a`, using each byte
 * as one character code.
 *
 * @param {Uint8Array} a - Source bytes.
 * @param {number} size - Index of the last byte to include.
 * @returns {string} A string of `size + 1` characters.
 */
function hexToStr(a, size) {
  // This code is hot: the two most common widths are spelled out so that no
  // temporary object has to be created with subarray().
  switch (size) {
    case 1:
      return String.fromCharCode(a[0], a[1]);
    case 3:
      return String.fromCharCode(a[0], a[1], a[2], a[3]);
    default: {
      const bytes = a.subarray(0, size + 1);
      return String.fromCharCode.apply(null, bytes);
    }
  }
}
/**
 * Adds the big-endian number in `b` to the one in `a`, in place.
 *
 * Only bytes `0..size` take part; a carry out of byte 0 is discarded, so the
 * sum wraps around.
 *
 * @param {Uint8Array} a - Accumulator, overwritten with the sum.
 * @param {Uint8Array} b - Addend, left untouched.
 * @param {number} size - Index of the least significant byte.
 */
function addHex(a, b, size) {
  let carry = 0;
  let idx = size;
  // Walk from the least significant byte towards the most significant one.
  while (idx >= 0) {
    const sum = carry + a[idx] + b[idx];
    a[idx] = sum & 255;
    carry = sum >> 8;
    idx -= 1;
  }
}
/**
 * Increments the big-endian number stored in bytes `0..size` of `a` by one,
 * in place. The value wraps to zero when every byte was 0xff.
 *
 * @param {Uint8Array} a - Number to increment.
 * @param {number} size - Index of the least significant byte.
 */
function incHex(a, size) {
  let carry = 1;
  let pos = size;
  // Stop as soon as there is nothing left to carry into the next byte.
  while (carry > 0 && pos >= 0) {
    const sum = a[pos] + carry;
    a[pos] = sum & 255;
    carry = sum >> 8;
    pos -= 1;
  }
}
// Largest fixed-width value, in bytes, that the reader's scratch arrays hold.
const MAX_NUM_SIZE = 16;
// Number of 7-bit groups needed to carry MAX_NUM_SIZE bytes of payload.
const MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 8 / 7)
/**
 * Sequential reader over the raw bytes of a binary CMap (bcmap) file.
 *
 * Fixed-width values are handled as big-endian byte arrays of `size + 1`
 * bytes. Variable-length values are stored as groups of 7 bits, most
 * significant group first, where a set high bit (0x80) means that another
 * byte follows.
 */
class BinaryCMapStream {
  /**
   * @param {Uint8Array} data - The bytes to read from.
   */
  constructor(data) {
    this.buffer = data;
    this.pos = 0;
    this.end = data.length;
    // Scratch space for the 7-bit groups collected by `readHexNumber`.
    this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
  }

  /**
   * Reads a single byte.
   *
   * @returns {number} The byte, or -1 once the end of the data is reached.
   */
  readByte() {
    if (this.pos >= this.end) {
      return -1;
    }
    return this.buffer[this.pos++];
  }

  /**
   * Reads a variable-length unsigned number.
   *
   * @returns {number} The decoded value (accumulated with 32-bit operators).
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readNumber() {
    let n = 0;
    let last;
    do {
      const b = this.readByte();
      if (b < 0) {
        throw new FormatError("unexpected EOF in bcmap");
      }
      last = !(b & 0x80);
      n = (n << 7) | (b & 0x7f);
    } while (!last);
    return n;
  }

  /**
   * Reads a variable-length signed number. The lowest bit of the encoded
   * value is the sign flag; the remaining bits hold the magnitude, stored
   * bit-inverted for negative values.
   *
   * @returns {number} The decoded signed value.
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readSigned() {
    const n = this.readNumber();
    return n & 1 ? ~(n >>> 1) : n >>> 1;
  }

  /**
   * Copies `size + 1` raw bytes from the stream into the start of `num`.
   *
   * @param {Uint8Array} num - Destination buffer.
   * @param {number} size - Number of bytes to copy, minus one.
   * @throws {FormatError} If fewer than `size + 1` bytes remain. Without
   *   this check a truncated file would leave stale bytes in `num` and move
   *   `pos` past `end` without any error.
   */
  readHex(num, size) {
    const length = size + 1;
    if (this.pos + length > this.end) {
      throw new FormatError("unexpected EOF in bcmap");
    }
    num.set(this.buffer.subarray(this.pos, this.pos + length));
    this.pos += length;
  }

  /**
   * Reads a variable-length number and stores it in bytes `0..size` of
   * `num`, big-endian. Bytes not covered by the encoded value are set to 0.
   *
   * @param {Uint8Array} num - Destination buffer.
   * @param {number} size - Index of the least significant byte in `num`.
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readHexNumber(num, size) {
    const stack = this.tmpBuf;
    let sp = 0;
    let last;
    // Collect the 7-bit groups; the most significant group arrives first.
    do {
      const b = this.readByte();
      if (b < 0) {
        throw new FormatError("unexpected EOF in bcmap");
      }
      last = !(b & 0x80);
      stack[sp++] = b & 0x7f;
    } while (!last);

    let i = size;
    let buffer = 0;
    let bufferSize = 0;
    // Repack into bytes, filling `num` from its least significant end.
    while (i >= 0) {
      // Pop groups only while some are left: `stack` is a fixed-size scratch
      // array, so its length says nothing about how many groups were read.
      while (bufferSize < 8 && sp > 0) {
        buffer |= stack[--sp] << bufferSize;
        bufferSize += 7;
      }
      num[i] = buffer & 255;
      i--;
      buffer >>= 8;
      bufferSize -= 8;
    }
  }

  /**
   * Reads a variable-length signed number into bytes `0..size` of `num`.
   * The lowest encoded bit is the sign flag; the value is shifted right by
   * one bit across all bytes and bit-inverted when the flag is set.
   *
   * @param {Uint8Array} num - Destination buffer.
   * @param {number} size - Index of the least significant byte in `num`.
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readHexSigned(num, size) {
    this.readHexNumber(num, size);
    const sign = num[size] & 1 ? 255 : 0;
    let c = 0;
    for (let i = 0; i <= size; i++) {
      // Carry the bit shifted out of the previous byte into this one.
      c = ((c & 1) << 8) | num[i];
      num[i] = (c >> 1) ^ sign;
    }
  }

  /**
   * Reads a string stored as a length followed by that many character
   * codes, each encoded as a variable-length number.
   *
   * @returns {string} The decoded string.
   * @throws {FormatError} If the data ends before the string is complete.
   */
  readString() {
    const len = this.readNumber();
    let s = "";
    for (let i = 0; i < len; i++) {
      s += String.fromCharCode(this.readNumber());
    }
    return s;
  }
}
/**
 * Decodes a binary CMap (bcmap) file and feeds its records into a CMap.
 */
// eslint-disable-next-line no-shadow
class BinaryCMapReader {
/**
 * Parses `data` record by record and applies each record to `cMap`.
 *
 * @param {Uint8Array} data - Raw bcmap bytes (expected to be a Uint8Array or
 *   compatible typed array); handed to a BinaryCMapStream.
 * @param {Object} cMap - Target map. Its `vertical` property is assigned and
 *   its `addCodespaceRange`, `mapOne`, `mapCidRange` and `mapBfRange`
 *   methods are called.
 * @param {function} extend - Called with the name read from a "usecmap"
 *   metadata record, if the file contained one.
 * @returns {Promise} Resolves to the result of `extend(useCMap)` when a
 *   usecmap name was read, otherwise to `cMap`. Rejects with an Error for an
 *   invalid data size or unknown record type, and with whatever the stream
 *   throws on a premature end of data.
 */
async process(data, cMap, extend) {
const stream = new BinaryCMapStream(data);
// Bit 0 of the header byte determines `cMap.vertical`.
const header = stream.readByte();
cMap.vertical = !!(header & 1);
let useCMap = null;
// Scratch buffers, reused across records; values live in bytes
// 0..dataSize, big-endian.
const start = new Uint8Array(MAX_NUM_SIZE);
const end = new Uint8Array(MAX_NUM_SIZE);
const char = new Uint8Array(MAX_NUM_SIZE);
const charCode = new Uint8Array(MAX_NUM_SIZE);
const tmp = new Uint8Array(MAX_NUM_SIZE);
let code;
let b;
// Each record starts with one descriptor byte; readByte() returns a
// negative value at the end of the data, which ends the loop.
while ((b = stream.readByte()) >= 0) {
// The top three bits of the descriptor select the record type.
const type = b >> 5;
if (type === 7) {
// metadata, e.g. comment or usecmap
switch (b & 0x1f) {
case 0:
stream.readString(); // skipping comment
break;
case 1:
useCMap = stream.readString();
break;
}
continue;
}
// Bit 4: when set, records of type 2-5 store no start offset for the
// follow-up items (each item begins right after the previous one).
const sequence = !!(b & 0x10);
// Low four bits: width of the values in bytes, minus one.
const dataSize = b & 15;
// NOTE(review): dataSize is at most 15, so with MAX_NUM_SIZE = 16 this
// guard cannot currently trigger.
if (dataSize + 1 > MAX_NUM_SIZE) {
throw new Error("BinaryCMapReader.process: Invalid dataSize.");
}
// Source codes of bfchar/bfrange records are read with this fixed size
// (i.e. two bytes), independent of dataSize.
const ucs2DataSize = 1;
const subitemsCount = stream.readNumber();
switch (type) {
case 0: // codespacerange
// First item: start is stored verbatim, end as an offset from start.
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(
dataSize + 1,
hexToInt(start, dataSize),
hexToInt(end, dataSize)
);
// Later items: start is an offset from (previous end + 1), end is again
// an offset from start.
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(
dataSize + 1,
hexToInt(start, dataSize),
hexToInt(end, dataSize)
);
}
break;
case 1: // notdefrange
// The values are decoded only to advance the stream; nothing is added
// to cMap for this record type.
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
stream.readNumber(); // code
// undefined range, skipping
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
stream.readNumber(); // code
// nop
}
break;
case 2: // cidchar
stream.readHex(char, dataSize);
code = stream.readNumber();
cMap.mapOne(hexToInt(char, dataSize), code);
for (let i = 1; i < subitemsCount; i++) {
// The next char is (previous char + 1), plus a stored offset unless
// the sequence flag is set.
incHex(char, dataSize);
if (!sequence) {
stream.readHexNumber(tmp, dataSize);
addHex(char, tmp, dataSize);
}
// The code is stored as a signed offset from (previous code + 1).
code = stream.readSigned() + (code + 1);
cMap.mapOne(hexToInt(char, dataSize), code);
}
break;
case 3: // cidrange
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapCidRange(
hexToInt(start, dataSize),
hexToInt(end, dataSize),
code
);
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
if (!sequence) {
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
} else {
// Sequence: the range starts right after the previous one.
start.set(end);
}
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapCidRange(
hexToInt(start, dataSize),
hexToInt(end, dataSize),
code
);
}
break;
case 4: // bfchar
stream.readHex(char, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapOne(
hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
incHex(char, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(tmp, ucs2DataSize);
addHex(char, tmp, ucs2DataSize);
}
// The destination is stored as a signed offset from
// (previous destination + 1).
incHex(charCode, dataSize);
stream.readHexSigned(tmp, dataSize);
addHex(charCode, tmp, dataSize);
cMap.mapOne(
hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize)
);
}
break;
case 5: // bfrange
stream.readHex(start, ucs2DataSize);
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapBfRange(
hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
incHex(end, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(start, ucs2DataSize);
addHex(start, end, ucs2DataSize);
} else {
// Sequence: the range starts right after the previous one.
start.set(end);
}
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
// Unlike bfchar, the destination is stored verbatim for every item.
stream.readHex(charCode, dataSize);
cMap.mapBfRange(
hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)
);
}
break;
default:
throw new Error(`BinaryCMapReader.process - unknown type: ${type}`);
}
}
if (useCMap) {
return extend(useCMap);
}
return cMap;
}
}
return BinaryCMapReader;
})();
const CMapFactory = (function CMapFactoryClosure() {
function strToInt(str) {
let a = 0;