Merge pull request #16331 from Snuffleupagus/cmap-rm-closure

Remove unnecessary closures in the CMap code
This commit is contained in:
Tim van der Meij 2023-04-22 14:58:13 +02:00 committed by GitHub
commit e304423ba1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 573 additions and 565 deletions

326
src/core/binary_cmap.js Normal file
View File

@ -0,0 +1,326 @@
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { FormatError } from "../shared/util.js";
/**
 * Interprets the first `size + 1` bytes of `a` as a big-endian number.
 *
 * @param {Uint8Array} a - Byte buffer, most significant byte first.
 * @param {number} size - Index of the last byte to include (byte count - 1).
 * @returns {number} The value as an unsigned 32-bit integer.
 */
function hexToInt(a, size) {
  let value = 0;
  let idx = 0;
  while (idx <= size) {
    // Shift the accumulated bytes up and append the next one.
    value = (value << 8) | a[idx];
    idx += 1;
  }
  // Reinterpret the (possibly negative) int32 result as unsigned.
  return value >>> 0;
}
/**
 * Converts the first `size + 1` bytes of `a` into a string, one UTF-16 code
 * unit per byte.
 *
 * @param {Uint8Array} a - Byte buffer to read from.
 * @param {number} size - Index of the last byte to include (byte count - 1).
 * @returns {string} String whose char codes are the selected bytes.
 */
function hexToStr(a, size) {
  // Hot path: the two- and four-byte cases are spelled out so that no
  // temporary view has to be allocated through subarray().
  switch (size) {
    case 1:
      return String.fromCharCode(a[0], a[1]);
    case 3:
      return String.fromCharCode(a[0], a[1], a[2], a[3]);
    default: {
      const bytes = a.subarray(0, size + 1);
      return String.fromCharCode(...bytes);
    }
  }
}
/**
 * Adds the big-endian number in `b` to the one in `a`, in place.
 *
 * Only the first `size + 1` bytes of each buffer take part; a carry out of
 * the most significant byte is discarded.
 *
 * @param {Uint8Array} a - Accumulator; receives the sum.
 * @param {Uint8Array} b - Addend; left untouched.
 * @param {number} size - Index of the least significant byte.
 */
function addHex(a, b, size) {
  let carry = 0;
  let idx = size;
  // Walk from the least significant byte towards the most significant one.
  while (idx >= 0) {
    const sum = carry + (a[idx] + b[idx]);
    a[idx] = sum & 255;
    carry = sum >> 8;
    idx--;
  }
}
/**
 * Adds one to the big-endian number held in the first `size + 1` bytes of
 * `a`, in place. Wraps around to zero when every byte is 0xff.
 *
 * @param {Uint8Array} a - Number to increment.
 * @param {number} size - Index of the least significant byte.
 */
function incHex(a, size) {
  let carry = 1;
  for (let idx = size; idx >= 0; idx--) {
    // Nothing left to propagate: the more significant bytes stay as they are.
    if (!(carry > 0)) {
      break;
    }
    carry += a[idx];
    a[idx] = carry & 255;
    carry >>= 8;
  }
}
// Size, in bytes, of the scratch buffers used to hold a decoded number.
const MAX_NUM_SIZE = 16;
// Size of the scratch buffer for a number in its encoded form: every encoded
// byte carries 7 payload bits, so 16 * 8 = 128 bits need ceil(128 / 7) bytes.
const MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 8 / 7)
/**
 * Forward-only cursor over the bytes of a binary CMap (bcmap), with readers
 * for the encodings used by the format.
 *
 * Variable-length numbers are stored most significant group first, 7 payload
 * bits per byte; the high bit (0x80) of a byte signals that another byte
 * follows. Signed values keep their sign in the least significant bit.
 */
class BinaryCMapStream {
  /**
   * @param {Uint8Array} data - The bcmap bytes (must support `subarray`).
   */
  constructor(data) {
    this.buffer = data;
    this.pos = 0;
    this.end = data.length;
    // Scratch space for the 7-bit groups collected by `readHexNumber`.
    this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
  }

  /**
   * @returns {number} The next byte, or -1 once the end of the data is reached.
   */
  readByte() {
    if (this.pos >= this.end) {
      return -1;
    }
    return this.buffer[this.pos++];
  }

  /**
   * Reads a variable-length unsigned number.
   *
   * @returns {number} The decoded value (accumulated with 32-bit operators).
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readNumber() {
    let n = 0;
    let last;
    do {
      const b = this.readByte();
      if (b < 0) {
        throw new FormatError("unexpected EOF in bcmap");
      }
      last = !(b & 0x80);
      n = (n << 7) | (b & 0x7f);
    } while (!last);
    return n;
  }

  /**
   * Reads a variable-length number whose lowest bit is the sign flag.
   *
   * @returns {number} `v` for an encoded `2 * v`, `~v` for an encoded
   *   `2 * v + 1`.
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readSigned() {
    const n = this.readNumber();
    return n & 1 ? ~(n >>> 1) : n >>> 1;
  }

  /**
   * Copies `size + 1` raw bytes from the stream into the start of `num`.
   *
   * @param {Uint8Array} num - Destination buffer.
   * @param {number} size - Index of the last byte to fill (byte count - 1).
   * @throws {FormatError} If fewer than `size + 1` bytes remain.
   */
  readHex(num, size) {
    const count = size + 1;
    // Fail like the other readers do instead of silently leaving stale bytes
    // in `num` and moving the cursor past the end of the data.
    if (this.pos + count > this.end) {
      throw new FormatError("unexpected EOF in bcmap");
    }
    num.set(this.buffer.subarray(this.pos, this.pos + count));
    this.pos += count;
  }

  /**
   * Reads a variable-length number and stores it big-endian into the first
   * `size + 1` bytes of `num`; bytes above the encoded value are set to zero.
   *
   * @param {Uint8Array} num - Destination buffer.
   * @param {number} size - Index of the least significant byte in `num`.
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readHexNumber(num, size) {
    let last;
    const stack = this.tmpBuf;
    let sp = 0;
    // Collect the 7-bit groups; the first one read is the most significant.
    do {
      const b = this.readByte();
      if (b < 0) {
        throw new FormatError("unexpected EOF in bcmap");
      }
      last = !(b & 0x80);
      stack[sp++] = b & 0x7f;
    } while (!last);
    let i = size,
      buffer = 0,
      bufferSize = 0;
    // Pop groups (least significant first) into a bit buffer and emit one
    // output byte per iteration, filling `num` from its last byte backwards.
    while (i >= 0) {
      // Refill only while groups remain; once the stack is empty the leftover
      // bits in `buffer` are drained and the remaining bytes become zero.
      while (bufferSize < 8 && sp > 0) {
        buffer |= stack[--sp] << bufferSize;
        bufferSize += 7;
      }
      num[i] = buffer & 255;
      i--;
      buffer >>= 8;
      bufferSize -= 8;
    }
  }

  /**
   * Reads a variable-length number with the sign flag in its lowest bit and
   * stores the decoded value into the first `size + 1` bytes of `num`: the
   * whole multi-byte value is shifted right by one bit and, when the flag was
   * set, every byte is inverted.
   *
   * @param {Uint8Array} num - Destination buffer.
   * @param {number} size - Index of the least significant byte in `num`.
   * @throws {FormatError} If the data ends in the middle of the number.
   */
  readHexSigned(num, size) {
    this.readHexNumber(num, size);
    const sign = num[size] & 1 ? 255 : 0;
    let c = 0;
    for (let i = 0; i <= size; i++) {
      // Carry the bit shifted out of the previous byte into this one.
      c = ((c & 1) << 8) | num[i];
      num[i] = (c >> 1) ^ sign;
    }
  }

  /**
   * Reads a length-prefixed string: a variable-length count followed by that
   * many variable-length char codes.
   *
   * @returns {string} The decoded string.
   * @throws {FormatError} If the data ends before the string is complete.
   */
  readString() {
    const len = this.readNumber();
    // Convert one code at a time: spreading every code into a single
    // String.fromCharCode() call overflows the engine's argument limit for
    // long strings.
    const pieces = [];
    for (let i = 0; i < len; i++) {
      pieces.push(String.fromCharCode(this.readNumber()));
    }
    return pieces.join("");
  }
}
/**
 * Decoder for the binary CMap (bcmap) format: walks the records of a bcmap
 * byte buffer and replays them onto a CMap object.
 */
class BinaryCMapReader {
/**
 * Parses `data` and populates `cMap` through its `addCodespaceRange`,
 * `mapOne`, `mapCidRange` and `mapBfRange` methods.
 *
 * Within a record, only the first item stores absolute values. An end code
 * is stored as its distance from the start code, and every later start code
 * (or single char code) as its distance from the previous end/char code + 1.
 *
 * @param {Uint8Array} data - The bcmap bytes.
 * @param {Object} cMap - The CMap to fill; its `vertical` flag is also set.
 * @param {function} extend - Called with the name from a "usecmap" record,
 *   if the data contains one; its result is returned as-is.
 * @returns {Promise} Resolves to `extend(useCMap)` when a usecmap name was
 *   found, otherwise to `cMap`.
 */
async process(data, cMap, extend) {
const stream = new BinaryCMapStream(data);
// The lowest bit of the first byte holds the writing mode.
const header = stream.readByte();
cMap.vertical = !!(header & 1);
let useCMap = null;
// Scratch numbers, reused across records and updated in place.
const start = new Uint8Array(MAX_NUM_SIZE);
const end = new Uint8Array(MAX_NUM_SIZE);
const char = new Uint8Array(MAX_NUM_SIZE);
const charCode = new Uint8Array(MAX_NUM_SIZE);
const tmp = new Uint8Array(MAX_NUM_SIZE);
let code;
let b;
// Each iteration consumes one record; `b` is the record's leading byte.
while ((b = stream.readByte()) >= 0) {
// The top three bits select the record type.
const type = b >> 5;
if (type === 7) {
// metadata, e.g. comment or usecmap
switch (b & 0x1f) {
case 0:
stream.readString(); // skipping comment
break;
case 1:
useCMap = stream.readString();
break;
}
continue;
}
// Bit 0x10: when set, cidchar/cidrange/bfchar/bfrange items carry no
// explicit start delta because each one begins right after the previous.
const sequence = !!(b & 0x10);
// Low four bits: byte width of the numbers in this record, minus one.
const dataSize = b & 15;
// NOTE(review): `b & 15` is at most 15, so with MAX_NUM_SIZE = 16 this
// guard does not appear to be reachable — confirm before relying on it.
if (dataSize + 1 > MAX_NUM_SIZE) {
throw new Error("BinaryCMapReader.process: Invalid dataSize.");
}
// Source codes of bfchar/bfrange records are always two bytes wide.
const ucs2DataSize = 1;
const subitemsCount = stream.readNumber();
switch (type) {
case 0: // codespacerange
// First range: absolute start, then end as an offset from start.
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(
dataSize + 1,
hexToInt(start, dataSize),
hexToInt(end, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
// `end` becomes "previous end + 1", the base for the next start delta.
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(
dataSize + 1,
hexToInt(start, dataSize),
hexToInt(end, dataSize)
);
}
break;
case 1: // notdefrange
// The record is decoded only to advance the stream; nothing is stored.
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
stream.readNumber(); // code
// undefined range, skipping
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
stream.readNumber(); // code
// nop
}
break;
case 2: // cidchar
stream.readHex(char, dataSize);
code = stream.readNumber();
cMap.mapOne(hexToInt(char, dataSize), code);
for (let i = 1; i < subitemsCount; i++) {
incHex(char, dataSize);
if (!sequence) {
stream.readHexNumber(tmp, dataSize);
addHex(char, tmp, dataSize);
}
// The target is stored as a signed offset from "previous code + 1".
code = stream.readSigned() + (code + 1);
cMap.mapOne(hexToInt(char, dataSize), code);
}
break;
case 3: // cidrange
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapCidRange(
hexToInt(start, dataSize),
hexToInt(end, dataSize),
code
);
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
if (!sequence) {
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
} else {
// Sequential ranges start directly after the previous range's end.
start.set(end);
}
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapCidRange(
hexToInt(start, dataSize),
hexToInt(end, dataSize),
code
);
}
break;
case 4: // bfchar
// The source code is two bytes; the destination is `dataSize + 1` bytes
// and is handed to the CMap as a string.
stream.readHex(char, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapOne(
hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
incHex(char, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(tmp, ucs2DataSize);
addHex(char, tmp, ucs2DataSize);
}
// The destination is a signed offset from "previous destination + 1".
incHex(charCode, dataSize);
stream.readHexSigned(tmp, dataSize);
addHex(charCode, tmp, dataSize);
cMap.mapOne(
hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize)
);
}
break;
case 5: // bfrange
stream.readHex(start, ucs2DataSize);
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapBfRange(
hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
incHex(end, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(start, ucs2DataSize);
addHex(start, end, ucs2DataSize);
} else {
start.set(end);
}
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
// Unlike bfchar, each range stores its destination as raw bytes.
stream.readHex(charCode, dataSize);
cMap.mapBfRange(
hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)
);
}
break;
default:
throw new Error(`BinaryCMapReader.process - unknown type: ${type}`);
}
}
// A "usecmap" record defers to the caller-supplied `extend` callback.
if (useCMap) {
return extend(useCMap);
}
return cMap;
}
}
export { BinaryCMapReader };

View File

@ -21,6 +21,7 @@ import {
} from "../shared/util.js"; } from "../shared/util.js";
import { Cmd, EOF, isCmd, Name } from "./primitives.js"; import { Cmd, EOF, isCmd, Name } from "./primitives.js";
import { BaseStream } from "./base_stream.js"; import { BaseStream } from "./base_stream.js";
import { BinaryCMapReader } from "./binary_cmap.js";
import { Lexer } from "./parser.js"; import { Lexer } from "./parser.js";
import { MissingDataException } from "./core_utils.js"; import { MissingDataException } from "./core_utils.js";
import { Stream } from "./stream.js"; import { Stream } from "./stream.js";
@ -443,341 +444,27 @@ class IdentityCMap extends CMap {
} }
} }
const BinaryCMapReader = (function BinaryCMapReaderClosure() { function strToInt(str) {
function hexToInt(a, size) {
let n = 0;
for (let i = 0; i <= size; i++) {
n = (n << 8) | a[i];
}
return n >>> 0;
}
function hexToStr(a, size) {
// This code is hot. Special-case some common values to avoid creating an
// object with subarray().
if (size === 1) {
return String.fromCharCode(a[0], a[1]);
}
if (size === 3) {
return String.fromCharCode(a[0], a[1], a[2], a[3]);
}
return String.fromCharCode.apply(null, a.subarray(0, size + 1));
}
function addHex(a, b, size) {
let c = 0;
for (let i = size; i >= 0; i--) {
c += a[i] + b[i];
a[i] = c & 255;
c >>= 8;
}
}
function incHex(a, size) {
let c = 1;
for (let i = size; i >= 0 && c > 0; i--) {
c += a[i];
a[i] = c & 255;
c >>= 8;
}
}
const MAX_NUM_SIZE = 16;
const MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 7 / 8)
class BinaryCMapStream {
constructor(data) {
this.buffer = data;
this.pos = 0;
this.end = data.length;
this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
}
readByte() {
if (this.pos >= this.end) {
return -1;
}
return this.buffer[this.pos++];
}
readNumber() {
let n = 0;
let last;
do {
const b = this.readByte();
if (b < 0) {
throw new FormatError("unexpected EOF in bcmap");
}
last = !(b & 0x80);
n = (n << 7) | (b & 0x7f);
} while (!last);
return n;
}
readSigned() {
const n = this.readNumber();
return n & 1 ? ~(n >>> 1) : n >>> 1;
}
readHex(num, size) {
num.set(this.buffer.subarray(this.pos, this.pos + size + 1));
this.pos += size + 1;
}
readHexNumber(num, size) {
let last;
const stack = this.tmpBuf;
let sp = 0;
do {
const b = this.readByte();
if (b < 0) {
throw new FormatError("unexpected EOF in bcmap");
}
last = !(b & 0x80);
stack[sp++] = b & 0x7f;
} while (!last);
let i = size,
buffer = 0,
bufferSize = 0;
while (i >= 0) {
while (bufferSize < 8 && stack.length > 0) {
buffer |= stack[--sp] << bufferSize;
bufferSize += 7;
}
num[i] = buffer & 255;
i--;
buffer >>= 8;
bufferSize -= 8;
}
}
readHexSigned(num, size) {
this.readHexNumber(num, size);
const sign = num[size] & 1 ? 255 : 0;
let c = 0;
for (let i = 0; i <= size; i++) {
c = ((c & 1) << 8) | num[i];
num[i] = (c >> 1) ^ sign;
}
}
readString() {
const len = this.readNumber();
let s = "";
for (let i = 0; i < len; i++) {
s += String.fromCharCode(this.readNumber());
}
return s;
}
}
// eslint-disable-next-line no-shadow
class BinaryCMapReader {
async process(data, cMap, extend) {
const stream = new BinaryCMapStream(data);
const header = stream.readByte();
cMap.vertical = !!(header & 1);
let useCMap = null;
const start = new Uint8Array(MAX_NUM_SIZE);
const end = new Uint8Array(MAX_NUM_SIZE);
const char = new Uint8Array(MAX_NUM_SIZE);
const charCode = new Uint8Array(MAX_NUM_SIZE);
const tmp = new Uint8Array(MAX_NUM_SIZE);
let code;
let b;
while ((b = stream.readByte()) >= 0) {
const type = b >> 5;
if (type === 7) {
// metadata, e.g. comment or usecmap
switch (b & 0x1f) {
case 0:
stream.readString(); // skipping comment
break;
case 1:
useCMap = stream.readString();
break;
}
continue;
}
const sequence = !!(b & 0x10);
const dataSize = b & 15;
if (dataSize + 1 > MAX_NUM_SIZE) {
throw new Error("BinaryCMapReader.process: Invalid dataSize.");
}
const ucs2DataSize = 1;
const subitemsCount = stream.readNumber();
switch (type) {
case 0: // codespacerange
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(
dataSize + 1,
hexToInt(start, dataSize),
hexToInt(end, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(
dataSize + 1,
hexToInt(start, dataSize),
hexToInt(end, dataSize)
);
}
break;
case 1: // notdefrange
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
stream.readNumber(); // code
// undefined range, skipping
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
stream.readNumber(); // code
// nop
}
break;
case 2: // cidchar
stream.readHex(char, dataSize);
code = stream.readNumber();
cMap.mapOne(hexToInt(char, dataSize), code);
for (let i = 1; i < subitemsCount; i++) {
incHex(char, dataSize);
if (!sequence) {
stream.readHexNumber(tmp, dataSize);
addHex(char, tmp, dataSize);
}
code = stream.readSigned() + (code + 1);
cMap.mapOne(hexToInt(char, dataSize), code);
}
break;
case 3: // cidrange
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapCidRange(
hexToInt(start, dataSize),
hexToInt(end, dataSize),
code
);
for (let i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
if (!sequence) {
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
} else {
start.set(end);
}
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapCidRange(
hexToInt(start, dataSize),
hexToInt(end, dataSize),
code
);
}
break;
case 4: // bfchar
stream.readHex(char, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapOne(
hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
incHex(char, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(tmp, ucs2DataSize);
addHex(char, tmp, ucs2DataSize);
}
incHex(charCode, dataSize);
stream.readHexSigned(tmp, dataSize);
addHex(charCode, tmp, dataSize);
cMap.mapOne(
hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize)
);
}
break;
case 5: // bfrange
stream.readHex(start, ucs2DataSize);
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapBfRange(
hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)
);
for (let i = 1; i < subitemsCount; i++) {
incHex(end, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(start, ucs2DataSize);
addHex(start, end, ucs2DataSize);
} else {
start.set(end);
}
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapBfRange(
hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)
);
}
break;
default:
throw new Error(`BinaryCMapReader.process - unknown type: ${type}`);
}
}
if (useCMap) {
return extend(useCMap);
}
return cMap;
}
}
return BinaryCMapReader;
})();
const CMapFactory = (function CMapFactoryClosure() {
function strToInt(str) {
let a = 0; let a = 0;
for (let i = 0; i < str.length; i++) { for (let i = 0; i < str.length; i++) {
a = (a << 8) | str.charCodeAt(i); a = (a << 8) | str.charCodeAt(i);
} }
return a >>> 0; return a >>> 0;
} }
function expectString(obj) { function expectString(obj) {
if (typeof obj !== "string") { if (typeof obj !== "string") {
throw new FormatError("Malformed CMap: expected string."); throw new FormatError("Malformed CMap: expected string.");
} }
} }
function expectInt(obj) { function expectInt(obj) {
if (!Number.isInteger(obj)) { if (!Number.isInteger(obj)) {
throw new FormatError("Malformed CMap: expected int."); throw new FormatError("Malformed CMap: expected int.");
} }
} }
function parseBfChar(cMap, lexer) { function parseBfChar(cMap, lexer) {
while (true) { while (true) {
let obj = lexer.getObj(); let obj = lexer.getObj();
if (obj === EOF) { if (obj === EOF) {
@ -794,9 +481,9 @@ const CMapFactory = (function CMapFactoryClosure() {
const dst = obj; const dst = obj;
cMap.mapOne(src, dst); cMap.mapOne(src, dst);
} }
} }
function parseBfRange(cMap, lexer) { function parseBfRange(cMap, lexer) {
while (true) { while (true) {
let obj = lexer.getObj(); let obj = lexer.getObj();
if (obj === EOF) { if (obj === EOF) {
@ -827,9 +514,9 @@ const CMapFactory = (function CMapFactoryClosure() {
} }
} }
throw new FormatError("Invalid bf range."); throw new FormatError("Invalid bf range.");
} }
function parseCidChar(cMap, lexer) { function parseCidChar(cMap, lexer) {
while (true) { while (true) {
let obj = lexer.getObj(); let obj = lexer.getObj();
if (obj === EOF) { if (obj === EOF) {
@ -845,9 +532,9 @@ const CMapFactory = (function CMapFactoryClosure() {
const dst = obj; const dst = obj;
cMap.mapOne(src, dst); cMap.mapOne(src, dst);
} }
} }
function parseCidRange(cMap, lexer) { function parseCidRange(cMap, lexer) {
while (true) { while (true) {
let obj = lexer.getObj(); let obj = lexer.getObj();
if (obj === EOF) { if (obj === EOF) {
@ -866,9 +553,9 @@ const CMapFactory = (function CMapFactoryClosure() {
const dstLow = obj; const dstLow = obj;
cMap.mapCidRange(low, high, dstLow); cMap.mapCidRange(low, high, dstLow);
} }
} }
function parseCodespaceRange(cMap, lexer) { function parseCodespaceRange(cMap, lexer) {
while (true) { while (true) {
let obj = lexer.getObj(); let obj = lexer.getObj();
if (obj === EOF) { if (obj === EOF) {
@ -889,23 +576,23 @@ const CMapFactory = (function CMapFactoryClosure() {
cMap.addCodespaceRange(obj.length, low, high); cMap.addCodespaceRange(obj.length, low, high);
} }
throw new FormatError("Invalid codespace range."); throw new FormatError("Invalid codespace range.");
} }
function parseWMode(cMap, lexer) { function parseWMode(cMap, lexer) {
const obj = lexer.getObj(); const obj = lexer.getObj();
if (Number.isInteger(obj)) { if (Number.isInteger(obj)) {
cMap.vertical = !!obj; cMap.vertical = !!obj;
} }
} }
function parseCMapName(cMap, lexer) { function parseCMapName(cMap, lexer) {
const obj = lexer.getObj(); const obj = lexer.getObj();
if (obj instanceof Name) { if (obj instanceof Name) {
cMap.name = obj.name; cMap.name = obj.name;
} }
} }
async function parseCMap(cMap, lexer, fetchBuiltInCMap, useCMap) { async function parseCMap(cMap, lexer, fetchBuiltInCMap, useCMap) {
let previous, embeddedUseCMap; let previous, embeddedUseCMap;
objLoop: while (true) { objLoop: while (true) {
try { try {
@ -963,9 +650,9 @@ const CMapFactory = (function CMapFactoryClosure() {
return extendCMap(cMap, fetchBuiltInCMap, useCMap); return extendCMap(cMap, fetchBuiltInCMap, useCMap);
} }
return cMap; return cMap;
} }
async function extendCMap(cMap, fetchBuiltInCMap, useCMap) { async function extendCMap(cMap, fetchBuiltInCMap, useCMap) {
cMap.useCMap = await createBuiltInCMap(useCMap, fetchBuiltInCMap); cMap.useCMap = await createBuiltInCMap(useCMap, fetchBuiltInCMap);
// If there aren't any code space ranges defined clone all the parent ones // If there aren't any code space ranges defined clone all the parent ones
// into this cMap. // into this cMap.
@ -985,9 +672,9 @@ const CMapFactory = (function CMapFactoryClosure() {
}); });
return cMap; return cMap;
} }
async function createBuiltInCMap(name, fetchBuiltInCMap) { async function createBuiltInCMap(name, fetchBuiltInCMap) {
if (name === "Identity-H") { if (name === "Identity-H") {
return new IdentityCMap(false, 2); return new IdentityCMap(false, 2);
} else if (name === "Identity-V") { } else if (name === "Identity-V") {
@ -1013,14 +700,10 @@ const CMapFactory = (function CMapFactoryClosure() {
return parseCMap(cMap, lexer, fetchBuiltInCMap, null); return parseCMap(cMap, lexer, fetchBuiltInCMap, null);
} }
throw new Error(`Invalid CMap "compressionType" value: ${compressionType}`); throw new Error(`Invalid CMap "compressionType" value: ${compressionType}`);
} }
return {
async create(params) {
const encoding = params.encoding;
const fetchBuiltInCMap = params.fetchBuiltInCMap;
const useCMap = params.useCMap;
class CMapFactory {
static async create({ encoding, fetchBuiltInCMap, useCMap }) {
if (encoding instanceof Name) { if (encoding instanceof Name) {
return createBuiltInCMap(encoding.name, fetchBuiltInCMap); return createBuiltInCMap(encoding.name, fetchBuiltInCMap);
} else if (encoding instanceof BaseStream) { } else if (encoding instanceof BaseStream) {
@ -1037,8 +720,7 @@ const CMapFactory = (function CMapFactoryClosure() {
return parsedCMap; return parsedCMap;
} }
throw new Error("Encoding required."); throw new Error("Encoding required.");
}, }
}; }
})();
export { CMap, CMapFactory, IdentityCMap }; export { CMap, CMapFactory, IdentityCMap };