Merge pull request #13754 from Snuffleupagus/refactor-PDFFunction

Remove the IR (internal representation) part of the `PDFFunction` parsing
This commit is contained in:
Tim van der Meij 2021-07-18 13:11:26 +02:00 committed by GitHub
commit e04386c675
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -131,492 +131,377 @@ function toNumberArray(arr) {
return arr;
}
const PDFFunction = (function PDFFunctionClosure() {
const CONSTRUCT_SAMPLED = 0;
const CONSTRUCT_INTERPOLATED = 2;
const CONSTRUCT_STICHED = 3;
const CONSTRUCT_POSTSCRIPT = 4;
class PDFFunction {
static getSampleArray(size, outputSize, bps, stream) {
let i, ii;
let length = 1;
for (i = 0, ii = size.length; i < ii; i++) {
length *= size[i];
}
length *= outputSize;
return {
getSampleArray(size, outputSize, bps, stream) {
let i, ii;
let length = 1;
for (i = 0, ii = size.length; i < ii; i++) {
length *= size[i];
const array = new Array(length);
let codeSize = 0;
let codeBuf = 0;
// 32 is a valid bps so shifting won't work
const sampleMul = 1.0 / (2.0 ** bps - 1);
const strBytes = stream.getBytes((length * bps + 7) / 8);
let strIdx = 0;
for (i = 0; i < length; i++) {
while (codeSize < bps) {
codeBuf <<= 8;
codeBuf |= strBytes[strIdx++];
codeSize += 8;
}
length *= outputSize;
codeSize -= bps;
array[i] = (codeBuf >> codeSize) * sampleMul;
codeBuf &= (1 << codeSize) - 1;
}
return array;
}
const array = new Array(length);
let codeSize = 0;
let codeBuf = 0;
// 32 is a valid bps so shifting won't work
const sampleMul = 1.0 / (2.0 ** bps - 1);
static parse({ xref, isEvalSupported, fn }) {
const dict = fn.dict || fn;
const typeNum = dict.get("FunctionType");
const strBytes = stream.getBytes((length * bps + 7) / 8);
let strIdx = 0;
for (i = 0; i < length; i++) {
while (codeSize < bps) {
codeBuf <<= 8;
codeBuf |= strBytes[strIdx++];
codeSize += 8;
}
codeSize -= bps;
array[i] = (codeBuf >> codeSize) * sampleMul;
codeBuf &= (1 << codeSize) - 1;
switch (typeNum) {
case 0:
return this.constructSampled({ xref, isEvalSupported, fn, dict });
case 1:
break;
case 2:
return this.constructInterpolated({ xref, isEvalSupported, dict });
case 3:
return this.constructStiched({ xref, isEvalSupported, dict });
case 4:
return this.constructPostScript({ xref, isEvalSupported, fn, dict });
}
throw new FormatError("Unknown type of function");
}
static parseArray({ xref, isEvalSupported, fnObj }) {
if (!Array.isArray(fnObj)) {
// not an array -- parsing as regular function
return this.parse({ xref, isEvalSupported, fn: fnObj });
}
const fnArray = [];
for (let j = 0, jj = fnObj.length; j < jj; j++) {
fnArray.push(
this.parse({ xref, isEvalSupported, fn: xref.fetchIfRef(fnObj[j]) })
);
}
return function (src, srcOffset, dest, destOffset) {
for (let i = 0, ii = fnArray.length; i < ii; i++) {
fnArray[i](src, srcOffset, dest, destOffset + i);
}
return array;
},
};
}
getIR({ xref, isEvalSupported, fn }) {
let dict = fn.dict;
if (!dict) {
dict = fn;
static constructSampled({ xref, isEvalSupported, fn, dict }) {
function toMultiArray(arr) {
const inputLength = arr.length;
const out = [];
let index = 0;
for (let i = 0; i < inputLength; i += 2) {
out[index++] = [arr[i], arr[i + 1]];
}
return out;
}
// See chapter 3, page 109 of the PDF reference
function interpolate(x, xmin, xmax, ymin, ymax) {
return ymin + (x - xmin) * ((ymax - ymin) / (xmax - xmin));
}
let domain = toNumberArray(dict.getArray("Domain"));
let range = toNumberArray(dict.getArray("Range"));
if (!domain || !range) {
throw new FormatError("No domain or range");
}
const inputSize = domain.length / 2;
const outputSize = range.length / 2;
domain = toMultiArray(domain);
range = toMultiArray(range);
const size = toNumberArray(dict.getArray("Size"));
const bps = dict.get("BitsPerSample");
const order = dict.get("Order") || 1;
if (order !== 1) {
// No description how cubic spline interpolation works in PDF32000:2008
// As in poppler, ignoring order, linear interpolation may work as good
info("No support for cubic spline interpolation: " + order);
}
let encode = toNumberArray(dict.getArray("Encode"));
if (!encode) {
encode = [];
for (let i = 0; i < inputSize; ++i) {
encode.push([0, size[i] - 1]);
}
} else {
encode = toMultiArray(encode);
}
let decode = toNumberArray(dict.getArray("Decode"));
if (!decode) {
decode = range;
} else {
decode = toMultiArray(decode);
}
const samples = this.getSampleArray(size, outputSize, bps, fn);
// const mask = 2 ** bps - 1;
return function constructSampledFn(src, srcOffset, dest, destOffset) {
// See chapter 3, page 110 of the PDF reference.
// Building the cube vertices: its part and sample index
// http://rjwagner49.com/Mathematics/Interpolation.pdf
const cubeVertices = 1 << inputSize;
const cubeN = new Float64Array(cubeVertices);
const cubeVertex = new Uint32Array(cubeVertices);
let i, j;
for (j = 0; j < cubeVertices; j++) {
cubeN[j] = 1;
}
const types = [
this.constructSampled,
null,
this.constructInterpolated,
this.constructStiched,
this.constructPostScript,
];
const typeNum = dict.get("FunctionType");
const typeFn = types[typeNum];
if (!typeFn) {
throw new FormatError("Unknown type of function");
}
return typeFn.call(this, { xref, isEvalSupported, fn, dict });
},
fromIR({ xref, isEvalSupported, IR }) {
const type = IR[0];
switch (type) {
case CONSTRUCT_SAMPLED:
return this.constructSampledFromIR({ xref, isEvalSupported, IR });
case CONSTRUCT_INTERPOLATED:
return this.constructInterpolatedFromIR({
xref,
isEvalSupported,
IR,
});
case CONSTRUCT_STICHED:
return this.constructStichedFromIR({ xref, isEvalSupported, IR });
// case CONSTRUCT_POSTSCRIPT:
default:
return this.constructPostScriptFromIR({ xref, isEvalSupported, IR });
}
},
parse({ xref, isEvalSupported, fn }) {
const IR = this.getIR({ xref, isEvalSupported, fn });
return this.fromIR({ xref, isEvalSupported, IR });
},
parseArray({ xref, isEvalSupported, fnObj }) {
if (!Array.isArray(fnObj)) {
// not an array -- parsing as regular function
return this.parse({ xref, isEvalSupported, fn: fnObj });
}
const fnArray = [];
for (let j = 0, jj = fnObj.length; j < jj; j++) {
fnArray.push(
this.parse({ xref, isEvalSupported, fn: xref.fetchIfRef(fnObj[j]) })
let k = outputSize,
pos = 1;
// Map x_i to y_j for 0 <= i < m using the sampled function.
for (i = 0; i < inputSize; ++i) {
// x_i' = min(max(x_i, Domain_2i), Domain_2i+1)
const domain_2i = domain[i][0];
const domain_2i_1 = domain[i][1];
const xi = Math.min(
Math.max(src[srcOffset + i], domain_2i),
domain_2i_1
);
}
return function (src, srcOffset, dest, destOffset) {
for (let i = 0, ii = fnArray.length; i < ii; i++) {
fnArray[i](src, srcOffset, dest, destOffset + i);
}
};
},
constructSampled({ xref, isEvalSupported, fn, dict }) {
function toMultiArray(arr) {
const inputLength = arr.length;
const out = [];
let index = 0;
for (let i = 0; i < inputLength; i += 2) {
out[index] = [arr[i], arr[i + 1]];
++index;
}
return out;
}
let domain = toNumberArray(dict.getArray("Domain"));
let range = toNumberArray(dict.getArray("Range"));
// e_i = Interpolate(x_i', Domain_2i, Domain_2i+1,
// Encode_2i, Encode_2i+1)
let e = interpolate(
xi,
domain_2i,
domain_2i_1,
encode[i][0],
encode[i][1]
);
if (!domain || !range) {
throw new FormatError("No domain or range");
}
// e_i' = min(max(e_i, 0), Size_i - 1)
const size_i = size[i];
e = Math.min(Math.max(e, 0), size_i - 1);
const inputSize = domain.length / 2;
const outputSize = range.length / 2;
domain = toMultiArray(domain);
range = toMultiArray(range);
const size = toNumberArray(dict.getArray("Size"));
const bps = dict.get("BitsPerSample");
const order = dict.get("Order") || 1;
if (order !== 1) {
// No description how cubic spline interpolation works in PDF32000:2008
// As in poppler, ignoring order, linear interpolation may work as good
info("No support for cubic spline interpolation: " + order);
}
let encode = toNumberArray(dict.getArray("Encode"));
if (!encode) {
encode = [];
for (let i = 0; i < inputSize; ++i) {
encode.push([0, size[i] - 1]);
}
} else {
encode = toMultiArray(encode);
}
let decode = toNumberArray(dict.getArray("Decode"));
if (!decode) {
decode = range;
} else {
decode = toMultiArray(decode);
}
const samples = this.getSampleArray(size, outputSize, bps, fn);
return [
CONSTRUCT_SAMPLED,
inputSize,
domain,
encode,
decode,
samples,
size,
outputSize,
2 ** bps - 1,
range,
];
},
constructSampledFromIR({ xref, isEvalSupported, IR }) {
// See chapter 3, page 109 of the PDF reference
function interpolate(x, xmin, xmax, ymin, ymax) {
return ymin + (x - xmin) * ((ymax - ymin) / (xmax - xmin));
}
return function constructSampledFromIRResult(
src,
srcOffset,
dest,
destOffset
) {
// See chapter 3, page 110 of the PDF reference.
const m = IR[1];
const domain = IR[2];
const encode = IR[3];
const decode = IR[4];
const samples = IR[5];
const size = IR[6];
const n = IR[7];
// var mask = IR[8];
const range = IR[9];
// Building the cube vertices: its part and sample index
// http://rjwagner49.com/Mathematics/Interpolation.pdf
const cubeVertices = 1 << m;
const cubeN = new Float64Array(cubeVertices);
const cubeVertex = new Uint32Array(cubeVertices);
let i, j;
// Adjusting the cube: N and vertex sample index
const e0 = e < size_i - 1 ? Math.floor(e) : e - 1; // e1 = e0 + 1;
const n0 = e0 + 1 - e; // (e1 - e) / (e1 - e0);
const n1 = e - e0; // (e - e0) / (e1 - e0);
const offset0 = e0 * k;
const offset1 = offset0 + k; // e1 * k
for (j = 0; j < cubeVertices; j++) {
cubeN[j] = 1;
}
let k = n,
pos = 1;
// Map x_i to y_j for 0 <= i < m using the sampled function.
for (i = 0; i < m; ++i) {
// x_i' = min(max(x_i, Domain_2i), Domain_2i+1)
const domain_2i = domain[i][0];
const domain_2i_1 = domain[i][1];
const xi = Math.min(
Math.max(src[srcOffset + i], domain_2i),
domain_2i_1
);
// e_i = Interpolate(x_i', Domain_2i, Domain_2i+1,
// Encode_2i, Encode_2i+1)
let e = interpolate(
xi,
domain_2i,
domain_2i_1,
encode[i][0],
encode[i][1]
);
// e_i' = min(max(e_i, 0), Size_i - 1)
const size_i = size[i];
e = Math.min(Math.max(e, 0), size_i - 1);
// Adjusting the cube: N and vertex sample index
const e0 = e < size_i - 1 ? Math.floor(e) : e - 1; // e1 = e0 + 1;
const n0 = e0 + 1 - e; // (e1 - e) / (e1 - e0);
const n1 = e - e0; // (e - e0) / (e1 - e0);
const offset0 = e0 * k;
const offset1 = offset0 + k; // e1 * k
for (j = 0; j < cubeVertices; j++) {
if (j & pos) {
cubeN[j] *= n1;
cubeVertex[j] += offset1;
} else {
cubeN[j] *= n0;
cubeVertex[j] += offset0;
}
}
k *= size_i;
pos <<= 1;
}
for (j = 0; j < n; ++j) {
// Sum all cube vertices' samples portions
let rj = 0;
for (i = 0; i < cubeVertices; i++) {
rj += samples[cubeVertex[i] + j] * cubeN[i];
}
// r_j' = Interpolate(r_j, 0, 2^BitsPerSample - 1,
// Decode_2j, Decode_2j+1)
rj = interpolate(rj, 0, 1, decode[j][0], decode[j][1]);
// y_j = min(max(r_j, range_2j), range_2j+1)
dest[destOffset + j] = Math.min(
Math.max(rj, range[j][0]),
range[j][1]
);
}
};
},
constructInterpolated({ xref, isEvalSupported, fn, dict }) {
const c0 = toNumberArray(dict.getArray("C0")) || [0];
const c1 = toNumberArray(dict.getArray("C1")) || [1];
const n = dict.get("N");
const length = c0.length;
const diff = [];
for (let i = 0; i < length; ++i) {
diff.push(c1[i] - c0[i]);
}
return [CONSTRUCT_INTERPOLATED, c0, diff, n];
},
constructInterpolatedFromIR({ xref, isEvalSupported, IR }) {
const c0 = IR[1];
const diff = IR[2];
const n = IR[3];
const length = diff.length;
return function constructInterpolatedFromIRResult(
src,
srcOffset,
dest,
destOffset
) {
const x = n === 1 ? src[srcOffset] : src[srcOffset] ** n;
for (let j = 0; j < length; ++j) {
dest[destOffset + j] = c0[j] + x * diff[j];
}
};
},
constructStiched({ xref, isEvalSupported, fn, dict }) {
const domain = toNumberArray(dict.getArray("Domain"));
if (!domain) {
throw new FormatError("No domain");
}
const inputSize = domain.length / 2;
if (inputSize !== 1) {
throw new FormatError("Bad domain for stiched function");
}
const fnRefs = dict.get("Functions");
const fns = [];
for (let i = 0, ii = fnRefs.length; i < ii; ++i) {
fns.push(
this.parse({ xref, isEvalSupported, fn: xref.fetchIfRef(fnRefs[i]) })
);
}
const bounds = toNumberArray(dict.getArray("Bounds"));
const encode = toNumberArray(dict.getArray("Encode"));
return [CONSTRUCT_STICHED, domain, bounds, encode, fns];
},
constructStichedFromIR({ xref, isEvalSupported, IR }) {
const domain = IR[1];
const bounds = IR[2];
const encode = IR[3];
const fns = IR[4];
const tmpBuf = new Float32Array(1);
return function constructStichedFromIRResult(
src,
srcOffset,
dest,
destOffset
) {
const clip = function constructStichedFromIRClip(v, min, max) {
if (v > max) {
v = max;
} else if (v < min) {
v = min;
}
return v;
};
// clip to domain
const v = clip(src[srcOffset], domain[0], domain[1]);
// calculate which bound the value is in
const length = bounds.length;
let i;
for (i = 0; i < length; ++i) {
if (v < bounds[i]) {
break;
}
}
// encode value into domain of function
let dmin = domain[0];
if (i > 0) {
dmin = bounds[i - 1];
}
let dmax = domain[1];
if (i < bounds.length) {
dmax = bounds[i];
}
const rmin = encode[2 * i];
const rmax = encode[2 * i + 1];
// Prevent the value from becoming NaN as a result
// of division by zero (fixes issue6113.pdf).
tmpBuf[0] =
dmin === dmax
? rmin
: rmin + ((v - dmin) * (rmax - rmin)) / (dmax - dmin);
// call the appropriate function
fns[i](tmpBuf, 0, dest, destOffset);
};
},
constructPostScript({ xref, isEvalSupported, fn, dict }) {
const domain = toNumberArray(dict.getArray("Domain"));
const range = toNumberArray(dict.getArray("Range"));
if (!domain) {
throw new FormatError("No domain.");
}
if (!range) {
throw new FormatError("No range.");
}
const lexer = new PostScriptLexer(fn);
const parser = new PostScriptParser(lexer);
const code = parser.parse();
return [CONSTRUCT_POSTSCRIPT, domain, range, code];
},
constructPostScriptFromIR({ xref, isEvalSupported, IR }) {
const domain = IR[1];
const range = IR[2];
const code = IR[3];
if (isEvalSupported && IsEvalSupportedCached.value) {
const compiled = new PostScriptCompiler().compile(code, domain, range);
if (compiled) {
// Compiled function consists of simple expressions such as addition,
// subtraction, Math.max, and also contains 'var' and 'return'
// statements. See the generation in the PostScriptCompiler below.
// eslint-disable-next-line no-new-func
return new Function(
"src",
"srcOffset",
"dest",
"destOffset",
compiled
);
}
}
info("Unable to compile PS function");
const numOutputs = range.length >> 1;
const numInputs = domain.length >> 1;
const evaluator = new PostScriptEvaluator(code);
// Cache the values for a big speed up, the cache size is limited though
// since the number of possible values can be huge from a PS function.
const cache = Object.create(null);
// The MAX_CACHE_SIZE is set to ~4x the maximum number of distinct values
// seen in our tests.
const MAX_CACHE_SIZE = 2048 * 4;
let cache_available = MAX_CACHE_SIZE;
const tmpBuf = new Float32Array(numInputs);
return function constructPostScriptFromIRResult(
src,
srcOffset,
dest,
destOffset
) {
let i, value;
let key = "";
const input = tmpBuf;
for (i = 0; i < numInputs; i++) {
value = src[srcOffset + i];
input[i] = value;
key += value + "_";
}
const cachedValue = cache[key];
if (cachedValue !== undefined) {
dest.set(cachedValue, destOffset);
return;
}
const output = new Float32Array(numOutputs);
const stack = evaluator.execute(input);
const stackIndex = stack.length - numOutputs;
for (i = 0; i < numOutputs; i++) {
value = stack[stackIndex + i];
let bound = range[i * 2];
if (value < bound) {
value = bound;
if (j & pos) {
cubeN[j] *= n1;
cubeVertex[j] += offset1;
} else {
bound = range[i * 2 + 1];
if (value > bound) {
value = bound;
}
cubeN[j] *= n0;
cubeVertex[j] += offset0;
}
output[i] = value;
}
if (cache_available > 0) {
cache_available--;
cache[key] = output;
k *= size_i;
pos <<= 1;
}
for (j = 0; j < outputSize; ++j) {
// Sum all cube vertices' samples portions
let rj = 0;
for (i = 0; i < cubeVertices; i++) {
rj += samples[cubeVertex[i] + j] * cubeN[i];
}
dest.set(output, destOffset);
// r_j' = Interpolate(r_j, 0, 2^BitsPerSample - 1,
// Decode_2j, Decode_2j+1)
rj = interpolate(rj, 0, 1, decode[j][0], decode[j][1]);
// y_j = min(max(r_j, range_2j), range_2j+1)
dest[destOffset + j] = Math.min(Math.max(rj, range[j][0]), range[j][1]);
}
};
}
static constructInterpolated({ xref, isEvalSupported, dict }) {
const c0 = toNumberArray(dict.getArray("C0")) || [0];
const c1 = toNumberArray(dict.getArray("C1")) || [1];
const n = dict.get("N");
const diff = [];
for (let i = 0, ii = c0.length; i < ii; ++i) {
diff.push(c1[i] - c0[i]);
}
const length = diff.length;
return function constructInterpolatedFn(src, srcOffset, dest, destOffset) {
const x = n === 1 ? src[srcOffset] : src[srcOffset] ** n;
for (let j = 0; j < length; ++j) {
dest[destOffset + j] = c0[j] + x * diff[j];
}
};
}
static constructStiched({ xref, isEvalSupported, dict }) {
const domain = toNumberArray(dict.getArray("Domain"));
if (!domain) {
throw new FormatError("No domain");
}
const inputSize = domain.length / 2;
if (inputSize !== 1) {
throw new FormatError("Bad domain for stiched function");
}
const fnRefs = dict.get("Functions");
const fns = [];
for (let i = 0, ii = fnRefs.length; i < ii; ++i) {
fns.push(
this.parse({ xref, isEvalSupported, fn: xref.fetchIfRef(fnRefs[i]) })
);
}
const bounds = toNumberArray(dict.getArray("Bounds"));
const encode = toNumberArray(dict.getArray("Encode"));
const tmpBuf = new Float32Array(1);
return function constructStichedFn(src, srcOffset, dest, destOffset) {
const clip = function constructStichedFromIRClip(v, min, max) {
if (v > max) {
v = max;
} else if (v < min) {
v = min;
}
return v;
};
},
};
})();
// clip to domain
const v = clip(src[srcOffset], domain[0], domain[1]);
// calculate which bound the value is in
const length = bounds.length;
let i;
for (i = 0; i < length; ++i) {
if (v < bounds[i]) {
break;
}
}
// encode value into domain of function
let dmin = domain[0];
if (i > 0) {
dmin = bounds[i - 1];
}
let dmax = domain[1];
if (i < bounds.length) {
dmax = bounds[i];
}
const rmin = encode[2 * i];
const rmax = encode[2 * i + 1];
// Prevent the value from becoming NaN as a result
// of division by zero (fixes issue6113.pdf).
tmpBuf[0] =
dmin === dmax
? rmin
: rmin + ((v - dmin) * (rmax - rmin)) / (dmax - dmin);
// call the appropriate function
fns[i](tmpBuf, 0, dest, destOffset);
};
}
static constructPostScript({ xref, isEvalSupported, fn, dict }) {
const domain = toNumberArray(dict.getArray("Domain"));
const range = toNumberArray(dict.getArray("Range"));
if (!domain) {
throw new FormatError("No domain.");
}
if (!range) {
throw new FormatError("No range.");
}
const lexer = new PostScriptLexer(fn);
const parser = new PostScriptParser(lexer);
const code = parser.parse();
if (isEvalSupported && IsEvalSupportedCached.value) {
const compiled = new PostScriptCompiler().compile(code, domain, range);
if (compiled) {
// Compiled function consists of simple expressions such as addition,
// subtraction, Math.max, and also contains 'var' and 'return'
// statements. See the generation in the PostScriptCompiler below.
// eslint-disable-next-line no-new-func
return new Function("src", "srcOffset", "dest", "destOffset", compiled);
}
}
info("Unable to compile PS function");
const numOutputs = range.length >> 1;
const numInputs = domain.length >> 1;
const evaluator = new PostScriptEvaluator(code);
// Cache the values for a big speed up, the cache size is limited though
// since the number of possible values can be huge from a PS function.
const cache = Object.create(null);
// The MAX_CACHE_SIZE is set to ~4x the maximum number of distinct values
// seen in our tests.
const MAX_CACHE_SIZE = 2048 * 4;
let cache_available = MAX_CACHE_SIZE;
const tmpBuf = new Float32Array(numInputs);
return function constructPostScriptFn(src, srcOffset, dest, destOffset) {
let i, value;
let key = "";
const input = tmpBuf;
for (i = 0; i < numInputs; i++) {
value = src[srcOffset + i];
input[i] = value;
key += value + "_";
}
const cachedValue = cache[key];
if (cachedValue !== undefined) {
dest.set(cachedValue, destOffset);
return;
}
const output = new Float32Array(numOutputs);
const stack = evaluator.execute(input);
const stackIndex = stack.length - numOutputs;
for (i = 0; i < numOutputs; i++) {
value = stack[stackIndex + i];
let bound = range[i * 2];
if (value < bound) {
value = bound;
} else {
bound = range[i * 2 + 1];
if (value > bound) {
value = bound;
}
}
output[i] = value;
}
if (cache_available > 0) {
cache_available--;
cache[key] = output;
}
dest.set(output, destOffset);
};
}
}
function isPDFFunction(v) {
let fnDict;