Merge pull request #4264 from fkaelberer/FasterJPXdecoding

Faster JPX decoding
This commit is contained in:
Yury Delendik 2014-02-24 13:53:34 -06:00
commit 5de3e55f70

View File

@ -1692,18 +1692,18 @@ var JpxImage = (function JpxImageClosure() {
} }
return ll; return ll;
}; };
Transform.prototype.expand = function expand(buffer, bufferPadding, step) { Transform.prototype.extend = function extend(buffer, offset, size) {
// Section F.3.7 extending... using max extension of 4 // Section F.3.7 extending... using max extension of 4
var i1 = bufferPadding - 1, j1 = bufferPadding + 1; var i1 = offset - 1, j1 = offset + 1;
var i2 = bufferPadding + step - 2, j2 = bufferPadding + step; var i2 = offset + size - 2, j2 = offset + size;
buffer[i1--] = buffer[j1++];
buffer[j2++] = buffer[i2--];
buffer[i1--] = buffer[j1++]; buffer[i1--] = buffer[j1++];
buffer[j2++] = buffer[i2--]; buffer[j2++] = buffer[i2--];
buffer[i1--] = buffer[j1++]; buffer[i1--] = buffer[j1++];
buffer[j2++] = buffer[i2--]; buffer[j2++] = buffer[i2--];
buffer[i1--] = buffer[j1++]; buffer[i1--] = buffer[j1++];
buffer[j2++] = buffer[i2--]; buffer[j2++] = buffer[i2--];
buffer[i1] = buffer[j1];
buffer[j2] = buffer[i2];
}; };
Transform.prototype.iterate = function Transform_iterate(ll, hl, lh, hh, Transform.prototype.iterate = function Transform_iterate(ll, hl, lh, hh,
u0, v0) { u0, v0) {
@ -1716,32 +1716,35 @@ var JpxImage = (function JpxImageClosure() {
var width = llWidth + hlWidth; var width = llWidth + hlWidth;
var height = llHeight + lhHeight; var height = llHeight + lhHeight;
var items = new Float32Array(width * height); var items = new Float32Array(width * height);
for (var i = 0, ii = llHeight; i < ii; i++) { var i, j, k, l;
for (i = 0; i < llHeight; i++) {
var k = i * llWidth, l = i * 2 * width; var k = i * llWidth, l = i * 2 * width;
for (var j = 0, jj = llWidth; j < jj; j++, k++, l += 2) for (var j = 0; j < llWidth; j++, k++, l += 2) {
items[l] = llItems[k]; items[l] = llItems[k];
} }
for (var i = 0, ii = hlHeight; i < ii; i++) { }
var k = i * hlWidth, l = i * 2 * width + 1; for (i = 0; i < hlHeight; i++) {
for (var j = 0, jj = hlWidth; j < jj; j++, k++, l += 2) k = i * hlWidth, l = i * 2 * width + 1;
for (j = 0; j < hlWidth; j++, k++, l += 2) {
items[l] = hlItems[k]; items[l] = hlItems[k];
} }
for (var i = 0, ii = lhHeight; i < ii; i++) { }
var k = i * lhWidth, l = (i * 2 + 1) * width; for (i = 0; i < lhHeight; i++) {
for (var j = 0, jj = lhWidth; j < jj; j++, k++, l += 2) k = i * lhWidth, l = (i * 2 + 1) * width;
for (j = 0; j < lhWidth; j++, k++, l += 2) {
items[l] = lhItems[k]; items[l] = lhItems[k];
} }
for (var i = 0, ii = hhHeight; i < ii; i++) { }
var k = i * hhWidth, l = (i * 2 + 1) * width + 1; for (i = 0; i < hhHeight; i++) {
for (var j = 0, jj = hhWidth; j < jj; j++, k++, l += 2) k = i * hhWidth, l = (i * 2 + 1) * width + 1;
for (j = 0; j < hhWidth; j++, k++, l += 2) {
items[l] = hhItems[k]; items[l] = hhItems[k];
} }
}
var bufferPadding = 4; var bufferPadding = 4;
var bufferLength = new Float32Array(Math.max(width, height) + var rowBuffer = new Float32Array(width + 2 * bufferPadding);
2 * bufferPadding);
var buffer = new Float32Array(bufferLength);
var bufferOut = new Float32Array(bufferLength);
// Section F.3.4 HOR_SR // Section F.3.4 HOR_SR
for (var v = 0; v < height; v++) { for (var v = 0; v < height; v++) {
@ -1752,21 +1755,28 @@ var JpxImage = (function JpxImageClosure() {
} }
continue; continue;
} }
var k = v * width;
var l = bufferPadding;
for (var u = 0; u < width; u++, k++, l++)
buffer[l] = items[k];
this.expand(buffer, bufferPadding, width);
this.filter(buffer, bufferPadding, width, u0, bufferOut);
k = v * width; k = v * width;
l = bufferPadding; rowBuffer.set(items.subarray(k, k + width), bufferPadding);
for (var u = 0; u < width; u++, k++, l++)
items[k] = bufferOut[l]; this.extend(rowBuffer, bufferPadding, width);
this.filter(rowBuffer, bufferPadding, width, u0, rowBuffer);
items.set(rowBuffer.subarray(bufferPadding, bufferPadding + width), k);
} }
// Accesses to the items array can be slow, because the array may not fit
// into the CPU cache and has to be fetched from main memory. Since
// subsequent accesses to the items array are not local when reading
// columns, we have a cache miss every time. To reduce cache misses, get up
// to 'numBuffers' items at a time and store them into the individual
// buffers. The colBuffers should be small enough to fit into the CPU cache.
var numBuffers = 16;
var colBuffers = [];
for (i = 0; i < numBuffers; i++) {
colBuffers.push(new Float32Array(height + 2 * bufferPadding));
}
var b, currentBuffer = 0, ll = bufferPadding + height;
// Section F.3.5 VER_SR // Section F.3.5 VER_SR
for (var u = 0; u < width; u++) { for (var u = 0; u < width; u++) {
if (height == 1) { if (height == 1) {
@ -1777,19 +1787,33 @@ var JpxImage = (function JpxImageClosure() {
continue; continue;
} }
var k = u; // if we ran out of buffers, copy several image columns at once
var l = bufferPadding; if (currentBuffer === 0) {
for (var v = 0; v < height; v++, k += width, l++) numBuffers = Math.min(width - u, numBuffers);
buffer[l] = items[k]; for (k = u, l = bufferPadding; l < ll; k += width, l++) {
for (b = 0; b < numBuffers; b++) {
this.expand(buffer, bufferPadding, height); colBuffers[b][l] = items[k + b];
this.filter(buffer, bufferPadding, height, v0, bufferOut);
k = u;
l = bufferPadding;
for (var v = 0; v < height; v++, k += width, l++)
items[k] = bufferOut[l];
} }
}
currentBuffer = numBuffers;
}
currentBuffer--;
var buffer = colBuffers[currentBuffer];
this.extend(buffer, bufferPadding, height);
this.filter(buffer, bufferPadding, height, v0, buffer);
// If this is last buffer in this group of buffers, flush all buffers.
if (currentBuffer === 0) {
k = u - numBuffers + 1;
for (l = bufferPadding; l < ll; k += width, l++) {
for (b = 0; b < numBuffers; b++) {
items[k + b] = colBuffers[b][l];
}
}
}
}
return { return {
width: width, width: width,
height: height, height: height,