From 687c9a871033de00657b6f64f429f921ad0dd10f Mon Sep 17 00:00:00 2001
From: Calixte Denizet <calixte.denizet@gmail.com>
Date: Sat, 9 Apr 2022 20:18:29 +0200
Subject: [PATCH] Improve performance of applyMaskImageData - write some uint32
 instead of uint8 to avoid the check before clamping; - unroll the loop to
 write data in the buffer - but keep a loop for the last element of a line: it
 likely doesn't hurt that much since it's executed only once per line; - I
 tested on a MacBook with an Apple chip, and on Firefox Nightly the new code
 is almost 3.5x faster than before (~1.8x with Chrome).

---
 src/shared/image_utils.js | 42 +++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/src/shared/image_utils.js b/src/shared/image_utils.js
index 69d843467..ecc07bc36 100644
--- a/src/shared/image_utils.js
+++ b/src/shared/image_utils.js
@@ -13,30 +13,42 @@
  * limitations under the License.
  */
 
+import { FeatureTest } from "./util.js";
+
 function applyMaskImageData({
   src,
   srcPos = 0,
   dest,
-  destPos = 3,
+  destPos = 0,
   width,
   height,
   inverseDecode = false,
 }) {
-  const srcLength = src.byteLength;
-  const zeroMapping = inverseDecode ? 0 : 255;
-  const oneMapping = inverseDecode ? 255 : 0;
+  const opaque = FeatureTest.isLittleEndian ? 0xff000000 : 0x000000ff;
+  const [zeroMapping, oneMapping] = !inverseDecode ? [opaque, 0] : [0, opaque];
+  const widthInSource = width >> 3;
+  const widthRemainder = width & 7;
+  const srcLength = src.length;
+  dest = new Uint32Array(dest.buffer);
 
-  for (let j = 0; j < height; j++) {
-    let elem,
-      mask = 0;
-    for (let k = 0; k < width; k++) {
-      if (mask === 0) {
-        elem = srcPos < srcLength ? src[srcPos++] : 255;
-        mask = 128;
-      }
-      dest[destPos] = elem & mask ? oneMapping : zeroMapping;
-      destPos += 4;
-      mask >>= 1;
+  for (let i = 0; i < height; i++) {
+    for (const max = srcPos + widthInSource; srcPos < max; srcPos++) {
+      const elem = srcPos < srcLength ? src[srcPos] : 255;
+      dest[destPos++] = elem & 0b10000000 ? oneMapping : zeroMapping;
+      dest[destPos++] = elem & 0b1000000 ? oneMapping : zeroMapping;
+      dest[destPos++] = elem & 0b100000 ? oneMapping : zeroMapping;
+      dest[destPos++] = elem & 0b10000 ? oneMapping : zeroMapping;
+      dest[destPos++] = elem & 0b1000 ? oneMapping : zeroMapping;
+      dest[destPos++] = elem & 0b100 ? oneMapping : zeroMapping;
+      dest[destPos++] = elem & 0b10 ? oneMapping : zeroMapping;
+      dest[destPos++] = elem & 0b1 ? oneMapping : zeroMapping;
+    }
+    if (widthRemainder === 0) {
+      continue;
+    }
+    const elem = srcPos < srcLength ? src[srcPos++] : 255;
+    for (let j = 0; j < widthRemainder; j++) {
+      dest[destPos++] = elem & (1 << (7 - j)) ? oneMapping : zeroMapping;
     }
   }