From 3f031f69c270bedd3c404e752dc12a72324aca99 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Tue, 7 Jan 2020 19:59:16 +0100
Subject: [PATCH 1/2] Move additional worker-thread only functions from
 `src/shared/util.js` and into a `src/core/core_utils.js` instead

This moves the `log2`, `readInt8`, `readUint16`, `readUint32`, and `isSpace` functions since they are only used in the worker-thread.
---
 src/core/core_utils.js       | 38 ++++++++++++++++++++++++++++++++++++
 src/core/document.js         |  2 +-
 src/core/fonts.js            |  4 +---
 src/core/jbig2.js            | 10 ++--------
 src/core/jpx.js              | 10 ++--------
 src/core/parser.js           |  3 +--
 src/core/ps_parser.js        |  3 ++-
 src/core/stream.js           |  8 ++------
 src/core/type1_parser.js     |  3 ++-
 src/shared/util.js           | 38 ------------------------------------
 test/unit/core_utils_spec.js | 31 +++++++++++++++++++++++++++++
 test/unit/util_spec.js       | 31 -----------------------------
 12 files changed, 82 insertions(+), 99 deletions(-)

diff --git a/src/core/core_utils.js b/src/core/core_utils.js
index f754e68c7..1be5c978a 100644
--- a/src/core/core_utils.js
+++ b/src/core/core_utils.js
@@ -132,6 +132,39 @@ function toRomanNumerals(number, lowerCase = false) {
   return lowerCase ? romanStr.toLowerCase() : romanStr;
 }
 
+// Calculate the base 2 logarithm of the number `x`. This differs from the
+// native function in the sense that it returns the ceiling value and that it
+// returns 0 instead of `Infinity`/`NaN` for `x` values smaller than/equal to 0.
+function log2(x) {
+  if (x <= 0) {
+    return 0;
+  }
+  return Math.ceil(Math.log2(x));
+}
+
+function readInt8(data, offset) {
+  return (data[offset] << 24) >> 24;
+}
+
+function readUint16(data, offset) {
+  return (data[offset] << 8) | data[offset + 1];
+}
+
+function readUint32(data, offset) {
+  return (
+    ((data[offset] << 24) |
+      (data[offset + 1] << 16) |
+      (data[offset + 2] << 8) |
+      data[offset + 3]) >>>
+    0
+  );
+}
+
+// Checks if ch is one of the following characters: SPACE, TAB, CR or LF.
+function isSpace(ch) {
+  return ch === 0x20 || ch === 0x09 || ch === 0x0d || ch === 0x0a;
+}
+
 export {
   getLookupTableFactory,
   MissingDataException,
@@ -139,4 +172,9 @@ export {
   XRefParseException,
   getInheritableProperty,
   toRomanNumerals,
+  log2,
+  readInt8,
+  readUint16,
+  readUint32,
+  isSpace,
 };
diff --git a/src/core/document.js b/src/core/document.js
index 420fa00c3..b58f8f352 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -23,7 +23,6 @@ import {
   isArrayEqual,
   isBool,
   isNum,
-  isSpace,
   isString,
   OPS,
   shadow,
@@ -36,6 +35,7 @@ import { Catalog, ObjectLoader, XRef } from "./obj.js";
 import { Dict, isDict, isName, isStream, Ref } from "./primitives.js";
 import {
   getInheritableProperty,
+  isSpace,
   MissingDataException,
   XRefEntryException,
   XRefParseException,
diff --git a/src/core/fonts.js b/src/core/fonts.js
index 6cb730728..1aa2ee32a 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -21,8 +21,6 @@ import {
   FormatError,
   info,
   isNum,
-  isSpace,
-  readUint32,
   shadow,
   string32,
   unreachable,
@@ -60,9 +58,9 @@ import {
   getUnicodeRangeFor,
   mapSpecialUnicodeValues,
 } from "./unicode.js";
+import { isSpace, MissingDataException, readUint32 } from "./core_utils.js";
 import { FontRendererFactory } from "./font_renderer.js";
 import { IdentityCMap } from "./cmap.js";
-import { MissingDataException } from "./core_utils.js";
 import { Stream } from "./stream.js";
 import { Type1Parser } from "./type1_parser.js";
 
diff --git a/src/core/jbig2.js b/src/core/jbig2.js
index 2eafb1e73..b5b8f6179 100644
--- a/src/core/jbig2.js
+++ b/src/core/jbig2.js
@@ -13,14 +13,8 @@
  * limitations under the License.
  */
 
-import {
-  BaseException,
-  log2,
-  readInt8,
-  readUint16,
-  readUint32,
-  shadow,
-} from "../shared/util.js";
+import { BaseException, shadow } from "../shared/util.js";
+import { log2, readInt8, readUint16, readUint32 } from "./core_utils.js";
 import { ArithmeticDecoder } from "./arithmetic_decoder.js";
 import { CCITTFaxDecoder } from "./ccitt.js";
 
diff --git a/src/core/jpx.js b/src/core/jpx.js
index 513d36b31..3e135be8d 100644
--- a/src/core/jpx.js
+++ b/src/core/jpx.js
@@ -13,14 +13,8 @@
  * limitations under the License.
  */
 
-import {
-  BaseException,
-  info,
-  log2,
-  readUint16,
-  readUint32,
-  warn,
-} from "../shared/util.js";
+import { BaseException, info, warn } from "../shared/util.js";
+import { log2, readUint16, readUint32 } from "./core_utils.js";
 import { ArithmeticDecoder } from "./arithmetic_decoder.js";
 
 class JpxError extends BaseException {
diff --git a/src/core/parser.js b/src/core/parser.js
index 3c9862962..bd92cf4ab 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -29,7 +29,6 @@ import {
   FormatError,
   info,
   isNum,
-  isSpace,
   StreamType,
   warn,
 } from "../shared/util.js";
@@ -44,11 +43,11 @@ import {
   Name,
   Ref,
 } from "./primitives.js";
+import { isSpace, MissingDataException } from "./core_utils.js";
 import { CCITTFaxStream } from "./ccitt_stream.js";
 import { Jbig2Stream } from "./jbig2_stream.js";
 import { JpegStream } from "./jpeg_stream.js";
 import { JpxStream } from "./jpx_stream.js";
-import { MissingDataException } from "./core_utils.js";
 
 const MAX_LENGTH_TO_CACHE = 1000;
 const MAX_ADLER32_LENGTH = 5552;
diff --git a/src/core/ps_parser.js b/src/core/ps_parser.js
index 7cc6b3e82..b3e873fcd 100644
--- a/src/core/ps_parser.js
+++ b/src/core/ps_parser.js
@@ -14,8 +14,9 @@
  */
 /* eslint no-var: error */
 
-import { FormatError, isSpace, shadow } from "../shared/util.js";
+import { FormatError, shadow } from "../shared/util.js";
 import { EOF } from "./primitives.js";
+import { isSpace } from "./core_utils.js";
 
 class PostScriptParser {
   constructor(lexer) {
diff --git a/src/core/stream.js b/src/core/stream.js
index 253b261dd..b0b213011 100644
--- a/src/core/stream.js
+++ b/src/core/stream.js
@@ -19,13 +19,9 @@
  * license.
  */
 
-import {
-  FormatError,
-  isSpace,
-  stringToBytes,
-  unreachable,
-} from "../shared/util.js";
+import { FormatError, stringToBytes, unreachable } from "../shared/util.js";
 import { isDict } from "./primitives.js";
+import { isSpace } from "./core_utils.js";
 
 var Stream = (function StreamClosure() {
   function Stream(arrayBuffer, start, length, dict) {
diff --git a/src/core/type1_parser.js b/src/core/type1_parser.js
index fb84c984d..ad35fa768 100644
--- a/src/core/type1_parser.js
+++ b/src/core/type1_parser.js
@@ -13,9 +13,10 @@
  * limitations under the License.
  */
 
-import { isSpace, warn } from "../shared/util.js";
 import { getEncoding } from "./encodings.js";
+import { isSpace } from "./core_utils.js";
 import { Stream } from "./stream.js";
+import { warn } from "../shared/util.js";
 
 // Hinting is currently disabled due to unknown problems on windows
 // in tracemonkey and various other pdfs with type1 fonts.
diff --git a/src/shared/util.js b/src/shared/util.js
index e32617c0b..6df2ee036 100644
--- a/src/shared/util.js
+++ b/src/shared/util.js
@@ -547,34 +547,6 @@ function string32(value) {
   );
 }
 
-// Calculate the base 2 logarithm of the number `x`. This differs from the
-// native function in the sense that it returns the ceiling value and that it
-// returns 0 instead of `Infinity`/`NaN` for `x` values smaller than/equal to 0.
-function log2(x) {
-  if (x <= 0) {
-    return 0;
-  }
-  return Math.ceil(Math.log2(x));
-}
-
-function readInt8(data, start) {
-  return (data[start] << 24) >> 24;
-}
-
-function readUint16(data, offset) {
-  return (data[offset] << 8) | data[offset + 1];
-}
-
-function readUint32(data, offset) {
-  return (
-    ((data[offset] << 24) |
-      (data[offset + 1] << 16) |
-      (data[offset + 2] << 8) |
-      data[offset + 3]) >>>
-    0
-  );
-}
-
 // Lazy test the endianness of the platform
 // NOTE: This will be 'true' for simulated TypedArrays
 function isLittleEndian() {
@@ -835,11 +807,6 @@ function isArrayEqual(arr1, arr2) {
   });
 }
 
-// Checks if ch is one of the following characters: SPACE, TAB, CR or LF.
-function isSpace(ch) {
-  return ch === 0x20 || ch === 0x09 || ch === 0x0d || ch === 0x0a;
-}
-
 /**
  * Promise Capability object.
  *
@@ -949,15 +916,10 @@ export {
   isEmptyObj,
   isNum,
   isString,
-  isSpace,
   isSameOrigin,
   createValidAbsoluteUrl,
   isLittleEndian,
   isEvalSupported,
-  log2,
-  readInt8,
-  readUint16,
-  readUint32,
   removeNullCharacters,
   setVerbosityLevel,
   shadow,
diff --git a/test/unit/core_utils_spec.js b/test/unit/core_utils_spec.js
index a4184c9f8..9e0590847 100644
--- a/test/unit/core_utils_spec.js
+++ b/test/unit/core_utils_spec.js
@@ -16,6 +16,8 @@
 import { Dict, Ref } from "../../src/core/primitives.js";
 import {
   getInheritableProperty,
+  isSpace,
+  log2,
   toRomanNumerals,
 } from "../../src/core/core_utils.js";
 import { XRefMock } from "./test_utils.js";
@@ -180,4 +182,33 @@ describe("core_utils", function() {
       expect(toRomanNumerals(2019, /* lowercase = */ true)).toEqual("mmxix");
     });
   });
+
+  describe("log2", function() {
+    it("handles values smaller than/equal to zero", function() {
+      expect(log2(0)).toEqual(0);
+      expect(log2(-1)).toEqual(0);
+    });
+
+    it("handles values larger than zero", function() {
+      expect(log2(1)).toEqual(0);
+      expect(log2(2)).toEqual(1);
+      expect(log2(3)).toEqual(2);
+      expect(log2(3.14)).toEqual(2);
+    });
+  });
+
+  describe("isSpace", function() {
+    it("handles space characters", function() {
+      expect(isSpace(0x20)).toEqual(true);
+      expect(isSpace(0x09)).toEqual(true);
+      expect(isSpace(0x0d)).toEqual(true);
+      expect(isSpace(0x0a)).toEqual(true);
+    });
+
+    it("handles non-space characters", function() {
+      expect(isSpace(0x0b)).toEqual(false);
+      expect(isSpace(null)).toEqual(false);
+      expect(isSpace(undefined)).toEqual(false);
+    });
+  });
 });
diff --git a/test/unit/util_spec.js b/test/unit/util_spec.js
index 38f83632e..99fb3fe97 100644
--- a/test/unit/util_spec.js
+++ b/test/unit/util_spec.js
@@ -22,9 +22,7 @@ import {
   isEmptyObj,
   isNum,
   isSameOrigin,
-  isSpace,
   isString,
-  log2,
   removeNullCharacters,
   string32,
   stringToBytes,
@@ -118,21 +116,6 @@ describe("util", function() {
     });
   });
 
-  describe("isSpace", function() {
-    it("handles space characters", function() {
-      expect(isSpace(0x20)).toEqual(true);
-      expect(isSpace(0x09)).toEqual(true);
-      expect(isSpace(0x0d)).toEqual(true);
-      expect(isSpace(0x0a)).toEqual(true);
-    });
-
-    it("handles non-space characters", function() {
-      expect(isSpace(0x0b)).toEqual(false);
-      expect(isSpace(null)).toEqual(false);
-      expect(isSpace(undefined)).toEqual(false);
-    });
-  });
-
   describe("isString", function() {
     it("handles string values", function() {
       expect(isString("foo")).toEqual(true);
@@ -147,20 +130,6 @@ describe("util", function() {
     });
   });
 
-  describe("log2", function() {
-    it("handles values smaller than/equal to zero", function() {
-      expect(log2(0)).toEqual(0);
-      expect(log2(-1)).toEqual(0);
-    });
-
-    it("handles values larger than zero", function() {
-      expect(log2(1)).toEqual(0);
-      expect(log2(2)).toEqual(1);
-      expect(log2(3)).toEqual(2);
-      expect(log2(3.14)).toEqual(2);
-    });
-  });
-
   describe("string32", function() {
     it("converts unsigned 32-bit integers to strings", function() {
       expect(string32(0x74727565)).toEqual("true");

From 188b320e182bc9d81b2daa5176f279fadf3c03d6 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Tue, 7 Jan 2020 20:22:14 +0100
Subject: [PATCH 2/2] Convert `src/core/jpg.js` to use the `readUint16` helper
 function in `src/core/core_utils.js`, rather than re-implementing it twice

The other image decoders, i.e. the JBIG2 and JPEG 2000 ones, are using the common helper function `readUint16`. Most likely, the only reason that the JPEG decoder is doing it this way is because it originated *outside* of the PDF.js library.
Hence we can simply re-factor `src/core/jpg.js` to use the common `readUint16` helper function, which is especially nice given that the functionality was essentially *duplicated* in the code.
---
 src/core/jpg.js | 65 +++++++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/src/core/jpg.js b/src/core/jpg.js
index a17948e89..12d732814 100644
--- a/src/core/jpg.js
+++ b/src/core/jpg.js
@@ -14,6 +14,7 @@
  */
 
 import { assert, BaseException, warn } from "../shared/util.js";
+import { readUint16 } from "./core_utils.js";
 
 class JpegError extends BaseException {
   constructor(msg) {
@@ -148,8 +149,10 @@ var JpegImage = (function JpegImageClosure() {
         var nextByte = data[offset++];
         if (nextByte) {
           if (nextByte === 0xdc && parseDNLMarker) {
-            offset += 2; // Skip data length.
-            const scanLines = (data[offset++] << 8) | data[offset++];
+            offset += 2; // Skip marker length.
+
+            const scanLines = readUint16(data, offset);
+            offset += 2;
             if (scanLines > 0 && scanLines !== frame.scanLines) {
               throw new DNLMarkerError(
                 "Found DNL marker (0xFFDC) while parsing scan data",
@@ -706,17 +709,13 @@ var JpegImage = (function JpegImageClosure() {
   }
 
   function findNextFileMarker(data, currentPos, startPos = currentPos) {
-    function peekUint16(pos) {
-      return (data[pos] << 8) | data[pos + 1];
-    }
-
     const maxPos = data.length - 1;
     var newPos = startPos < currentPos ? startPos : currentPos;
 
     if (currentPos >= maxPos) {
       return null; // Don't attempt to read non-existent data and just return.
     }
-    var currentMarker = peekUint16(currentPos);
+    var currentMarker = readUint16(data, currentPos);
     if (currentMarker >= 0xffc0 && currentMarker <= 0xfffe) {
       return {
         invalid: null,
@@ -724,12 +723,12 @@ var JpegImage = (function JpegImageClosure() {
         offset: currentPos,
       };
     }
-    var newMarker = peekUint16(newPos);
+    var newMarker = readUint16(data, newPos);
     while (!(newMarker >= 0xffc0 && newMarker <= 0xfffe)) {
       if (++newPos >= maxPos) {
         return null; // Don't attempt to read non-existent data and just return.
       }
-      newMarker = peekUint16(newPos);
+      newMarker = readUint16(data, newPos);
     }
     return {
       invalid: currentMarker.toString(16),
@@ -740,15 +739,10 @@ var JpegImage = (function JpegImageClosure() {
 
   JpegImage.prototype = {
     parse(data, { dnlScanLines = null } = {}) {
-      function readUint16() {
-        var value = (data[offset] << 8) | data[offset + 1];
-        offset += 2;
-        return value;
-      }
-
       function readDataBlock() {
-        var length = readUint16();
-        var endOffset = offset + length - 2;
+        const length = readUint16(data, offset);
+        offset += 2;
+        let endOffset = offset + length - 2;
 
         var fileMarker = findNextFileMarker(data, endOffset, offset);
         if (fileMarker && fileMarker.invalid) {
@@ -796,12 +790,15 @@ var JpegImage = (function JpegImageClosure() {
       var quantizationTables = [];
       var huffmanTablesAC = [],
         huffmanTablesDC = [];
-      var fileMarker = readUint16();
+
+      let fileMarker = readUint16(data, offset);
+      offset += 2;
       if (fileMarker !== /* SOI (Start of Image) = */ 0xffd8) {
         throw new JpegError("SOI not found");
       }
+      fileMarker = readUint16(data, offset);
+      offset += 2;
 
-      fileMarker = readUint16();
       markerLoop: while (fileMarker !== /* EOI (End of Image) = */ 0xffd9) {
         var i, j, l;
         switch (fileMarker) {
@@ -868,7 +865,8 @@ var JpegImage = (function JpegImageClosure() {
             break;
 
           case 0xffdb: // DQT (Define Quantization Tables)
-            var quantizationTablesLength = readUint16();
+            const quantizationTablesLength = readUint16(data, offset);
+            offset += 2;
             var quantizationTablesEnd = quantizationTablesLength + offset - 2;
             var z;
             while (offset < quantizationTablesEnd) {
@@ -884,7 +882,8 @@ var JpegImage = (function JpegImageClosure() {
                 // 16 bit values
                 for (j = 0; j < 64; j++) {
                   z = dctZigZag[j];
-                  tableData[z] = readUint16();
+                  tableData[z] = readUint16(data, offset);
+                  offset += 2;
                 }
               } else {
                 throw new JpegError("DQT - invalid table spec");
@@ -899,14 +898,17 @@ var JpegImage = (function JpegImageClosure() {
             if (frame) {
               throw new JpegError("Only single frame JPEGs supported");
             }
-            readUint16(); // skip data length
+            offset += 2; // Skip marker length.
+
             frame = {};
             frame.extended = fileMarker === 0xffc1;
             frame.progressive = fileMarker === 0xffc2;
             frame.precision = data[offset++];
-            const sofScanLines = readUint16();
+            const sofScanLines = readUint16(data, offset);
+            offset += 2;
             frame.scanLines = dnlScanLines || sofScanLines;
-            frame.samplesPerLine = readUint16();
+            frame.samplesPerLine = readUint16(data, offset);
+            offset += 2;
             frame.components = [];
             frame.componentIds = {};
             var componentsCount = data[offset++],
@@ -939,7 +941,8 @@ var JpegImage = (function JpegImageClosure() {
             break;
 
           case 0xffc4: // DHT (Define Huffman Tables)
-            var huffmanLength = readUint16();
+            const huffmanLength = readUint16(data, offset);
+            offset += 2;
             for (i = 2; i < huffmanLength; ) {
               var huffmanTableSpec = data[offset++];
               var codeLengths = new Uint8Array(16);
@@ -960,8 +963,10 @@ var JpegImage = (function JpegImageClosure() {
             break;
 
           case 0xffdd: // DRI (Define Restart Interval)
-            readUint16(); // skip data length
-            resetInterval = readUint16();
+            offset += 2; // Skip marker length.
+
+            resetInterval = readUint16(data, offset);
+            offset += 2;
             break;
 
           case 0xffda: // SOS (Start of Scan)
@@ -971,7 +976,8 @@ var JpegImage = (function JpegImageClosure() {
             // parse DNL markers during re-parsing of the JPEG scan data.
             const parseDNLMarker = ++numSOSMarkers === 1 && !dnlScanLines;
 
-            readUint16(); // scanLength
+            offset += 2; // Skip marker length.
+
             var selectorsCount = data[offset++];
             var components = [],
               component;
@@ -1055,7 +1061,8 @@ var JpegImage = (function JpegImageClosure() {
               "JpegImage.parse - unknown marker: " + fileMarker.toString(16)
             );
         }
-        fileMarker = readUint16();
+        fileMarker = readUint16(data, offset);
+        offset += 2;
       }
 
       this.width = frame.samplesPerLine;