From 2eaa708e3a6b0a07ee79e1312beaa436d9757343 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Wed, 16 Nov 2022 12:05:00 +0100
Subject: [PATCH] Combine the `stringToUTF16String` and `stringToUTF16BEString`
 helper functions

Given that these functions are virtually identical, with the latter only prepending a BOM, we can combine the two: `stringToUTF16String` now accepts an optional `bigEndian` parameter (defaulting to `false`) which, when set, adds the BOM. Furthermore, since both functions were only used on the worker thread, there's no reason to duplicate this functionality in both the `pdf.js` and `pdf.worker.js` files.
---
 src/core/annotation.js       | 16 ++++++++++++----
 src/core/core_utils.js       |  5 ++++-
 src/shared/util.js           | 13 -------------
 test/unit/core_utils_spec.js | 25 +++++++++++++++++++++++++
 test/unit/util_spec.js       | 13 -------------
 5 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/src/core/annotation.js b/src/core/annotation.js
index 23fa29252..2582f79a0 100644
--- a/src/core/annotation.js
+++ b/src/core/annotation.js
@@ -34,7 +34,6 @@ import {
   RenderingIntentFlag,
   shadow,
   stringToPDFString,
-  stringToUTF16BEString,
   unreachable,
   Util,
   warn,
@@ -1879,7 +1878,11 @@ class WidgetAnnotation extends Annotation {
       value,
     };
 
-    const encoder = val => (isAscii(val) ? val : stringToUTF16BEString(val));
+    const encoder = val => {
+      return isAscii(val)
+        ? val
+        : stringToUTF16String(val, /* bigEndian = */ true);
+    };
     dict.set("V", Array.isArray(value) ? value.map(encoder) : encoder(value));
 
     const maybeMK = this._getMKDict(rotation);
@@ -3546,14 +3549,19 @@ class FreeTextAnnotation extends MarkupAnnotation {
     freetext.set("DA", da);
     freetext.set(
       "Contents",
-      isAscii(value) ? value : stringToUTF16BEString(value)
+      isAscii(value)
+        ? value
+        : stringToUTF16String(value, /* bigEndian = */ true)
     );
     freetext.set("F", 4);
     freetext.set("Border", [0, 0, 0]);
     freetext.set("Rotate", rotation);
 
     if (user) {
-      freetext.set("T", isAscii(user) ? user : stringToUTF16BEString(user));
+      freetext.set(
+        "T",
+        isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true)
+      );
     }
 
     if (apRef || ap) {
diff --git a/src/core/core_utils.js b/src/core/core_utils.js
index f8ba471ee..6794f7768 100644
--- a/src/core/core_utils.js
+++ b/src/core/core_utils.js
@@ -584,8 +584,11 @@ function stringToUTF16HexString(str) {
   return buf.join("");
 }
 
-function stringToUTF16String(str) {
+function stringToUTF16String(str, bigEndian = false) {
   const buf = [];
+  if (bigEndian) {
+    buf.push("\xFE\xFF");
+  }
   for (let i = 0, ii = str.length; i < ii; i++) {
     const char = str.charCodeAt(i);
     buf.push(
diff --git a/src/shared/util.js b/src/shared/util.js
index e193268bc..4cf01b515 100644
--- a/src/shared/util.js
+++ b/src/shared/util.js
@@ -1055,18 +1055,6 @@ function isAscii(str) {
   return /^[\x00-\x7F]*$/.test(str);
 }
 
-function stringToUTF16BEString(str) {
-  const buf = ["\xFE\xFF"];
-  for (let i = 0, ii = str.length; i < ii; i++) {
-    const char = str.charCodeAt(i);
-    buf.push(
-      String.fromCharCode((char >> 8) & 0xff),
-      String.fromCharCode(char & 0xff)
-    );
-  }
-  return buf.join("");
-}
-
 function stringToUTF8String(str) {
   return decodeURIComponent(escape(str));
 }
@@ -1198,7 +1186,6 @@ export {
   string32,
   stringToBytes,
   stringToPDFString,
-  stringToUTF16BEString,
   stringToUTF8String,
   TextRenderingMode,
   UnexpectedResponseException,
diff --git a/test/unit/core_utils_spec.js b/test/unit/core_utils_spec.js
index cc5c9e69e..6072855f9 100644
--- a/test/unit/core_utils_spec.js
+++ b/test/unit/core_utils_spec.js
@@ -21,6 +21,7 @@ import {
   isWhiteSpace,
   log2,
   parseXFAPath,
+  stringToUTF16String,
   toRomanNumerals,
   validateCSSFont,
 } from "../../src/core/core_utils.js";
@@ -333,4 +334,28 @@ describe("core_utils", function () {
       expect(cssFontInfo.italicAngle).toEqual("2.718");
     });
   });
+
+  describe("stringToUTF16String", function () {
+    it("should encode a string in UTF16", function () {
+      expect(stringToUTF16String("hello world")).toEqual(
+        "\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"
+      );
+
+      expect(stringToUTF16String("こんにちは世界の")).toEqual(
+        "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
+      );
+    });
+
+    it("should encode a string in UTF16BE with a BOM", function () {
+      expect(
+        stringToUTF16String("hello world", /* bigEndian = */ true)
+      ).toEqual("\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");
+
+      expect(
+        stringToUTF16String("こんにちは世界の", /* bigEndian = */ true)
+      ).toEqual(
+        "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
+      );
+    });
+  });
 });
diff --git a/test/unit/util_spec.js b/test/unit/util_spec.js
index 39874013e..3eab37e21 100644
--- a/test/unit/util_spec.js
+++ b/test/unit/util_spec.js
@@ -24,7 +24,6 @@ import {
   string32,
   stringToBytes,
   stringToPDFString,
-  stringToUTF16BEString,
 } from "../../src/shared/util.js";
 
 describe("util", function () {
@@ -270,16 +269,4 @@ describe("util", function () {
       );
     });
   });
-
-  describe("stringToUTF16BEString", function () {
-    it("should encode a string in UTF16BE with a BOM", function () {
-      expect(stringToUTF16BEString("hello world")).toEqual(
-        "\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"
-      );
-      expect(stringToUTF16BEString("こんにちは世界の")).toEqual(
-        "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61" +
-          "\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
-      );
-    });
-  });
 });