Merge pull request #12292 from calixteman/encoding

Fix encoding issues when printing/saving a form with non-ascii characters
2021-01-07 22:56:42 +01:00 · 2021-01-07 22:56:42 +01:00 · 5bde4b71f8
commit 5bde4b71f8
parent 4be76c89d1 56424967f2
6 changed files with 508 additions and 57 deletions
--- a/src/core/annotation.js
+++ b/src/core/annotation.js
@ -23,10 +23,12 @@ import {
  assert,
  escapeString,
  getModificationDate,
+  isAscii,
  isString,
  OPS,
  shadow,
  stringToPDFString,
+  stringToUTF16BEString,
  unreachable,
  Util,
  warn,
@ -1222,7 +1224,7 @@ class WidgetAnnotation extends Annotation {
      appearance = newTransform.encryptString(appearance);
    }

-    dict.set("V", value);
+    dict.set("V", isAscii(value) ? value : stringToUTF16BEString(value));
    dict.set("AP", AP);
    dict.set("M", `D:${getModificationDate()}`);

@ -1298,16 +1300,6 @@ class WidgetAnnotation extends Annotation {
    const defaultAppearance = this.data.defaultAppearance;
    const alignment = this.data.textAlignment;

-    if (this.data.comb) {
-      return this._getCombAppearance(
-        defaultAppearance,
-        value,
-        totalWidth,
-        hPadding,
-        vPadding
-      );
-    }
-
    if (this.data.multiLine) {
      return this._getMultilineAppearance(
        defaultAppearance,
@ -1322,18 +1314,34 @@ class WidgetAnnotation extends Annotation {
      );
    }

+    // TODO: need to handle chars which are not in the font.
+    const encodedString = font.encodeString(value).join("");
+
+    if (this.data.comb) {
+      return this._getCombAppearance(
+        defaultAppearance,
+        font,
+        encodedString,
+        totalWidth,
+        hPadding,
+        vPadding
+      );
+    }
+
    if (alignment === 0 || alignment > 2) {
      // Left alignment: nothing to do
      return (
        "/Tx BMC q BT " +
        defaultAppearance +
-        ` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(value)}) Tj` +
+        ` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(
+          encodedString
+        )}) Tj` +
        " ET Q EMC"
      );
    }

    const renderedText = this._renderText(
-      value,
+      encodedString,
      font,
      fontSize,
      totalWidth,
@ -1373,10 +1381,21 @@ class WidgetAnnotation extends Annotation {

  _computeFontSize(font, fontName, fontSize, height) {
    if (fontSize === null || fontSize === 0) {
-      const em = font.charsToGlyphs("M")[0].width / 1000;
-      // According to https://en.wikipedia.org/wiki/Em_(typography)
-      // an average cap height should be 70% of 1em
-      const capHeight = 0.7 * em;
+      let capHeight;
+      if (font.capHeight) {
+        capHeight = font.capHeight;
+      } else {
+        const glyphs = font.charsToGlyphs(font.encodeString("M").join(""));
+        if (glyphs.length === 1 && glyphs[0].width) {
+          const em = glyphs[0].width / 1000;
+          // According to https://en.wikipedia.org/wiki/Em_(typography)
+          // an average cap height should be 70% of 1em
+          capHeight = 0.7 * em;
+        } else {
+          capHeight = 0.7;
+        }
+      }
+
      // 1.5 * capHeight * fontSize seems to be a good value for lineHeight
      fontSize = Math.max(1, Math.floor(height / (1.5 * capHeight)));

@ -1510,11 +1529,12 @@ class TextWidgetAnnotation extends WidgetAnnotation {
      this.data.maxLen !== null;
  }

-  _getCombAppearance(defaultAppearance, text, width, hPadding, vPadding) {
+  _getCombAppearance(defaultAppearance, font, text, width, hPadding, vPadding) {
    const combWidth = (width / this.data.maxLen).toFixed(2);
    const buf = [];
-    for (const character of text) {
-      buf.push(`(${escapeString(character)}) Tj`);
+    const positions = font.getCharPositions(text);
+    for (const [start, end] of positions) {
+      buf.push(`(${escapeString(text.substring(start, end))}) Tj`);
    }

    const renderedComb = buf.join(` ${combWidth} 0 Td `);
@ -1568,49 +1588,61 @@ class TextWidgetAnnotation extends WidgetAnnotation {
  }

  _splitLine(line, font, fontSize, width) {
-    if (line.length <= 1) {
+    // TODO: need to handle chars which are not in the font.
+    line = font.encodeString(line).join("");
+
+    const glyphs = font.charsToGlyphs(line);
+
+    if (glyphs.length <= 1) {
      // Nothing to split
      return [line];
    }

+    const positions = font.getCharPositions(line);
    const scale = fontSize / 1000;
-    const whitespace = font.charsToGlyphs(" ")[0].width * scale;
    const chunks = [];

-    let lastSpacePos = -1,
+    let lastSpacePosInStringStart = -1,
+      lastSpacePosInStringEnd = -1,
+      lastSpacePos = -1,
      startChunk = 0,
      currentWidth = 0;

-    for (let i = 0, ii = line.length; i < ii; i++) {
-      const character = line.charAt(i);
-      if (character === " ") {
-        if (currentWidth + whitespace > width) {
+    for (let i = 0, ii = glyphs.length; i < ii; i++) {
+      const [start, end] = positions[i];
+      const glyph = glyphs[i];
+      const glyphWidth = glyph.width * scale;
+      if (glyph.unicode === " ") {
+        if (currentWidth + glyphWidth > width) {
          // We can break here
-          chunks.push(line.substring(startChunk, i));
-          startChunk = i;
-          currentWidth = whitespace;
+          chunks.push(line.substring(startChunk, start));
+          startChunk = start;
+          currentWidth = glyphWidth;
+          lastSpacePosInStringStart = -1;
          lastSpacePos = -1;
        } else {
-          currentWidth += whitespace;
+          currentWidth += glyphWidth;
+          lastSpacePosInStringStart = start;
+          lastSpacePosInStringEnd = end;
          lastSpacePos = i;
        }
      } else {
-        const charWidth = font.charsToGlyphs(character)[0].width * scale;
-        if (currentWidth + charWidth > width) {
+        if (currentWidth + glyphWidth > width) {
          // We must break to the last white position (if available)
-          if (lastSpacePos !== -1) {
-            chunks.push(line.substring(startChunk, lastSpacePos + 1));
-            startChunk = i = lastSpacePos + 1;
-            lastSpacePos = -1;
+          if (lastSpacePosInStringStart !== -1) {
+            chunks.push(line.substring(startChunk, lastSpacePosInStringEnd));
+            startChunk = lastSpacePosInStringEnd;
+            i = lastSpacePos + 1;
+            lastSpacePosInStringStart = -1;
            currentWidth = 0;
          } else {
            // Just break in the middle of the word
-            chunks.push(line.substring(startChunk, i));
-            startChunk = i;
-            currentWidth = charWidth;
+            chunks.push(line.substring(startChunk, start));
+            startChunk = start;
+            currentWidth = glyphWidth;
          }
        } else {
-          currentWidth += charWidth;
+          currentWidth += glyphWidth;
        }
      }
    }
--- a/src/core/cmap.js
+++ b/src/core/cmap.js
@ -338,6 +338,22 @@ class CMap {
    out.length = 1;
  }

+  getCharCodeLength(charCode) {
+    const codespaceRanges = this.codespaceRanges;
+    for (let n = 0, nn = codespaceRanges.length; n < nn; n++) {
+      // Check each codespace range to see if it falls within.
+      const codespaceRange = codespaceRanges[n];
+      for (let k = 0, kk = codespaceRange.length; k < kk; ) {
+        const low = codespaceRange[k++];
+        const high = codespaceRange[k++];
+        if (charCode >= low && charCode <= high) {
+          return n + 1;
+        }
+      }
+    }
+    return 1;
+  }
+
  get length() {
    return this._map.length;
  }
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@ -590,6 +590,7 @@ var Font = (function FontClosure() {
    this.defaultWidth = properties.defaultWidth;
    this.composite = properties.composite;
    this.cMap = properties.cMap;
+    this.capHeight = properties.capHeight / PDF_GLYPH_SPACE_UNITS;
    this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS;
    this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS;
    this.fontMatrix = properties.fontMatrix;
@ -3351,9 +3352,93 @@ var Font = (function FontClosure() {
      return (charsCache[charsCacheKey] = glyphs);
    },

+    /**
+     * Chars can have different sizes (depends on the encoding).
+     * @param {String} a string encoded with font encoding.
+     * @returns {Array<Array<number>>} the positions of each char in the string.
+     */
+    getCharPositions(chars) {
+      // This function doesn't use a cache because
+      // it's called only when saving or printing.
+      const positions = [];
+
+      if (this.cMap) {
+        const c = Object.create(null);
+        let i = 0;
+        while (i < chars.length) {
+          this.cMap.readCharCode(chars, i, c);
+          const length = c.length;
+          positions.push([i, i + length]);
+          i += length;
+        }
+      } else {
+        for (let i = 0, ii = chars.length; i < ii; ++i) {
+          positions.push([i, i + 1]);
+        }
+      }
+
+      return positions;
+    },
+
    get glyphCacheValues() {
      return Object.values(this.glyphCache);
    },
+
+    /**
+     * Encode a js string using font encoding.
+     * The resulting array contains an encoded string at even positions
+     * (can be empty) and a non-encoded one at odd positions.
+     * @param {String} a js string.
+     * @returns {Array<String>} an array of encoded strings or non-encoded ones.
+     */
+    encodeString(str) {
+      const buffers = [];
+      const currentBuf = [];
+
+      // buffers will contain: encoded, non-encoded, encoded, ...
+      // currentBuf is pushed in buffers each time there is a change.
+      // So when buffers.length is odd then the last string is an encoded one
+      // and currentBuf contains non-encoded chars.
+      const hasCurrentBufErrors = () => buffers.length % 2 === 1;
+
+      for (let i = 0, ii = str.length; i < ii; i++) {
+        const unicode = str.codePointAt(i);
+        if (unicode > 0xd7ff && (unicode < 0xe000 || unicode > 0xfffd)) {
+          // unicode is represented by two uint16
+          i++;
+        }
+        if (this.toUnicode) {
+          const char = String.fromCodePoint(unicode);
+          const charCode = this.toUnicode.charCodeOf(char);
+          if (charCode !== -1) {
+            if (hasCurrentBufErrors()) {
+              buffers.push(currentBuf.join(""));
+              currentBuf.length = 0;
+            }
+            const charCodeLength = this.cMap
+              ? this.cMap.getCharCodeLength(charCode)
+              : 1;
+            for (let j = charCodeLength - 1; j >= 0; j--) {
+              currentBuf.push(
+                String.fromCharCode((charCode >> (8 * j)) & 0xff)
+              );
+            }
+            continue;
+          }
+        }
+
+        // unicode can't be encoded
+        if (!hasCurrentBufErrors()) {
+          buffers.push(currentBuf.join(""));
+          currentBuf.length = 0;
+        }
+        currentBuf.push(String.fromCodePoint(unicode));
+      }
+
+      buffers.push(currentBuf.join(""));
+
+      return buffers;
+    },
  };

  return Font;
@ -3371,6 +3456,9 @@ var ErrorFont = (function ErrorFontClosure() {
    charsToGlyphs: function ErrorFont_charsToGlyphs() {
      return [];
    },
+    encodeString: function ErrorFont_encodeString(chars) {
+      return [chars];
+    },
    exportData(extraProperties = false) {
      return { error: this.error };
    },
--- a/src/shared/util.js
+++ b/src/shared/util.js
@ -842,6 +842,20 @@ function escapeString(str) {
  });
 }

+function isAscii(str) {
+  return /^[\x00-\x7F]*$/.test(str);
+}
+
+function stringToUTF16BEString(str) {
+  const buf = ["\xFE\xFF"];
+  for (let i = 0, ii = str.length; i < ii; i++) {
+    const char = str.charCodeAt(i);
+    buf.push(String.fromCharCode((char >> 8) & 0xff));
+    buf.push(String.fromCharCode(char & 0xff));
+  }
+  return buf.join("");
+}
+
 function stringToUTF8String(str) {
  return decodeURIComponent(escape(str));
 }
@ -1044,6 +1058,7 @@ export {
  getModificationDate,
  getVerbosityLevel,
  info,
+  isAscii,
  isArrayBuffer,
  isArrayEqual,
  isBool,
@ -1061,6 +1076,7 @@ export {
  string32,
  stringToBytes,
  stringToPDFString,
+  stringToUTF16BEString,
  stringToUTF8String,
  utf8StringToString,
  warn,
--- a/test/unit/annotation_spec.js
+++ b/test/unit/annotation_spec.js
@ -32,10 +32,18 @@ import {
 import { createIdFactory, XRefMock } from "./test_utils.js";
 import { Dict, Name, Ref, RefSetCache } from "../../src/core/primitives.js";
 import { Lexer, Parser } from "../../src/core/parser.js";
+import { DOMCMapReaderFactory } from "../../src/display/display_utils.js";
+import { isNodeJS } from "../../src/shared/is_node.js";
+import { NodeCMapReaderFactory } from "../../src/display/node_utils.js";
 import { PartialEvaluator } from "../../src/core/evaluator.js";
 import { StringStream } from "../../src/core/stream.js";
 import { WorkerTask } from "../../src/core/worker.js";

+const cMapUrl = {
+  dom: "../../external/bcmaps/",
+  node: "./external/bcmaps/",
+};
+
 describe("annotation", function () {
  class PDFManagerMock {
    constructor(params) {
@ -82,6 +90,30 @@ describe("annotation", function () {
    pdfManagerMock = new PDFManagerMock({
      docBaseUrl: null,
    });
+
+    let CMapReaderFactory;
+    if (isNodeJS) {
+      CMapReaderFactory = new NodeCMapReaderFactory({
+        baseUrl: cMapUrl.node,
+        isCompressed: true,
+      });
+    } else {
+      CMapReaderFactory = new DOMCMapReaderFactory({
+        baseUrl: cMapUrl.dom,
+        isCompressed: true,
+      });
+    }
+
+    const builtInCMapCache = new Map();
+    builtInCMapCache.set(
+      "UniJIS-UTF16-H",
+      CMapReaderFactory.fetch({ name: "UniJIS-UTF16-H" })
+    );
+    builtInCMapCache.set(
+      "Adobe-Japan1-UCS2",
+      CMapReaderFactory.fetch({ name: "Adobe-Japan1-UCS2" })
+    );
+
    idFactoryMock = createIdFactory(/* pageIndex = */ 0);
    partialEvaluator = new PartialEvaluator({
      xref: new XRefMock(),
@ -89,7 +121,9 @@ describe("annotation", function () {
      pageIndex: 0,
      idFactory: createIdFactory(/* pageIndex = */ 0),
      fontCache: new RefSetCache(),
+      builtInCMapCache,
    });
+
    done();
  });

@ -1419,7 +1453,7 @@ describe("annotation", function () {
  });

  describe("TextWidgetAnnotation", function () {
-    let textWidgetDict, fontRefObj;
+    let textWidgetDict, helvRefObj, gothRefObj;

    beforeEach(function (done) {
      textWidgetDict = new Dict();
@ -1432,11 +1466,38 @@ describe("annotation", function () {
      helvDict.set("Type", Name.get("Font"));
      helvDict.set("Subtype", Name.get("Type1"));

-      const fontRef = Ref.get(314, 0);
-      fontRefObj = { ref: fontRef, data: helvDict };
+      const gothDict = new Dict();
+      gothDict.set("BaseFont", Name.get("MSGothic"));
+      gothDict.set("Type", Name.get("Font"));
+      gothDict.set("Subtype", Name.get("Type0"));
+      gothDict.set("Encoding", Name.get("UniJIS-UTF16-H"));
+      gothDict.set("Name", Name.get("MSGothic"));
+
+      const cidSysInfoDict = new Dict();
+      cidSysInfoDict.set("Ordering", "Japan1");
+      cidSysInfoDict.set("Registry", "Adobe");
+      cidSysInfoDict.set("Supplement", "5");
+
+      const fontDescriptorDict = new Dict();
+      fontDescriptorDict.set("FontName", Name.get("MSGothic"));
+      fontDescriptorDict.set("CapHeight", "680");
+
+      const gothDescendantDict = new Dict();
+      gothDescendantDict.set("BaseFont", Name.get("MSGothic"));
+      gothDescendantDict.set("CIDSystemInfo", cidSysInfoDict);
+      gothDescendantDict.set("Subtype", Name.get("CIDFontType2"));
+      gothDescendantDict.set("Type", Name.get("Font"));
+      gothDescendantDict.set("FontDescriptor", fontDescriptorDict);
+
+      gothDict.set("DescendantFonts", [gothDescendantDict]);
+
+      const helvRef = Ref.get(314, 0);
+      const gothRef = Ref.get(159, 0);
+      helvRefObj = { ref: helvRef, data: helvDict };
+      gothRefObj = { ref: gothRef, data: gothDict };
      const resourceDict = new Dict();
      const fontDict = new Dict();
-      fontDict.set("Helv", fontRef);
+      fontDict.set("Helv", helvRef);
      resourceDict.set("Font", fontDict);

      textWidgetDict.set("DA", "/Helv 5 Tf");
@ -1447,7 +1508,7 @@ describe("annotation", function () {
    });

    afterEach(function () {
-      textWidgetDict = fontRefObj = null;
+      textWidgetDict = helvRefObj = gothRefObj = null;
    });

    it("should handle unknown text alignment, maximum length and flags", function (done) {
@ -1614,7 +1675,7 @@ describe("annotation", function () {
      const textWidgetRef = Ref.get(271, 0);
      const xref = new XRefMock([
        { ref: textWidgetRef, data: textWidgetDict },
-        fontRefObj,
+        helvRefObj,
      ]);
      const task = new WorkerTask("test print");
      partialEvaluator.xref = xref;
@ -1644,6 +1705,46 @@ describe("annotation", function () {
        }, done.fail);
    });

+    it("should render regular text in Japanese for printing", function (done) {
+      textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
+      textWidgetDict.set("DA", "/Goth 5 Tf");
+
+      const textWidgetRef = Ref.get(271, 0);
+      const xref = new XRefMock([
+        { ref: textWidgetRef, data: textWidgetDict },
+        gothRefObj,
+      ]);
+      const task = new WorkerTask("test print");
+      partialEvaluator.xref = xref;
+
+      AnnotationFactory.create(
+        xref,
+        textWidgetRef,
+        pdfManagerMock,
+        idFactoryMock
+      )
+        .then(annotation => {
+          const id = annotation.data.id;
+          const annotationStorage = {};
+          annotationStorage[id] = { value: "こんにちは世界の" };
+          return annotation._getAppearance(
+            partialEvaluator,
+            task,
+            annotationStorage
+          );
+        }, done.fail)
+        .then(appearance => {
+          const utf16String =
+            "\x30\x53\x30\x93\x30\x6b\x30\x61" +
+            "\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
+          expect(appearance).toEqual(
+            "/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 0 Tm" +
+              ` 2.00 2.00 Td (${utf16String}) Tj ET Q EMC`
+          );
+          done();
+        }, done.fail);
+    });
+
    it("should render regular text for printing using normal appearance", function (done) {
      const textWidgetRef = Ref.get(271, 0);

@ -1658,7 +1759,7 @@ describe("annotation", function () {

      const xref = new XRefMock([
        { ref: textWidgetRef, data: textWidgetDict },
-        fontRefObj,
+        helvRefObj,
      ]);
      const task = new WorkerTask("test print");
      partialEvaluator.xref = xref;
@ -1699,7 +1800,7 @@ describe("annotation", function () {
      const textWidgetRef = Ref.get(271, 0);
      const xref = new XRefMock([
        { ref: textWidgetRef, data: textWidgetDict },
-        fontRefObj,
+        helvRefObj,
      ]);
      const task = new WorkerTask("test print");
      partialEvaluator.xref = xref;
@ -1729,13 +1830,53 @@ describe("annotation", function () {
        }, done.fail);
    });

+    it("should render auto-sized text in Japanese for printing", function (done) {
+      textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
+      textWidgetDict.set("DA", "/Goth 0 Tf");
+
+      const textWidgetRef = Ref.get(271, 0);
+      const xref = new XRefMock([
+        { ref: textWidgetRef, data: textWidgetDict },
+        gothRefObj,
+      ]);
+      const task = new WorkerTask("test print");
+      partialEvaluator.xref = xref;
+
+      AnnotationFactory.create(
+        xref,
+        textWidgetRef,
+        pdfManagerMock,
+        idFactoryMock
+      )
+        .then(annotation => {
+          const id = annotation.data.id;
+          const annotationStorage = {};
+          annotationStorage[id] = { value: "こんにちは世界の" };
+          return annotation._getAppearance(
+            partialEvaluator,
+            task,
+            annotationStorage
+          );
+        }, done.fail)
+        .then(appearance => {
+          const utf16String =
+            "\x30\x53\x30\x93\x30\x6b\x30\x61" +
+            "\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
+          expect(appearance).toEqual(
+            "/Tx BMC q BT /Goth 9 Tf 1 0 0 1 0 0 Tm" +
+              ` 2.00 2.00 Td (${utf16String}) Tj ET Q EMC`
+          );
+          done();
+        }, done.fail);
+    });
+
    it("should not render a password for printing", function (done) {
      textWidgetDict.set("Ff", AnnotationFieldFlag.PASSWORD);

      const textWidgetRef = Ref.get(271, 0);
      const xref = new XRefMock([
        { ref: textWidgetRef, data: textWidgetDict },
-        fontRefObj,
+        helvRefObj,
      ]);
      const task = new WorkerTask("test print");
      partialEvaluator.xref = xref;
@ -1768,7 +1909,7 @@ describe("annotation", function () {
      const textWidgetRef = Ref.get(271, 0);
      const xref = new XRefMock([
        { ref: textWidgetRef, data: textWidgetDict },
-        fontRefObj,
+        helvRefObj,
      ]);
      const task = new WorkerTask("test print");
      partialEvaluator.xref = xref;
@ -1808,6 +1949,45 @@ describe("annotation", function () {
        }, done.fail);
    });

+    it("should render multiline text in Japanese for printing", function (done) {
+      textWidgetDict.set("Ff", AnnotationFieldFlag.MULTILINE);
+      textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
+      textWidgetDict.set("DA", "/Goth 5 Tf");
+
+      const textWidgetRef = Ref.get(271, 0);
+      const xref = new XRefMock([
+        { ref: textWidgetRef, data: textWidgetDict },
+        gothRefObj,
+      ]);
+      const task = new WorkerTask("test print");
+      partialEvaluator.xref = xref;
+
+      AnnotationFactory.create(
+        xref,
+        textWidgetRef,
+        pdfManagerMock,
+        idFactoryMock
+      )
+        .then(annotation => {
+          const id = annotation.data.id;
+          const annotationStorage = {};
+          annotationStorage[id] = { value: "こんにちは世界の" };
+          return annotation._getAppearance(
+            partialEvaluator,
+            task,
+            annotationStorage
+          );
+        }, done.fail)
+        .then(appearance => {
+          expect(appearance).toEqual(
+            "/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 10 Tm " +
+              "2.00 -5.00 Td (\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f) Tj\n" +
+              "0.00 -5.00 Td (\x4e\x16\x75\x4c\x30\x6e) Tj ET Q EMC"
+          );
+          done();
+        }, done.fail);
+    });
+
    it("should render multiline text with various EOL for printing", function (done) {
      textWidgetDict.set("Ff", AnnotationFieldFlag.MULTILINE);
      textWidgetDict.set("Rect", [0, 0, 128, 10]);
@ -1815,7 +1995,7 @@ describe("annotation", function () {
      const textWidgetRef = Ref.get(271, 0);
      const xref = new XRefMock([
        { ref: textWidgetRef, data: textWidgetDict },
-        fontRefObj,
+        helvRefObj,
      ]);
      const task = new WorkerTask("test print");
      partialEvaluator.xref = xref;
@ -1881,7 +2061,7 @@ describe("annotation", function () {
      const textWidgetRef = Ref.get(271, 0);
      const xref = new XRefMock([
        { ref: textWidgetRef, data: textWidgetDict },
-        fontRefObj,
+        helvRefObj,
      ]);
      const task = new WorkerTask("test print");
      partialEvaluator.xref = xref;
@ -1914,9 +2094,55 @@ describe("annotation", function () {
        }, done.fail);
    });

+    it("should render comb with Japanese text for printing", function (done) {
+      textWidgetDict.set("Ff", AnnotationFieldFlag.COMB);
+      textWidgetDict.set("MaxLen", 4);
+      textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
+      textWidgetDict.set("DA", "/Goth 5 Tf");
+      textWidgetDict.set("Rect", [0, 0, 32, 10]);
+
+      const textWidgetRef = Ref.get(271, 0);
+      const xref = new XRefMock([
+        { ref: textWidgetRef, data: textWidgetDict },
+        gothRefObj,
+      ]);
+      const task = new WorkerTask("test print");
+      partialEvaluator.xref = xref;
+
+      AnnotationFactory.create(
+        xref,
+        textWidgetRef,
+        pdfManagerMock,
+        idFactoryMock
+      )
+        .then(annotation => {
+          const id = annotation.data.id;
+          const annotationStorage = {};
+          annotationStorage[id] = { value: "こんにちは世界の" };
+          return annotation._getAppearance(
+            partialEvaluator,
+            task,
+            annotationStorage
+          );
+        }, done.fail)
+        .then(appearance => {
+          expect(appearance).toEqual(
+            "/Tx BMC q BT /Goth 5 Tf 1 0 0 1 2 2 Tm" +
+              " (\x30\x53) Tj 8.00 0 Td (\x30\x93) Tj 8.00 0 Td (\x30\x6b) Tj" +
+              " 8.00 0 Td (\x30\x61) Tj 8.00 0 Td (\x30\x6f) Tj" +
+              " 8.00 0 Td (\x4e\x16) Tj 8.00 0 Td (\x75\x4c) Tj" +
+              " 8.00 0 Td (\x30\x6e) Tj ET Q EMC"
+          );
+          done();
+        }, done.fail);
+    });
+
    it("should save text", function (done) {
      const textWidgetRef = Ref.get(123, 0);
-      const xref = new XRefMock([{ ref: textWidgetRef, data: textWidgetDict }]);
+      const xref = new XRefMock([
+        { ref: textWidgetRef, data: textWidgetDict },
+        helvRefObj,
+      ]);
      partialEvaluator.xref = xref;
      const task = new WorkerTask("test save");

@ -1935,17 +2161,17 @@ describe("annotation", function () {
          expect(data.length).toEqual(2);
          const [oldData, newData] = data;
          expect(oldData.ref).toEqual(Ref.get(123, 0));
-          expect(newData.ref).toEqual(Ref.get(1, 0));
+          expect(newData.ref).toEqual(Ref.get(2, 0));

          oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)");
          expect(oldData.data).toEqual(
            "123 0 obj\n" +
              "<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Helv 5 Tf) /DR " +
              "<< /Font << /Helv 314 0 R>>>> /Rect [0 0 32 10] " +
-              "/V (hello world) /AP << /N 1 0 R>> /M (date)>>\nendobj\n"
+              "/V (hello world) /AP << /N 2 0 R>> /M (date)>>\nendobj\n"
          );
          expect(newData.data).toEqual(
-            "1 0 obj\n<< /Length 77 /Subtype /Form /Resources " +
+            "2 0 obj\n<< /Length 77 /Subtype /Form /Resources " +
              "<< /Font << /Helv 314 0 R>>>> /BBox [0 0 32 10]>> stream\n" +
              "/Tx BMC q BT /Helv 5 Tf 1 0 0 1 0 0 Tm 2.00 2.00 Td (hello world) Tj " +
              "ET Q EMC\nendstream\nendobj\n"
@ -2039,6 +2265,55 @@ describe("annotation", function () {
          done();
        }, done.fail);
    });
+
+    it("should save Japanese text", function (done) {
+      textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
+      textWidgetDict.set("DA", "/Goth 5 Tf");
+
+      const textWidgetRef = Ref.get(123, 0);
+      const xref = new XRefMock([
+        { ref: textWidgetRef, data: textWidgetDict },
+        gothRefObj,
+      ]);
+      partialEvaluator.xref = xref;
+      const task = new WorkerTask("test save");
+
+      AnnotationFactory.create(
+        xref,
+        textWidgetRef,
+        pdfManagerMock,
+        idFactoryMock
+      )
+        .then(annotation => {
+          const annotationStorage = {};
+          annotationStorage[annotation.data.id] = { value: "こんにちは世界の" };
+          return annotation.save(partialEvaluator, task, annotationStorage);
+        }, done.fail)
+        .then(data => {
+          const utf16String =
+            "\x30\x53\x30\x93\x30\x6b\x30\x61" +
+            "\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
+          expect(data.length).toEqual(2);
+          const [oldData, newData] = data;
+          expect(oldData.ref).toEqual(Ref.get(123, 0));
+          expect(newData.ref).toEqual(Ref.get(2, 0));
+
+          oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)");
+          expect(oldData.data).toEqual(
+            "123 0 obj\n" +
+              "<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Goth 5 Tf) /DR " +
+              "<< /Font << /Helv 314 0 R /Goth 159 0 R>>>> /Rect [0 0 32 10] " +
+              `/V (\xfe\xff${utf16String}) /AP << /N 2 0 R>> /M (date)>>\nendobj\n`
+          );
+          expect(newData.data).toEqual(
+            "2 0 obj\n<< /Length 82 /Subtype /Form /Resources " +
+              "<< /Font << /Helv 314 0 R /Goth 159 0 R>>>> /BBox [0 0 32 10]>> stream\n" +
+              `/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 0 Tm 2.00 2.00 Td (${utf16String}) Tj ` +
+              "ET Q EMC\nendstream\nendobj\n"
+          );
+          done();
+        }, done.fail);
+    });
  });

  describe("ButtonWidgetAnnotation", function () {
--- a/test/unit/util_spec.js
+++ b/test/unit/util_spec.js
@ -21,6 +21,7 @@ import {
  escapeString,
  getModificationDate,
  isArrayBuffer,
+  isAscii,
  isBool,
  isNum,
  isSameOrigin,
@ -29,6 +30,7 @@ import {
  string32,
  stringToBytes,
  stringToPDFString,
+  stringToUTF16BEString,
 } from "../../src/shared/util.js";

 describe("util", function () {
@ -346,4 +348,26 @@ describe("util", function () {
      expect(encodeToXmlString(str)).toEqual(str);
    });
  });
+
+  describe("isAscii", function () {
+    it("handles ascii/non-ascii strings", function () {
+      expect(isAscii("hello world")).toEqual(true);
+      expect(isAscii("こんにちは世界の")).toEqual(false);
+      expect(isAscii("hello world in Japanese is こんにちは世界の")).toEqual(
+        false
+      );
+    });
+  });
+
+  describe("stringToUTF16BEString", function () {
+    it("should encode a string in UTF16BE with a BOM", function () {
+      expect(stringToUTF16BEString("hello world")).toEqual(
+        "\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"
+      );
+      expect(stringToUTF16BEString("こんにちは世界の")).toEqual(
+        "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61" +
+          "\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
+      );
+    });
+  });
 });