Merge pull request #12292 from calixteman/encoding

Fix encoding issues when printing/saving a form with non-ASCII characters
This commit is contained in:
Tim van der Meij 2021-01-07 22:56:42 +01:00 committed by GitHub
commit 5bde4b71f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 508 additions and 57 deletions

View File

@ -23,10 +23,12 @@ import {
assert,
escapeString,
getModificationDate,
isAscii,
isString,
OPS,
shadow,
stringToPDFString,
stringToUTF16BEString,
unreachable,
Util,
warn,
@ -1222,7 +1224,7 @@ class WidgetAnnotation extends Annotation {
appearance = newTransform.encryptString(appearance);
}
dict.set("V", value);
dict.set("V", isAscii(value) ? value : stringToUTF16BEString(value));
dict.set("AP", AP);
dict.set("M", `D:${getModificationDate()}`);
@ -1298,16 +1300,6 @@ class WidgetAnnotation extends Annotation {
const defaultAppearance = this.data.defaultAppearance;
const alignment = this.data.textAlignment;
if (this.data.comb) {
return this._getCombAppearance(
defaultAppearance,
value,
totalWidth,
hPadding,
vPadding
);
}
if (this.data.multiLine) {
return this._getMultilineAppearance(
defaultAppearance,
@ -1322,18 +1314,34 @@ class WidgetAnnotation extends Annotation {
);
}
// TODO: need to handle chars which are not in the font.
const encodedString = font.encodeString(value).join("");
if (this.data.comb) {
return this._getCombAppearance(
defaultAppearance,
font,
encodedString,
totalWidth,
hPadding,
vPadding
);
}
if (alignment === 0 || alignment > 2) {
// Left alignment: nothing to do
return (
"/Tx BMC q BT " +
defaultAppearance +
` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(value)}) Tj` +
` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(
encodedString
)}) Tj` +
" ET Q EMC"
);
}
const renderedText = this._renderText(
value,
encodedString,
font,
fontSize,
totalWidth,
@ -1373,10 +1381,21 @@ class WidgetAnnotation extends Annotation {
_computeFontSize(font, fontName, fontSize, height) {
if (fontSize === null || fontSize === 0) {
const em = font.charsToGlyphs("M")[0].width / 1000;
// According to https://en.wikipedia.org/wiki/Em_(typography)
// an average cap height should be 70% of 1em
const capHeight = 0.7 * em;
let capHeight;
if (font.capHeight) {
capHeight = font.capHeight;
} else {
const glyphs = font.charsToGlyphs(font.encodeString("M").join(""));
if (glyphs.length === 1 && glyphs[0].width) {
const em = glyphs[0].width / 1000;
// According to https://en.wikipedia.org/wiki/Em_(typography)
// an average cap height should be 70% of 1em
capHeight = 0.7 * em;
} else {
capHeight = 0.7;
}
}
// 1.5 * capHeight * fontSize seems to be a good value for lineHeight
fontSize = Math.max(1, Math.floor(height / (1.5 * capHeight)));
@ -1510,11 +1529,12 @@ class TextWidgetAnnotation extends WidgetAnnotation {
this.data.maxLen !== null;
}
_getCombAppearance(defaultAppearance, text, width, hPadding, vPadding) {
_getCombAppearance(defaultAppearance, font, text, width, hPadding, vPadding) {
const combWidth = (width / this.data.maxLen).toFixed(2);
const buf = [];
for (const character of text) {
buf.push(`(${escapeString(character)}) Tj`);
const positions = font.getCharPositions(text);
for (const [start, end] of positions) {
buf.push(`(${escapeString(text.substring(start, end))}) Tj`);
}
const renderedComb = buf.join(` ${combWidth} 0 Td `);
@ -1568,49 +1588,61 @@ class TextWidgetAnnotation extends WidgetAnnotation {
}
_splitLine(line, font, fontSize, width) {
if (line.length <= 1) {
// TODO: need to handle chars which are not in the font.
line = font.encodeString(line).join("");
const glyphs = font.charsToGlyphs(line);
if (glyphs.length <= 1) {
// Nothing to split
return [line];
}
const positions = font.getCharPositions(line);
const scale = fontSize / 1000;
const whitespace = font.charsToGlyphs(" ")[0].width * scale;
const chunks = [];
let lastSpacePos = -1,
let lastSpacePosInStringStart = -1,
lastSpacePosInStringEnd = -1,
lastSpacePos = -1,
startChunk = 0,
currentWidth = 0;
for (let i = 0, ii = line.length; i < ii; i++) {
const character = line.charAt(i);
if (character === " ") {
if (currentWidth + whitespace > width) {
for (let i = 0, ii = glyphs.length; i < ii; i++) {
const [start, end] = positions[i];
const glyph = glyphs[i];
const glyphWidth = glyph.width * scale;
if (glyph.unicode === " ") {
if (currentWidth + glyphWidth > width) {
// We can break here
chunks.push(line.substring(startChunk, i));
startChunk = i;
currentWidth = whitespace;
chunks.push(line.substring(startChunk, start));
startChunk = start;
currentWidth = glyphWidth;
lastSpacePosInStringStart = -1;
lastSpacePos = -1;
} else {
currentWidth += whitespace;
currentWidth += glyphWidth;
lastSpacePosInStringStart = start;
lastSpacePosInStringEnd = end;
lastSpacePos = i;
}
} else {
const charWidth = font.charsToGlyphs(character)[0].width * scale;
if (currentWidth + charWidth > width) {
if (currentWidth + glyphWidth > width) {
// We must break to the last white position (if available)
if (lastSpacePos !== -1) {
chunks.push(line.substring(startChunk, lastSpacePos + 1));
startChunk = i = lastSpacePos + 1;
lastSpacePos = -1;
if (lastSpacePosInStringStart !== -1) {
chunks.push(line.substring(startChunk, lastSpacePosInStringEnd));
startChunk = lastSpacePosInStringEnd;
i = lastSpacePos + 1;
lastSpacePosInStringStart = -1;
currentWidth = 0;
} else {
// Just break in the middle of the word
chunks.push(line.substring(startChunk, i));
startChunk = i;
currentWidth = charWidth;
chunks.push(line.substring(startChunk, start));
startChunk = start;
currentWidth = glyphWidth;
}
} else {
currentWidth += charWidth;
currentWidth += glyphWidth;
}
}
}

View File

@ -338,6 +338,22 @@ class CMap {
out.length = 1;
}
getCharCodeLength(charCode) {
const codespaceRanges = this.codespaceRanges;
for (let n = 0, nn = codespaceRanges.length; n < nn; n++) {
// Check each codespace range to see if it falls within.
const codespaceRange = codespaceRanges[n];
for (let k = 0, kk = codespaceRange.length; k < kk; ) {
const low = codespaceRange[k++];
const high = codespaceRange[k++];
if (charCode >= low && charCode <= high) {
return n + 1;
}
}
}
return 1;
}
get length() {
return this._map.length;
}

View File

@ -590,6 +590,7 @@ var Font = (function FontClosure() {
this.defaultWidth = properties.defaultWidth;
this.composite = properties.composite;
this.cMap = properties.cMap;
this.capHeight = properties.capHeight / PDF_GLYPH_SPACE_UNITS;
this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS;
this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS;
this.fontMatrix = properties.fontMatrix;
@ -3351,9 +3352,93 @@ var Font = (function FontClosure() {
return (charsCache[charsCacheKey] = glyphs);
},
/**
* Chars can have different sizes (depends on the encoding).
* @param {String} a string encoded with font encoding.
* @returns {Array<Array<number>>} the positions of each char in the string.
*/
getCharPositions(chars) {
// This function doesn't use a cache because
// it's called only when saving or printing.
const positions = [];
if (this.cMap) {
const c = Object.create(null);
let i = 0;
while (i < chars.length) {
this.cMap.readCharCode(chars, i, c);
const length = c.length;
positions.push([i, i + length]);
i += length;
}
} else {
for (let i = 0, ii = chars.length; i < ii; ++i) {
positions.push([i, i + 1]);
}
}
return positions;
},
get glyphCacheValues() {
return Object.values(this.glyphCache);
},
/**
* Encode a js string using font encoding.
* The resulting array contains an encoded string at even positions
* (can be empty) and a non-encoded one at odd positions.
* @param {String} a js string.
* @returns {Array<String>} an array of encoded strings or non-encoded ones.
*/
encodeString(str) {
const buffers = [];
const currentBuf = [];
// buffers will contain: encoded, non-encoded, encoded, ...
// currentBuf is pushed in buffers each time there is a change.
// So when buffers.length is odd then the last string is an encoded one
// and currentBuf contains non-encoded chars.
const hasCurrentBufErrors = () => buffers.length % 2 === 1;
for (let i = 0, ii = str.length; i < ii; i++) {
const unicode = str.codePointAt(i);
if (unicode > 0xd7ff && (unicode < 0xe000 || unicode > 0xfffd)) {
// unicode is represented by two uint16
i++;
}
if (this.toUnicode) {
const char = String.fromCodePoint(unicode);
const charCode = this.toUnicode.charCodeOf(char);
if (charCode !== -1) {
if (hasCurrentBufErrors()) {
buffers.push(currentBuf.join(""));
currentBuf.length = 0;
}
const charCodeLength = this.cMap
? this.cMap.getCharCodeLength(charCode)
: 1;
for (let j = charCodeLength - 1; j >= 0; j--) {
currentBuf.push(
String.fromCharCode((charCode >> (8 * j)) & 0xff)
);
}
continue;
}
}
// unicode can't be encoded
if (!hasCurrentBufErrors()) {
buffers.push(currentBuf.join(""));
currentBuf.length = 0;
}
currentBuf.push(String.fromCodePoint(unicode));
}
buffers.push(currentBuf.join(""));
return buffers;
},
};
return Font;
@ -3371,6 +3456,9 @@ var ErrorFont = (function ErrorFontClosure() {
charsToGlyphs: function ErrorFont_charsToGlyphs() {
return [];
},
encodeString: function ErrorFont_encodeString(chars) {
return [chars];
},
exportData(extraProperties = false) {
return { error: this.error };
},

View File

@ -842,6 +842,20 @@ function escapeString(str) {
});
}
/**
 * Check whether a string only contains chars in the range U+0000-U+007F.
 * @param {string} str - the string to check.
 * @returns {boolean} true when no char is outside of the ASCII range
 *   (hence true for the empty string).
 */
function isAscii(str) {
  // Look for a single offending char rather than matching the whole string.
  return !/[^\x00-\x7F]/.test(str);
}
/**
 * Convert a JS string into a UTF-16BE "byte string" prefixed with the
 * \xFE\xFF byte order mark: every UTF-16 code unit of the input yields two
 * chars in the output (high byte first), each one in the range 0x00-0xFF.
 * @param {string} str - the string to convert.
 * @returns {string} the BOM followed by the big-endian bytes.
 */
function stringToUTF16BEString(str) {
  const parts = ["\xFE\xFF"];
  for (let pos = 0, len = str.length; pos < len; pos++) {
    const unit = str.charCodeAt(pos);
    parts.push(String.fromCharCode((unit >> 8) & 0xff, unit & 0xff));
  }
  return parts.join("");
}
/**
 * Decode a "byte string" (one char per byte) holding UTF-8 data into a
 * regular JS string.
 * @param {string} str - the UTF-8 bytes, one char per byte.
 * @returns {string} the decoded string.
 */
function stringToUTF8String(str) {
  // escape() turns each byte into a %XX sequence, which
  // decodeURIComponent() then interprets as UTF-8.
  const percentEncoded = escape(str);
  return decodeURIComponent(percentEncoded);
}
@ -1044,6 +1058,7 @@ export {
getModificationDate,
getVerbosityLevel,
info,
isAscii,
isArrayBuffer,
isArrayEqual,
isBool,
@ -1061,6 +1076,7 @@ export {
string32,
stringToBytes,
stringToPDFString,
stringToUTF16BEString,
stringToUTF8String,
utf8StringToString,
warn,

View File

@ -32,10 +32,18 @@ import {
import { createIdFactory, XRefMock } from "./test_utils.js";
import { Dict, Name, Ref, RefSetCache } from "../../src/core/primitives.js";
import { Lexer, Parser } from "../../src/core/parser.js";
import { DOMCMapReaderFactory } from "../../src/display/display_utils.js";
import { isNodeJS } from "../../src/shared/is_node.js";
import { NodeCMapReaderFactory } from "../../src/display/node_utils.js";
import { PartialEvaluator } from "../../src/core/evaluator.js";
import { StringStream } from "../../src/core/stream.js";
import { WorkerTask } from "../../src/core/worker.js";
// Base URLs of the compressed CMap files (bcmaps), one per test
// environment: `dom` is used with DOMCMapReaderFactory and `node` with
// NodeCMapReaderFactory.
const cMapUrl = {
dom: "../../external/bcmaps/",
node: "./external/bcmaps/",
};
describe("annotation", function () {
class PDFManagerMock {
constructor(params) {
@ -82,6 +90,30 @@ describe("annotation", function () {
pdfManagerMock = new PDFManagerMock({
docBaseUrl: null,
});
let CMapReaderFactory;
if (isNodeJS) {
CMapReaderFactory = new NodeCMapReaderFactory({
baseUrl: cMapUrl.node,
isCompressed: true,
});
} else {
CMapReaderFactory = new DOMCMapReaderFactory({
baseUrl: cMapUrl.dom,
isCompressed: true,
});
}
const builtInCMapCache = new Map();
builtInCMapCache.set(
"UniJIS-UTF16-H",
CMapReaderFactory.fetch({ name: "UniJIS-UTF16-H" })
);
builtInCMapCache.set(
"Adobe-Japan1-UCS2",
CMapReaderFactory.fetch({ name: "Adobe-Japan1-UCS2" })
);
idFactoryMock = createIdFactory(/* pageIndex = */ 0);
partialEvaluator = new PartialEvaluator({
xref: new XRefMock(),
@ -89,7 +121,9 @@ describe("annotation", function () {
pageIndex: 0,
idFactory: createIdFactory(/* pageIndex = */ 0),
fontCache: new RefSetCache(),
builtInCMapCache,
});
done();
});
@ -1419,7 +1453,7 @@ describe("annotation", function () {
});
describe("TextWidgetAnnotation", function () {
let textWidgetDict, fontRefObj;
let textWidgetDict, helvRefObj, gothRefObj;
beforeEach(function (done) {
textWidgetDict = new Dict();
@ -1432,11 +1466,38 @@ describe("annotation", function () {
helvDict.set("Type", Name.get("Font"));
helvDict.set("Subtype", Name.get("Type1"));
const fontRef = Ref.get(314, 0);
fontRefObj = { ref: fontRef, data: helvDict };
const gothDict = new Dict();
gothDict.set("BaseFont", Name.get("MSGothic"));
gothDict.set("Type", Name.get("Font"));
gothDict.set("Subtype", Name.get("Type0"));
gothDict.set("Encoding", Name.get("UniJIS-UTF16-H"));
gothDict.set("Name", Name.get("MSGothic"));
const cidSysInfoDict = new Dict();
cidSysInfoDict.set("Ordering", "Japan1");
cidSysInfoDict.set("Registry", "Adobe");
cidSysInfoDict.set("Supplement", "5");
const fontDescriptorDict = new Dict();
fontDescriptorDict.set("FontName", Name.get("MSGothic"));
fontDescriptorDict.set("CapHeight", "680");
const gothDescendantDict = new Dict();
gothDescendantDict.set("BaseFont", Name.get("MSGothic"));
gothDescendantDict.set("CIDSystemInfo", cidSysInfoDict);
gothDescendantDict.set("Subtype", Name.get("CIDFontType2"));
gothDescendantDict.set("Type", Name.get("Font"));
gothDescendantDict.set("FontDescriptor", fontDescriptorDict);
gothDict.set("DescendantFonts", [gothDescendantDict]);
const helvRef = Ref.get(314, 0);
const gothRef = Ref.get(159, 0);
helvRefObj = { ref: helvRef, data: helvDict };
gothRefObj = { ref: gothRef, data: gothDict };
const resourceDict = new Dict();
const fontDict = new Dict();
fontDict.set("Helv", fontRef);
fontDict.set("Helv", helvRef);
resourceDict.set("Font", fontDict);
textWidgetDict.set("DA", "/Helv 5 Tf");
@ -1447,7 +1508,7 @@ describe("annotation", function () {
});
afterEach(function () {
textWidgetDict = fontRefObj = null;
textWidgetDict = helvRefObj = gothRefObj = null;
});
it("should handle unknown text alignment, maximum length and flags", function (done) {
@ -1614,7 +1675,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1644,6 +1705,46 @@ describe("annotation", function () {
}, done.fail);
});
// Checks the appearance stream generated for a single-line text field
// which uses the composite "Goth" font and holds a Japanese value.
it("should render regular text in Japanese for printing", function (done) {
// Make the Goth font available in the default resources and select it
// (at size 5) in the default appearance.
textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
textWidgetDict.set("DA", "/Goth 5 Tf");
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
gothRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
AnnotationFactory.create(
xref,
textWidgetRef,
pdfManagerMock,
idFactoryMock
)
.then(annotation => {
const id = annotation.data.id;
const annotationStorage = {};
annotationStorage[id] = { value: "こんにちは世界の" };
return annotation._getAppearance(
partialEvaluator,
task,
annotationStorage
);
}, done.fail)
.then(appearance => {
// The stored value written as two bytes per char (e.g. U+3053 gives
// \x30\x53), which is how it must show up in the Tj operand.
const utf16String =
"\x30\x53\x30\x93\x30\x6b\x30\x61" +
"\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
expect(appearance).toEqual(
"/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 0 Tm" +
` 2.00 2.00 Td (${utf16String}) Tj ET Q EMC`
);
done();
}, done.fail);
});
it("should render regular text for printing using normal appearance", function (done) {
const textWidgetRef = Ref.get(271, 0);
@ -1658,7 +1759,7 @@ describe("annotation", function () {
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1699,7 +1800,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1729,13 +1830,53 @@ describe("annotation", function () {
}, done.fail);
});
// Checks the appearance stream generated when the default appearance
// gives a font size of 0 for the composite "Goth" font: the expected stream
// uses a font size of 9 instead.
it("should render auto-sized text in Japanese for printing", function (done) {
textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
// A size of 0 in the default appearance.
textWidgetDict.set("DA", "/Goth 0 Tf");
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
gothRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
AnnotationFactory.create(
xref,
textWidgetRef,
pdfManagerMock,
idFactoryMock
)
.then(annotation => {
const id = annotation.data.id;
const annotationStorage = {};
annotationStorage[id] = { value: "こんにちは世界の" };
return annotation._getAppearance(
partialEvaluator,
task,
annotationStorage
);
}, done.fail)
.then(appearance => {
// The stored value written as two bytes per char.
const utf16String =
"\x30\x53\x30\x93\x30\x6b\x30\x61" +
"\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
expect(appearance).toEqual(
"/Tx BMC q BT /Goth 9 Tf 1 0 0 1 0 0 Tm" +
` 2.00 2.00 Td (${utf16String}) Tj ET Q EMC`
);
done();
}, done.fail);
});
it("should not render a password for printing", function (done) {
textWidgetDict.set("Ff", AnnotationFieldFlag.PASSWORD);
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1768,7 +1909,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1808,6 +1949,45 @@ describe("annotation", function () {
}, done.fail);
});
// Checks the appearance stream generated for a multiline text field which
// uses the composite "Goth" font: the Japanese value is expected to be
// split into two lines.
it("should render multiline text in Japanese for printing", function (done) {
textWidgetDict.set("Ff", AnnotationFieldFlag.MULTILINE);
textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
textWidgetDict.set("DA", "/Goth 5 Tf");
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
gothRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
AnnotationFactory.create(
xref,
textWidgetRef,
pdfManagerMock,
idFactoryMock
)
.then(annotation => {
const id = annotation.data.id;
const annotationStorage = {};
annotationStorage[id] = { value: "こんにちは世界の" };
return annotation._getAppearance(
partialEvaluator,
task,
annotationStorage
);
}, done.fail)
.then(appearance => {
// First line: the first five chars of the value; second line: the
// remaining three. Each char is written as two bytes.
expect(appearance).toEqual(
"/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 10 Tm " +
"2.00 -5.00 Td (\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f) Tj\n" +
"0.00 -5.00 Td (\x4e\x16\x75\x4c\x30\x6e) Tj ET Q EMC"
);
done();
}, done.fail);
});
it("should render multiline text with various EOL for printing", function (done) {
textWidgetDict.set("Ff", AnnotationFieldFlag.MULTILINE);
textWidgetDict.set("Rect", [0, 0, 128, 10]);
@ -1815,7 +1995,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1881,7 +2061,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1914,9 +2094,55 @@ describe("annotation", function () {
}, done.fail);
});
// Checks the appearance stream generated for a comb text field which uses
// the composite "Goth" font: every two-byte char is expected in its own Tj
// operation.
it("should render comb with Japanese text for printing", function (done) {
textWidgetDict.set("Ff", AnnotationFieldFlag.COMB);
textWidgetDict.set("MaxLen", 4);
textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
textWidgetDict.set("DA", "/Goth 5 Tf");
textWidgetDict.set("Rect", [0, 0, 32, 10]);
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
gothRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
AnnotationFactory.create(
xref,
textWidgetRef,
pdfManagerMock,
idFactoryMock
)
.then(annotation => {
const id = annotation.data.id;
const annotationStorage = {};
annotationStorage[id] = { value: "こんにちは世界の" };
return annotation._getAppearance(
partialEvaluator,
task,
annotationStorage
);
}, done.fail)
.then(appearance => {
// A width of 32 with a MaxLen of 4 gives the 8.00 step between the
// cells; each Tj operand holds the two bytes of a single char.
expect(appearance).toEqual(
"/Tx BMC q BT /Goth 5 Tf 1 0 0 1 2 2 Tm" +
" (\x30\x53) Tj 8.00 0 Td (\x30\x93) Tj 8.00 0 Td (\x30\x6b) Tj" +
" 8.00 0 Td (\x30\x61) Tj 8.00 0 Td (\x30\x6f) Tj" +
" 8.00 0 Td (\x4e\x16) Tj 8.00 0 Td (\x75\x4c) Tj" +
" 8.00 0 Td (\x30\x6e) Tj ET Q EMC"
);
done();
}, done.fail);
});
it("should save text", function (done) {
const textWidgetRef = Ref.get(123, 0);
const xref = new XRefMock([{ ref: textWidgetRef, data: textWidgetDict }]);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
helvRefObj,
]);
partialEvaluator.xref = xref;
const task = new WorkerTask("test save");
@ -1935,17 +2161,17 @@ describe("annotation", function () {
expect(data.length).toEqual(2);
const [oldData, newData] = data;
expect(oldData.ref).toEqual(Ref.get(123, 0));
expect(newData.ref).toEqual(Ref.get(1, 0));
expect(newData.ref).toEqual(Ref.get(2, 0));
oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)");
expect(oldData.data).toEqual(
"123 0 obj\n" +
"<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Helv 5 Tf) /DR " +
"<< /Font << /Helv 314 0 R>>>> /Rect [0 0 32 10] " +
"/V (hello world) /AP << /N 1 0 R>> /M (date)>>\nendobj\n"
"/V (hello world) /AP << /N 2 0 R>> /M (date)>>\nendobj\n"
);
expect(newData.data).toEqual(
"1 0 obj\n<< /Length 77 /Subtype /Form /Resources " +
"2 0 obj\n<< /Length 77 /Subtype /Form /Resources " +
"<< /Font << /Helv 314 0 R>>>> /BBox [0 0 32 10]>> stream\n" +
"/Tx BMC q BT /Helv 5 Tf 1 0 0 1 0 0 Tm 2.00 2.00 Td (hello world) Tj " +
"ET Q EMC\nendstream\nendobj\n"
@ -2039,6 +2265,55 @@ describe("annotation", function () {
done();
}, done.fail);
});
// Checks what is saved for a text field which uses the composite "Goth"
// font and holds a Japanese value: both the updated annotation dictionary
// and the new appearance stream are verified.
it("should save Japanese text", function (done) {
textWidgetDict.get("DR").get("Font").set("Goth", gothRefObj.ref);
textWidgetDict.set("DA", "/Goth 5 Tf");
const textWidgetRef = Ref.get(123, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
gothRefObj,
]);
partialEvaluator.xref = xref;
const task = new WorkerTask("test save");
AnnotationFactory.create(
xref,
textWidgetRef,
pdfManagerMock,
idFactoryMock
)
.then(annotation => {
const annotationStorage = {};
annotationStorage[annotation.data.id] = { value: "こんにちは世界の" };
return annotation.save(partialEvaluator, task, annotationStorage);
}, done.fail)
.then(data => {
// The stored value written as two bytes per char.
const utf16String =
"\x30\x53\x30\x93\x30\x6b\x30\x61" +
"\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
expect(data.length).toEqual(2);
const [oldData, newData] = data;
expect(oldData.ref).toEqual(Ref.get(123, 0));
expect(newData.ref).toEqual(Ref.get(2, 0));
// The modification date changes on every run: replace it with a
// placeholder before comparing.
oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)");
// In the annotation dictionary the value (/V) is expected with a
// leading \xfe\xff byte order mark...
expect(oldData.data).toEqual(
"123 0 obj\n" +
"<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Goth 5 Tf) /DR " +
"<< /Font << /Helv 314 0 R /Goth 159 0 R>>>> /Rect [0 0 32 10] " +
`/V (\xfe\xff${utf16String}) /AP << /N 2 0 R>> /M (date)>>\nendobj\n`
);
// ... whereas the Tj operand of the appearance stream has no such mark.
expect(newData.data).toEqual(
"2 0 obj\n<< /Length 82 /Subtype /Form /Resources " +
"<< /Font << /Helv 314 0 R /Goth 159 0 R>>>> /BBox [0 0 32 10]>> stream\n" +
`/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 0 Tm 2.00 2.00 Td (${utf16String}) Tj ` +
"ET Q EMC\nendstream\nendobj\n"
);
done();
}, done.fail);
});
});
describe("ButtonWidgetAnnotation", function () {

View File

@ -21,6 +21,7 @@ import {
escapeString,
getModificationDate,
isArrayBuffer,
isAscii,
isBool,
isNum,
isSameOrigin,
@ -29,6 +30,7 @@ import {
string32,
stringToBytes,
stringToPDFString,
stringToUTF16BEString,
} from "../../src/shared/util.js";
describe("util", function () {
@ -346,4 +348,26 @@ describe("util", function () {
expect(encodeToXmlString(str)).toEqual(str);
});
});
// isAscii must accept a pure ASCII string and reject any string containing
// at least one non-ASCII char, even when it starts with ASCII chars.
describe("isAscii", function () {
it("handles ascii/non-ascii strings", function () {
expect(isAscii("hello world")).toEqual(true);
expect(isAscii("こんにちは世界の")).toEqual(false);
expect(isAscii("hello world in Japanese is こんにちは世界の")).toEqual(
false
);
});
});
// stringToUTF16BEString must output the \xfe\xff byte order mark followed
// by two bytes (high byte first) for each char of the input.
describe("stringToUTF16BEString", function () {
it("should encode a string in UTF16BE with a BOM", function () {
// ASCII chars have a null high byte.
expect(stringToUTF16BEString("hello world")).toEqual(
"\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"
);
expect(stringToUTF16BEString("こんにちは世界の")).toEqual(
"\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61" +
"\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
);
});
});
});