Merge pull request #14450 from Snuffleupagus/issue-14449
Add (basic) UTF-8 support in the `stringToPDFString` helper function (issue 14449)
This commit is contained in:
commit
e0032811cd
@ -955,28 +955,32 @@ const PDFStringTranslateTable = [
|
||||
];
|
||||
|
||||
/**
 * Converts a raw PDF string (one char per byte) into a JavaScript string.
 *
 * If the input starts with a byte-order mark, it is decoded with the
 * matching Unicode encoding:
 *  - "\xFE\xFF"     -> UTF-16BE
 *  - "\xFF\xFE"     -> UTF-16LE
 *  - "\xEF\xBB\xBF" -> UTF-8
 * Otherwise, or if strict decoding fails, every char code is looked up in
 * `PDFStringTranslateTable`; a truthy table entry replaces the character and
 * any other character is kept unchanged.
 *
 * @param {string} str - The raw string to convert.
 * @returns {string} The converted string.
 */
function stringToPDFString(str) {
  // Every supported BOM starts with a byte >= 0xEF, so this one comparison
  // lets ordinary strings skip the BOM checks entirely. For an empty string
  // `str[0]` is `undefined` and the comparison is false.
  if (str[0] >= "\xEF") {
    let encoding;
    if (str[0] === "\xFE" && str[1] === "\xFF") {
      encoding = "utf-16be";
    } else if (str[0] === "\xFF" && str[1] === "\xFE") {
      encoding = "utf-16le";
    } else if (str[0] === "\xEF" && str[1] === "\xBB" && str[2] === "\xBF") {
      encoding = "utf-8";
    }

    if (encoding) {
      try {
        // `fatal: true` makes malformed input throw rather than silently
        // producing U+FFFD, so that bad data reaches the fallback below.
        const decoder = new TextDecoder(encoding, { fatal: true });
        const buffer = stringToBytes(str);
        return decoder.decode(buffer);
      } catch (ex) {
        // Best effort: report the failure and fall through to the
        // table-driven conversion instead of propagating the error.
        warn(`stringToPDFString: "${ex}".`);
      }
    }
  }
  // ISO Latin 1
  const strBuf = [];
  for (let i = 0, ii = str.length; i < ii; i++) {
    const code = PDFStringTranslateTable[str.charCodeAt(i)];
    strBuf.push(code ? String.fromCharCode(code) : str.charAt(i));
  }
  return strBuf.join("");
}
|
||||
|
||||
|
@ -159,6 +159,19 @@ describe("util", function () {
|
||||
expect(stringToPDFString(str)).toEqual("string");
|
||||
});
|
||||
|
||||
it("handles UTF-8 strings", function () {
  // Each input starts with the UTF-8 byte-order mark (EF BB BF).
  const simpleStr = "\xEF\xBB\xBF\x73\x74\x72\x69\x6E\x67";
  expect(stringToPDFString(simpleStr)).toEqual("string");

  // Mixes one-, two-, three- and four-byte UTF-8 sequences.
  const complexStr =
    "\xEF\xBB\xBF\xE8\xA1\xA8\xE3\x83\x9D\xE3\x81\x82\x41\xE9\xB7\x97" +
    "\xC5\x92\xC3\xA9\xEF\xBC\xA2\xE9\x80\x8D\xC3\x9C\xC3\x9F\xC2\xAA" +
    "\xC4\x85\xC3\xB1\xE4\xB8\x82\xE3\x90\x80\xF0\xA0\x80\x80";
  // The expected text must match the bytes above exactly:
  // EF BC A2 is the fullwidth "Ｂ" (U+FF22), not ASCII "B", and
  // C3 9F C2 AA is the two characters "ß" (U+00DF) and "ª" (U+00AA).
  expect(stringToPDFString(complexStr)).toEqual(
    "表ポあA鷗ŒéＢ逍Üßªąñ丂㐀𠀀"
  );
});
|
||||
|
||||
it("handles empty strings", function () {
|
||||
// ISO Latin 1
|
||||
const str1 = "";
|
||||
@ -171,6 +184,10 @@ describe("util", function () {
|
||||
// UTF-16LE
|
||||
const str3 = "\xFF\xFE";
|
||||
expect(stringToPDFString(str3)).toEqual("");
|
||||
|
||||
// UTF-8
|
||||
const str4 = "\xEF\xBB\xBF";
|
||||
expect(stringToPDFString(str4)).toEqual("");
|
||||
});
|
||||
});
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user