Refactor the `stringToPDFString` helper function for UTF-16 strings

This patch changes the function to use `TextDecoder` for both kinds of UTF-16 BOM strings (big-endian and little-endian), rather than decoding them manually.
This commit is contained in:
Jonas Jenwald 2022-01-14 18:44:38 +01:00
parent 76444888fb
commit 12d8f0b64d

View File

@ -955,38 +955,29 @@ const PDFStringTranslateTable = [
]; ];
function stringToPDFString(str) { function stringToPDFString(str) {
const length = str.length, if (str[0] >= "\xEF") {
strBuf = []; let encoding;
// UTF-16BE BOM
if (str[0] === "\xFE" && str[1] === "\xFF") { if (str[0] === "\xFE" && str[1] === "\xFF") {
for (let i = 2; i < length; i += 2) { encoding = "utf-16be";
strBuf.push( } else if (str[0] === "\xFF" && str[1] === "\xFE") {
String.fromCharCode((str.charCodeAt(i) << 8) | str.charCodeAt(i + 1)) encoding = "utf-16le";
); } else if (str[0] === "\xEF" && str[1] === "\xBB" && str[2] === "\xBF") {
encoding = "utf-8";
} }
return strBuf.join("");
} if (encoding) {
// UTF-16LE BOM
if (str[0] === "\xFF" && str[1] === "\xFE") {
for (let i = 2; i < length; i += 2) {
strBuf.push(
String.fromCharCode((str.charCodeAt(i + 1) << 8) | str.charCodeAt(i))
);
}
return strBuf.join("");
}
// UTF-8 BOM
if (str[0] === "\xEF" && str[1] === "\xBB" && str[2] === "\xBF") {
try { try {
const decoder = new TextDecoder("utf-8", { fatal: true }); const decoder = new TextDecoder(encoding, { fatal: true });
const buffer = stringToBytes(str); const buffer = stringToBytes(str);
return decoder.decode(buffer); return decoder.decode(buffer);
} catch (ex) { } catch (ex) {
warn(`stringToPDFString: "${ex}".`); warn(`stringToPDFString: "${ex}".`);
} }
} }
}
// ISO Latin 1 // ISO Latin 1
for (let i = 0; i < length; ++i) { const strBuf = [];
for (let i = 0, ii = str.length; i < ii; i++) {
const code = PDFStringTranslateTable[str.charCodeAt(i)]; const code = PDFStringTranslateTable[str.charCodeAt(i)];
strBuf.push(code ? String.fromCharCode(code) : str.charAt(i)); strBuf.push(code ? String.fromCharCode(code) : str.charAt(i));
} }