Decode non-ASCII values found in the xfa:datasets (PR 14735 follow-up)
*Please note:* This is possibly bad/wrong in general, but I figured that submitting it for review wouldn't hurt. It seems that even Adobe Reader doesn't handle the non-ASCII characters that appear in some of the fields correctly; however, it should be pretty easy to improve things on the PDF.js side.
This commit is contained in:
parent
b0ec83262b
commit
f33ce5fc2d
@ -13,9 +13,19 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { stringToUTF8String, warn } from "../shared/util.js";
|
||||
import { parseXFAPath } from "./core_utils.js";
|
||||
import { SimpleXMLParser } from "./xml_parser.js";
|
||||
|
||||
/**
 * Best-effort UTF-8 decoding of a string.
 *
 * Passes `str` through `stringToUTF8String`; if that call throws, a warning
 * containing the exception is emitted and the input is handed back untouched,
 * so callers always receive a value.
 *
 * @param {string} str - The value to decode.
 * @returns {string} The decoded value, or `str` itself when decoding failed.
 */
function decodeString(str) {
  // Start from the raw input so that a failed decode falls through unchanged.
  let result = str;
  try {
    result = stringToUTF8String(str);
  } catch (ex) {
    warn(`UTF-8 decoding failed: "${ex}".`);
  }
  return result;
}
|
||||
|
||||
class DatasetXMLParser extends SimpleXMLParser {
|
||||
constructor(options) {
|
||||
super(options);
|
||||
@ -60,10 +70,10 @@ class DatasetReader {
|
||||
|
||||
const first = node.firstChild;
|
||||
if (first && first.nodeName === "value") {
|
||||
return node.children.map(child => child.textContent);
|
||||
return node.children.map(child => decodeString(child.textContent));
|
||||
}
|
||||
|
||||
return node.textContent;
|
||||
return decodeString(node.textContent);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user