diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 8e389119f..8b28d5b03 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -3550,6 +3550,16 @@ class PartialEvaluator { code = unicode; } break; + default: + // Support (some) non-standard ligatures. + switch (glyphName) { + case "f_h": + case "f_t": + case "T_h": + toUnicode[charcode] = glyphName.replaceAll("_", ""); + continue; + } + break; } if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) { // If `baseEncodingName` is one the predefined encodings, and `code` diff --git a/test/test_manifest.json b/test/test_manifest.json index a7521f7eb..e88f2975f 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -230,7 +230,14 @@ "link": true, "type": "eq", "annotations": true - }, + }, + { "id": "issue15516", + "file": "pdfs/issue15516_reduced.pdf", + "md5": "a30be78c73d13aa6ff890834ce69adc1", + "rounds": 1, + "link": false, + "type": "text" + }, { "id": "bug946506", "file": "pdfs/bug946506.pdf", "md5": "c28911b5c31bdc337c2ce404c5971cfc", diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 3bf5970ec..4c157c3b5 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -2607,23 +2607,6 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) await loadingTask.destroy(); }); - // TODO: Change this to a `text` reference test instead. - // Currently that doesn't work, since the `XMLSerializer` fails on - // the ASCII "control characters" found in the text-content. - it("gets text content with non-standard ligatures (issue issue15516)", async function () { - const loadingTask = getDocument( - buildGetDocumentParams("issue15516_reduced.pdf") - ); - const pdfDoc = await loadingTask.promise; - const pdfPage = await pdfDoc.getPage(1); - const { items } = await pdfPage.getTextContent(); - const text = mergeText(items); - - expect(text).toEqual("ffi fi ffl ff fl \x07 \x08 Ý"); - - await loadingTask.destroy(); - }); - it("gets text content with multi-byte entries, using predefined CMaps (issue 16176)", async function () { const loadingTask = getDocument( buildGetDocumentParams("issue16176.pdf", {