diff --git a/src/core/glyphlist.js b/src/core/glyphlist.js index c0878b080..1b4ffb74c 100644 --- a/src/core/glyphlist.js +++ b/src/core/glyphlist.js @@ -1828,10 +1828,13 @@ const getGlyphsUnicode = getArrayLookupTableFactory(function () { "feicoptic", 0x03e5, "female", 0x2640, "ff", 0xfb00, - "f_f", 0xfb00, // Fixes issue 11016. + "f_f", 0xfb00, "ffi", 0xfb03, + "f_f_i", 0xfb03, "ffl", 0xfb04, + "f_f_l", 0xfb04, "fi", 0xfb01, + "f_i", 0xfb01, "fifteencircle", 0x246e, "fifteenparen", 0x2482, "fifteenperiod", 0x2496, @@ -1875,6 +1878,7 @@ const getGlyphsUnicode = getArrayLookupTableFactory(function () { "fivesuperior", 0x2075, "fivethai", 0x0e55, "fl", 0xfb02, + "f_l", 0xfb02, "florin", 0x0192, "fmonospace", 0xff46, "fmsquare", 0x3399, diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 74ad3ec29..1b0d41af4 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -108,6 +108,7 @@ !issue10542_reduced.pdf !issue10665_reduced.pdf !issue11016_reduced.pdf +!issue15516_reduced.pdf !issue11045.pdf !bug1057544.pdf !issue11150_reduced.pdf diff --git a/test/pdfs/issue15516_reduced.pdf b/test/pdfs/issue15516_reduced.pdf new file mode 100644 index 000000000..e4397fa2e Binary files /dev/null and b/test/pdfs/issue15516_reduced.pdf differ diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index a5fba7314..591f94624 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -2417,6 +2417,23 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) await loadingTask.destroy(); }); + // TODO: Change this to a `text` reference test instead. + // Currently that doesn't work, since the `XMLSerializer` fails on + // the ASCII "control characters" found in the text-content. 
+ it("gets text content with non-standard ligatures (issue 15516)", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("issue15516_reduced.pdf") + ); + const pdfDoc = await loadingTask.promise; + const pdfPage = await pdfDoc.getPage(1); + const { items } = await pdfPage.getTextContent(); + const text = mergeText(items); + + expect(text).toEqual("ffi fi ffl ff fl \x07 \x08 Ý"); + + await loadingTask.destroy(); + }); + + it("gets empty structure tree", async function () { const tree = await page.getStructTree();