diff --git a/src/core/cmap.js b/src/core/cmap.js index 23f3a174a..b9f0855da 100644 --- a/src/core/cmap.js +++ b/src/core/cmap.js @@ -242,10 +242,17 @@ class CMap { const lastByte = dstLow.length - 1; while (low <= high) { this._map[low++] = dstLow; - // Only the last byte has to be incremented. + // Only the last byte has to be incremented (in the normal case). + const nextCharCode = dstLow.charCodeAt(lastByte) + 1; + if (nextCharCode > 0xff) { + dstLow = + dstLow.substring(0, lastByte - 1) + + String.fromCharCode(dstLow.charCodeAt(lastByte - 1) + 1) + + "\x00"; + continue; + } dstLow = - dstLow.substring(0, lastByte) + - String.fromCharCode(dstLow.charCodeAt(lastByte) + 1); + dstLow.substring(0, lastByte) + String.fromCharCode(nextCharCode); } } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 5c84ef99c..6df69c87d 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -338,6 +338,7 @@ !noembed-jis7.pdf !issue12504.pdf !noembed-eucjp.pdf +!bug1627427_reduced.pdf !noembed-sjis.pdf !vertical.pdf !issue13343.pdf diff --git a/test/pdfs/bug1627427_reduced.pdf b/test/pdfs/bug1627427_reduced.pdf new file mode 100644 index 000000000..d611a7383 Binary files /dev/null and b/test/pdfs/bug1627427_reduced.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index bebde825c..2bdc9c9b2 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -3022,6 +3022,13 @@ "rounds": 1, "type": "eq" }, + { "id": "bug1627427", + "file": "pdfs/bug1627427_reduced.pdf", + "md5": "3ff75fcf455af49803f0f04eb071bdc3", + "link": false, + "rounds": 1, + "type": "text" + }, { "id": "issue8586", "file": "pdfs/issue8586.pdf", "md5": "16b5230364017d3b0d2d65978eb35816", diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 2af09e5f8..4382c2ed0 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1757,6 +1757,22 @@ sources, for full support with Dvips.`) await loadingTask.destroy(); }); + it("gets text content, with beginbfrange operator handled correctly (bug 1627427)", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("bug1627427_reduced.pdf") + ); + const pdfDoc = await loadingTask.promise; + const pdfPage = await pdfDoc.getPage(1); + const { items } = await pdfPage.getTextContent(); + const text = mergeText(items); + + expect(text).toEqual( + "침하게 흐린 품이 눈이 올 듯하더니 눈은 아니 오고 얼다가 만 비가 추" + ); + + await loadingTask.destroy(); + }); + it("gets empty structure tree", async function () { const tree = await page.getStructTree();