Text extraction: raise two spacing thresholds and add a regression test (issue 16119)

Summary of the hunks below:
  * src/core/evaluator.js: TRACKING_SPACE_FACTOR and SPACE_IN_FLOW_MIN_FACTOR
    change from 0.1 to 0.102. The five per-item thresholds are now computed
    from a destructured `fontSize` local instead of repeating
    `textState.fontSize`; the factors applied to each threshold are unchanged.
  * test/pdfs/issue16119.pdf.link: new linked test file.
  * test/test_manifest.json: new "issue16119" entry of type "other".
  * test/unit/api_spec.js: new unit test asserting that the merged text of
    page 1 contains one specific sentence; it is marked pending under Node.js.

NOTE(review): this patch was restored from a copy whose line breaks had been
collapsed. Leading indentation and the position of blank context lines were
re-derived from the hunk line counts, not copied verbatim. Confirm them against
the target files before applying, or apply with whitespace-tolerant matching.

diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index 33b878f64..eb6c0b7fc 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -2386,7 +2386,7 @@ class PartialEvaluator {
 
     // A white <= fontSize * TRACKING_SPACE_FACTOR is a tracking space
     // so it doesn't count as a space.
-    const TRACKING_SPACE_FACTOR = 0.1;
+    const TRACKING_SPACE_FACTOR = 0.102;
 
     // When a white <= fontSize * NOT_A_SPACE_FACTOR, there is no space
     // even if one is present in the text stream.
@@ -2404,7 +2404,7 @@ class PartialEvaluator {
     // (which means a new span in the text layer).
     // It's useful to adjust the best as possible the span in the layer
     // to what is displayed in the canvas.
-    const SPACE_IN_FLOW_MIN_FACTOR = 0.1;
+    const SPACE_IN_FLOW_MIN_FACTOR = 0.102;
     const SPACE_IN_FLOW_MAX_FACTOR = 0.6;
 
     const self = this;
@@ -2490,16 +2490,12 @@ class PartialEvaluator {
       const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
       textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
 
-      textContentItem.trackingSpaceMin =
-        textState.fontSize * TRACKING_SPACE_FACTOR;
-      textContentItem.notASpace = textState.fontSize * NOT_A_SPACE_FACTOR;
-      textContentItem.negativeSpaceMax =
-        textState.fontSize * NEGATIVE_SPACE_FACTOR;
-      textContentItem.spaceInFlowMin =
-        textState.fontSize * SPACE_IN_FLOW_MIN_FACTOR;
-      textContentItem.spaceInFlowMax =
-        textState.fontSize * SPACE_IN_FLOW_MAX_FACTOR;
-
+      const { fontSize } = textState;
+      textContentItem.trackingSpaceMin = fontSize * TRACKING_SPACE_FACTOR;
+      textContentItem.notASpace = fontSize * NOT_A_SPACE_FACTOR;
+      textContentItem.negativeSpaceMax = fontSize * NEGATIVE_SPACE_FACTOR;
+      textContentItem.spaceInFlowMin = fontSize * SPACE_IN_FLOW_MIN_FACTOR;
+      textContentItem.spaceInFlowMax = fontSize * SPACE_IN_FLOW_MAX_FACTOR;
       textContentItem.hasEOL = false;
 
       textContentItem.initialized = true;
diff --git a/test/pdfs/issue16119.pdf.link b/test/pdfs/issue16119.pdf.link
new file mode 100644
index 000000000..7b2cbecd0
--- /dev/null
+++ b/test/pdfs/issue16119.pdf.link
@@ -0,0 +1,2 @@
+https://github.com/mozilla/pdf.js/files/10907776/Fiskelagkagen_2016v2.1.pdf
+
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 408ade6a9..cb23055be 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -7430,5 +7430,12 @@
     "link": true,
     "type": "eq",
     "forms": true
+  },
+  {
+    "id": "issue16119",
+    "file": "pdfs/issue16119.pdf",
+    "md5": "76d680172c969c77c9fb650b3d822ad6",
+    "link": true,
+    "type": "other"
   }
 ]
diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index 00b940a94..2203eb7f9 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -2368,6 +2368,26 @@ page 1 / 3`);
       await loadingTask.destroy();
     });
 
+    it("gets text content, with no extra spaces (issue 16119)", async function () {
+      if (isNodeJS) {
+        pending("Linked test-cases are not supported in Node.js.");
+      }
+
+      const loadingTask = getDocument(buildGetDocumentParams("issue16119.pdf"));
+      const pdfDoc = await loadingTask.promise;
+      const pdfPage = await pdfDoc.getPage(1);
+      const { items } = await pdfPage.getTextContent();
+      const text = mergeText(items);
+
+      expect(
+        text.includes(
+          "Engang var der i Samvirke en opskrift på en fiskelagkage, som jeg med"
+        )
+      ).toBe(true);
+
+      await loadingTask.destroy();
+    });
+
     it("gets text content, with merged spaces (issue 13201)", async function () {
       const loadingTask = getDocument(buildGetDocumentParams("issue13201.pdf"));
       const pdfDoc = await loadingTask.promise;