Slightly modify the max width of a tracking space

This commit is contained in:
Calixte Denizet 2023-03-07 19:38:49 +01:00
parent ec5288caa5
commit b8dda089e2
4 changed files with 37 additions and 12 deletions

View File

@ -2386,7 +2386,7 @@ class PartialEvaluator {
// A whitespace <= fontSize * TRACKING_SPACE_FACTOR is a tracking space,
// so it doesn't count as a space.
const TRACKING_SPACE_FACTOR = 0.1;
const TRACKING_SPACE_FACTOR = 0.102;
// When a whitespace <= fontSize * NOT_A_SPACE_FACTOR, there is no space
// even if one is present in the text stream.
@ -2404,7 +2404,7 @@ class PartialEvaluator {
// (which means a new span in the text layer).
// It's useful for adjusting the span in the text layer as closely as
// possible to what is displayed in the canvas.
const SPACE_IN_FLOW_MIN_FACTOR = 0.1;
const SPACE_IN_FLOW_MIN_FACTOR = 0.102;
const SPACE_IN_FLOW_MAX_FACTOR = 0.6;
const self = this;
@ -2490,16 +2490,12 @@ class PartialEvaluator {
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
textContentItem.trackingSpaceMin =
textState.fontSize * TRACKING_SPACE_FACTOR;
textContentItem.notASpace = textState.fontSize * NOT_A_SPACE_FACTOR;
textContentItem.negativeSpaceMax =
textState.fontSize * NEGATIVE_SPACE_FACTOR;
textContentItem.spaceInFlowMin =
textState.fontSize * SPACE_IN_FLOW_MIN_FACTOR;
textContentItem.spaceInFlowMax =
textState.fontSize * SPACE_IN_FLOW_MAX_FACTOR;
const { fontSize } = textState;
textContentItem.trackingSpaceMin = fontSize * TRACKING_SPACE_FACTOR;
textContentItem.notASpace = fontSize * NOT_A_SPACE_FACTOR;
textContentItem.negativeSpaceMax = fontSize * NEGATIVE_SPACE_FACTOR;
textContentItem.spaceInFlowMin = fontSize * SPACE_IN_FLOW_MIN_FACTOR;
textContentItem.spaceInFlowMax = fontSize * SPACE_IN_FLOW_MAX_FACTOR;
textContentItem.hasEOL = false;
textContentItem.initialized = true;

View File

@ -0,0 +1,2 @@
https://github.com/mozilla/pdf.js/files/10907776/Fiskelagkagen_2016v2.1.pdf

View File

@ -7430,5 +7430,12 @@
"link": true,
"type": "eq",
"forms": true
},
{
"id": "issue16119",
"file": "pdfs/issue16119.pdf",
"md5": "76d680172c969c77c9fb650b3d822ad6",
"link": true,
"type": "other"
}
]

View File

@ -2368,6 +2368,26 @@ page 1 / 3`);
await loadingTask.destroy();
});
// Regression test for issue 16119: the text extracted from the first page
// must contain the expected sentence without any extra spaces in it.
it("gets text content, with no extra spaces (issue 16119)", async function () {
  // The fixture is a linked test-case, which cannot be used under Node.js.
  if (isNodeJS) {
    pending("Linked test-cases are not supported in Node.js.");
  }

  const expectedSentence =
    "Engang var der i Samvirke en opskrift på en fiskelagkage, som jeg med";

  const task = getDocument(buildGetDocumentParams("issue16119.pdf"));
  const doc = await task.promise;
  const firstPage = await doc.getPage(1);
  const textContent = await firstPage.getTextContent();
  const mergedText = mergeText(textContent.items);

  // The substring only matches when no spurious space splits the words.
  expect(mergedText.includes(expectedSentence)).toBe(true);

  await task.destroy();
});
it("gets text content, with merged spaces (issue 13201)", async function () {
const loadingTask = getDocument(buildGetDocumentParams("issue13201.pdf"));
const pdfDoc = await loadingTask.promise;