Create a new chunk when the char is raised too much compared to the previous one

This commit is contained in:
Calixte Denizet 2023-03-28 12:00:53 +02:00
parent 384bd96165
commit a96f10e55d
5 changed files with 33 additions and 0 deletions

View File

@ -2341,6 +2341,12 @@ class PartialEvaluator {
const SPACE_IN_FLOW_MIN_FACTOR = 0.102;
const SPACE_IN_FLOW_MAX_FACTOR = 0.6;
// If a char is too high/too low compared to the previous one, we just
// create a new chunk: when the advance isn't in the
// +/-VERTICAL_SHIFT_RATIO * height range, the current chunk is flushed.
const VERTICAL_SHIFT_RATIO = 0.25;
const self = this;
const xref = this.xref;
const showSpacedTextBuffer = [];
@ -2649,6 +2655,10 @@ class PartialEvaluator {
}
}
if (Math.abs(advanceX) > textContentItem.width * VERTICAL_SHIFT_RATIO) {
flushTextContentItem();
}
return true;
}
@ -2706,6 +2716,10 @@ class PartialEvaluator {
}
}
if (Math.abs(advanceY) > textContentItem.height * VERTICAL_SHIFT_RATIO) {
flushTextContentItem();
}
return true;
}

View File

@ -581,3 +581,4 @@
!issue16063.pdf
!issue16067.pdf
!bug1820909.1.pdf
!issue16221.pdf

BIN
test/pdfs/issue16221.pdf Executable file

Binary file not shown.

View File

@ -7510,5 +7510,12 @@
"md5": "f71e89ebe3d6e75e0c83ce41cd72df1f",
"link": true,
"type": "other"
},
{
"id": "issue16221-text",
"file": "pdfs/issue16221.pdf",
"md5": "62d93c9b3aa4ba3af5446504632e78a5",
"rounds": 1,
"type": "text"
}
]

View File

@ -2624,6 +2624,17 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});
it("gets text content with a rised text", async function () {
  // Load the issue16221.pdf fixture and extract the text of its first page.
  const params = buildGetDocumentParams("issue16221.pdf");
  const loadingTask = getDocument(params);
  const pdfDoc = await loadingTask.promise;
  const pdfPage = await pdfDoc.getPage(1);
  const textContent = await pdfPage.getTextContent();

  // The page text must be reported as exactly two items, in this order.
  const strings = textContent.items.map(item => item.str);
  expect(strings).toEqual(["Hello ", "World"]);

  // Release the resources held by the loading task.
  await loadingTask.destroy();
});
it("gets empty structure tree", async function () {
const tree = await page.getStructTree();