Merge pull request #15105 from calixteman/15094

Always flush the current item with MarkedContent stuff when getting text (#15094)
This commit is contained in:
Jonas Jenwald 2022-06-25 18:24:33 +02:00 committed by GitHub
commit 4e025e1f08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 3 deletions

View File

@ -3290,6 +3290,7 @@ class PartialEvaluator {
);
return;
case OPS.beginMarkedContent:
flushTextContentItem();
if (includeMarkedContent) {
textContent.items.push({
type: "beginMarkedContent",
@ -3298,8 +3299,8 @@ class PartialEvaluator {
}
break;
case OPS.beginMarkedContentProps:
flushTextContentItem();
if (includeMarkedContent) {
flushTextContentItem();
let mcid = null;
if (args[1] instanceof Dict) {
mcid = args[1].get("MCID");
@ -3314,8 +3315,8 @@ class PartialEvaluator {
}
break;
case OPS.endMarkedContent:
flushTextContentItem();
if (includeMarkedContent) {
flushTextContentItem();
textContent.items.push({
type: "endMarkedContent",
});

View File

@ -78,7 +78,9 @@ describe("api", function () {
}
/**
 * Concatenates the text of a list of text-content items into a single string.
 *
 * Items that have no `str` property (for example entries that only carry a
 * `type`, such as `{ type: "endMarkedContent" }`) contribute an empty string
 * instead of the literal text "undefined".
 *
 * @param {Array<Object>} items - Items with an optional `str` string and an
 *   optional `hasEOL` flag.
 * @returns {string} The joined text, with "\n" appended after every item whose
 *   `hasEOL` is truthy.
 */
function mergeText(items) {
  return items
    .map(chunk => (chunk.str ?? "") + (chunk.hasEOL ? "\n" : ""))
    .join("");
}
describe("getDocument", function () {
@ -2275,6 +2277,28 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});
// Regression check for issue 15094: the merged text of a page must be the
// same whether or not marked-content items are requested alongside the text.
it("gets text content with or without includeMarkedContent, and compare (issue 15094)", async function () {
  if (isNodeJS) {
    pending("Linked test-cases are not supported in Node.js.");
  }

  const task = getDocument(buildGetDocumentParams("pdf.pdf"));
  const doc = await task.promise;
  const targetPage = await doc.getPage(568);

  // First pass: text items only.
  const plainContent = await targetPage.getTextContent({
    includeMarkedContent: false,
  });
  const textWithoutMC = mergeText(plainContent.items);

  // Second pass: same page, with marked-content items included in the list.
  const markedContent = await targetPage.getTextContent({
    includeMarkedContent: true,
  });
  const textWithMC = mergeText(markedContent.items);

  expect(textWithoutMC).toEqual(textWithMC);

  await task.destroy();
});
it("gets empty structure tree", async function () {
const tree = await page.getStructTree();