Always flush the current text item when a marked-content operator is encountered while getting text content, regardless of `includeMarkedContent` (#15094)
This commit is contained in:
parent
23fcdabb37
commit
3789dab307
@ -3290,6 +3290,7 @@ class PartialEvaluator {
|
|||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
case OPS.beginMarkedContent:
|
case OPS.beginMarkedContent:
|
||||||
|
flushTextContentItem();
|
||||||
if (includeMarkedContent) {
|
if (includeMarkedContent) {
|
||||||
textContent.items.push({
|
textContent.items.push({
|
||||||
type: "beginMarkedContent",
|
type: "beginMarkedContent",
|
||||||
@ -3298,8 +3299,8 @@ class PartialEvaluator {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case OPS.beginMarkedContentProps:
|
case OPS.beginMarkedContentProps:
|
||||||
|
flushTextContentItem();
|
||||||
if (includeMarkedContent) {
|
if (includeMarkedContent) {
|
||||||
flushTextContentItem();
|
|
||||||
let mcid = null;
|
let mcid = null;
|
||||||
if (args[1] instanceof Dict) {
|
if (args[1] instanceof Dict) {
|
||||||
mcid = args[1].get("MCID");
|
mcid = args[1].get("MCID");
|
||||||
@ -3314,8 +3315,8 @@ class PartialEvaluator {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case OPS.endMarkedContent:
|
case OPS.endMarkedContent:
|
||||||
|
flushTextContentItem();
|
||||||
if (includeMarkedContent) {
|
if (includeMarkedContent) {
|
||||||
flushTextContentItem();
|
|
||||||
textContent.items.push({
|
textContent.items.push({
|
||||||
type: "endMarkedContent",
|
type: "endMarkedContent",
|
||||||
});
|
});
|
||||||
|
@ -78,7 +78,9 @@ describe("api", function () {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function mergeText(items) {
|
function mergeText(items) {
|
||||||
return items.map(chunk => chunk.str + (chunk.hasEOL ? "\n" : "")).join("");
|
return items
|
||||||
|
.map(chunk => (chunk.str ?? "") + (chunk.hasEOL ? "\n" : ""))
|
||||||
|
.join("");
|
||||||
}
|
}
|
||||||
|
|
||||||
describe("getDocument", function () {
|
describe("getDocument", function () {
|
||||||
@ -2275,6 +2277,28 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
|||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("gets text content with or without includeMarkedContent, and compare (issue 15094)", async function () {
|
||||||
|
if (isNodeJS) {
|
||||||
|
pending("Linked test-cases are not supported in Node.js.");
|
||||||
|
}
|
||||||
|
|
||||||
|
const loadingTask = getDocument(buildGetDocumentParams("pdf.pdf"));
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(568);
|
||||||
|
let { items } = await pdfPage.getTextContent({
|
||||||
|
includeMarkedContent: false,
|
||||||
|
});
|
||||||
|
const textWithoutMC = mergeText(items);
|
||||||
|
({ items } = await pdfPage.getTextContent({
|
||||||
|
includeMarkedContent: true,
|
||||||
|
}));
|
||||||
|
const textWithMC = mergeText(items);
|
||||||
|
|
||||||
|
expect(textWithoutMC).toEqual(textWithMC);
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
it("gets empty structure tree", async function () {
|
it("gets empty structure tree", async function () {
|
||||||
const tree = await page.getStructTree();
|
const tree = await page.getStructTree();
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user