Always flush the current text item at marked-content operators when getting text, whether or not `includeMarkedContent` is set (#15094)
This commit is contained in:
parent
23fcdabb37
commit
3789dab307
@ -3290,6 +3290,7 @@ class PartialEvaluator {
|
||||
);
|
||||
return;
|
||||
case OPS.beginMarkedContent:
|
||||
flushTextContentItem();
|
||||
if (includeMarkedContent) {
|
||||
textContent.items.push({
|
||||
type: "beginMarkedContent",
|
||||
@ -3298,8 +3299,8 @@ class PartialEvaluator {
|
||||
}
|
||||
break;
|
||||
case OPS.beginMarkedContentProps:
|
||||
flushTextContentItem();
|
||||
if (includeMarkedContent) {
|
||||
flushTextContentItem();
|
||||
let mcid = null;
|
||||
if (args[1] instanceof Dict) {
|
||||
mcid = args[1].get("MCID");
|
||||
@ -3314,8 +3315,8 @@ class PartialEvaluator {
|
||||
}
|
||||
break;
|
||||
case OPS.endMarkedContent:
|
||||
flushTextContentItem();
|
||||
if (includeMarkedContent) {
|
||||
flushTextContentItem();
|
||||
textContent.items.push({
|
||||
type: "endMarkedContent",
|
||||
});
|
||||
|
@ -78,7 +78,9 @@ describe("api", function () {
|
||||
}
|
||||
|
||||
/**
 * Concatenate the text of text-content items into a single string.
 *
 * @param {Array<Object>} items - Items as returned in `getTextContent().items`.
 *   Each item may carry a `str` (its text) and a `hasEOL` flag.
 * @returns {string} The joined text, with "\n" appended after every item
 *   whose `hasEOL` is truthy.
 */
function mergeText(items) {
  // Marked-content marker items only have a `type` and no `str`; treat a
  // missing `str` as empty so they don't add the literal text "undefined".
  return items
    .map(chunk => (chunk.str ?? "") + (chunk.hasEOL ? "\n" : ""))
    .join("");
}
|
||||
|
||||
describe("getDocument", function () {
|
||||
@ -2275,6 +2277,28 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
// Regression test for issue 15094: the text extracted from one page must be
// the same whether or not marked-content items are included in the result.
it("gets text content with or without includeMarkedContent, and compare (issue 15094)", async function () {
|
||||
// Marked as pending (not run) under Node.js.
if (isNodeJS) {
|
||||
pending("Linked test-cases are not supported in Node.js.");
|
||||
}
|
||||
|
||||
const loadingTask = getDocument(buildGetDocumentParams("pdf.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(568);
|
||||
// First pass: extract the text without marked-content items.
let { items } = await pdfPage.getTextContent({
|
||||
includeMarkedContent: false,
|
||||
});
|
||||
const textWithoutMC = mergeText(items);
|
||||
// Second pass: same page, this time with marked-content items included;
// `items` is reassigned via destructuring, hence the wrapping parentheses.
({ items } = await pdfPage.getTextContent({
|
||||
includeMarkedContent: true,
|
||||
}));
|
||||
const textWithMC = mergeText(items);
|
||||
|
||||
// Both extractions must merge to the same text.
expect(textWithoutMC).toEqual(textWithMC);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets empty structure tree", async function () {
|
||||
const tree = await page.getStructTree();
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user