diff --git a/src/core/annotation.js b/src/core/annotation.js index abd5eddcb..b3f53460d 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -1010,7 +1010,6 @@ class Annotation { task, resources, includeMarkedContent: true, - combineTextItems: true, sink, viewBox, }); diff --git a/src/core/document.js b/src/core/document.js index 54f70655d..6e4ca7cff 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -511,13 +511,7 @@ class Page { }); } - extractTextContent({ - handler, - task, - includeMarkedContent, - sink, - combineTextItems, - }) { + extractTextContent({ handler, task, includeMarkedContent, sink }) { const contentStreamPromise = this.getContentStream(); const resourcesPromise = this.loadResources([ "ExtGState", @@ -545,7 +539,6 @@ class Page { task, resources: this.resources, includeMarkedContent, - combineTextItems, sink, viewBox: this.view, }); diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 748914b76..4058885e2 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2236,7 +2236,6 @@ class PartialEvaluator { task, resources, stateManager = null, - combineTextItems = false, includeMarkedContent = false, sink, seenStyles = new Set(), @@ -2534,11 +2533,7 @@ class PartialEvaluator { return false; } - if ( - !combineTextItems || - !textState.font || - !textContentItem.prevTransform - ) { + if (!textState.font || !textContentItem.prevTransform) { return true; } @@ -3191,7 +3186,6 @@ class PartialEvaluator { task, resources: xobj.dict.get("Resources") || resources, stateManager: xObjStateManager, - combineTextItems, includeMarkedContent, sink: sinkWrapper, seenStyles, diff --git a/src/core/worker.js b/src/core/worker.js index 471895c32..46efcca47 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -741,7 +741,7 @@ class WorkerMessageHandler { }); handler.on("GetTextContent", function (data, sink) { - const pageIndex = data.pageIndex; + const { pageIndex, includeMarkedContent } = data; pdfManager.getPage(pageIndex).then(function (page) { const task = new WorkerTask("GetTextContent: page " + pageIndex); @@ -755,8 +755,7 @@ class WorkerMessageHandler { handler, task, sink, - includeMarkedContent: data.includeMarkedContent, - combineTextItems: data.combineTextItems, + includeMarkedContent, }) .then( function () { diff --git a/src/display/api.js b/src/display/api.js index e206eed9b..28b86dac5 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -1120,8 +1120,6 @@ class PDFDocumentProxy { * Page getTextContent parameters. * * @typedef {Object} getTextContentParameters - * @property {boolean} disableCombineTextItems - Do not attempt to combine - * same line {@link TextItem}'s. The default value is `false`. * @property {boolean} [includeMarkedContent] - When true include marked * content items in the items array of TextContent. The default is `false`. */ @@ -1602,17 +1600,13 @@ class PDFPageProxy { * @param {getTextContentParameters} params - getTextContent parameters. * @returns {ReadableStream} Stream for reading text content chunks. */ - streamTextContent({ - disableCombineTextItems = false, - includeMarkedContent = false, - } = {}) { + streamTextContent({ includeMarkedContent = false } = {}) { const TEXT_CONTENT_CHUNK_SIZE = 100; return this._transport.messageHandler.sendWithStream( "GetTextContent", { pageIndex: this._pageIndex, - combineTextItems: disableCombineTextItems !== true, includeMarkedContent: includeMarkedContent === true, }, { diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 0aaa0d47c..ce46d4ef7 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -21,6 +21,7 @@ import { ImageKind, InvalidPDFException, MissingPDFException, + objectSize, OPS, PasswordException, PasswordResponses, @@ -2321,26 +2322,16 @@ describe("api", function () { }); it("gets text content", async function () { - const defaultPromise = page.getTextContent(); - const parametersPromise = page.getTextContent({ - disableCombineTextItems: true, - }); + const { items, styles } = await page.getTextContent(); - const data = await Promise.all([defaultPromise, parametersPromise]); + expect(items.length).toEqual(15); + expect(objectSize(styles)).toEqual(5); - expect(!!data[0].items).toEqual(true); - expect(data[0].items.length).toEqual(15); - expect(!!data[0].styles).toEqual(true); - - const page1 = mergeText(data[0].items); - expect(page1).toEqual(`Table Of Content + const text = mergeText(items); + expect(text).toEqual(`Table Of Content Chapter 1 .......................................................... 2 Paragraph 1.1 ...................................................... 3 page 1 / 3`); - - expect(!!data[1].items).toEqual(true); - expect(data[1].items.length).toEqual(6); - expect(!!data[1].styles).toEqual(true); }); it("gets text content, with correct properties (issue 8276)", async function () {