Merge pull request #16234 from Snuffleupagus/rm-disableCombineTextItems

[api-minor] Remove the `disableCombineTextItems` option
This commit is contained in:
Tim van der Meij 2023-04-01 14:18:00 +02:00 committed by GitHub
commit a9af0a6cc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 11 additions and 41 deletions

View File

@@ -1010,7 +1010,6 @@ class Annotation {
task,
resources,
includeMarkedContent: true,
combineTextItems: true,
sink,
viewBox,
});

View File

@@ -511,13 +511,7 @@ class Page {
});
}
extractTextContent({
handler,
task,
includeMarkedContent,
sink,
combineTextItems,
}) {
extractTextContent({ handler, task, includeMarkedContent, sink }) {
const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources([
"ExtGState",
@@ -545,7 +539,6 @@ class Page {
task,
resources: this.resources,
includeMarkedContent,
combineTextItems,
sink,
viewBox: this.view,
});

View File

@@ -2236,7 +2236,6 @@ class PartialEvaluator {
task,
resources,
stateManager = null,
combineTextItems = false,
includeMarkedContent = false,
sink,
seenStyles = new Set(),
@@ -2534,11 +2533,7 @@ class PartialEvaluator {
return false;
}
if (
!combineTextItems ||
!textState.font ||
!textContentItem.prevTransform
) {
if (!textState.font || !textContentItem.prevTransform) {
return true;
}
@@ -3191,7 +3186,6 @@ class PartialEvaluator {
task,
resources: xobj.dict.get("Resources") || resources,
stateManager: xObjStateManager,
combineTextItems,
includeMarkedContent,
sink: sinkWrapper,
seenStyles,

View File

@@ -741,7 +741,7 @@ class WorkerMessageHandler {
});
handler.on("GetTextContent", function (data, sink) {
const pageIndex = data.pageIndex;
const { pageIndex, includeMarkedContent } = data;
pdfManager.getPage(pageIndex).then(function (page) {
const task = new WorkerTask("GetTextContent: page " + pageIndex);
@@ -755,8 +755,7 @@ class WorkerMessageHandler {
handler,
task,
sink,
includeMarkedContent: data.includeMarkedContent,
combineTextItems: data.combineTextItems,
includeMarkedContent,
})
.then(
function () {

View File

@@ -1120,8 +1120,6 @@ class PDFDocumentProxy {
* Page getTextContent parameters.
*
* @typedef {Object} getTextContentParameters
* @property {boolean} disableCombineTextItems - Do not attempt to combine
* same line {@link TextItem}'s. The default value is `false`.
* @property {boolean} [includeMarkedContent] - When true include marked
* content items in the items array of TextContent. The default is `false`.
*/
@@ -1602,17 +1600,13 @@ class PDFPageProxy {
* @param {getTextContentParameters} params - getTextContent parameters.
* @returns {ReadableStream} Stream for reading text content chunks.
*/
streamTextContent({
disableCombineTextItems = false,
includeMarkedContent = false,
} = {}) {
streamTextContent({ includeMarkedContent = false } = {}) {
const TEXT_CONTENT_CHUNK_SIZE = 100;
return this._transport.messageHandler.sendWithStream(
"GetTextContent",
{
pageIndex: this._pageIndex,
combineTextItems: disableCombineTextItems !== true,
includeMarkedContent: includeMarkedContent === true,
},
{

View File

@@ -21,6 +21,7 @@ import {
ImageKind,
InvalidPDFException,
MissingPDFException,
objectSize,
OPS,
PasswordException,
PasswordResponses,
@@ -2321,26 +2322,16 @@ describe("api", function () {
});
it("gets text content", async function () {
const defaultPromise = page.getTextContent();
const parametersPromise = page.getTextContent({
disableCombineTextItems: true,
});
const { items, styles } = await page.getTextContent();
const data = await Promise.all([defaultPromise, parametersPromise]);
expect(items.length).toEqual(15);
expect(objectSize(styles)).toEqual(5);
expect(!!data[0].items).toEqual(true);
expect(data[0].items.length).toEqual(15);
expect(!!data[0].styles).toEqual(true);
const page1 = mergeText(data[0].items);
expect(page1).toEqual(`Table Of Content
const text = mergeText(items);
expect(text).toEqual(`Table Of Content
Chapter 1 .......................................................... 2
Paragraph 1.1 ...................................................... 3
page 1 / 3`);
expect(!!data[1].items).toEqual(true);
expect(data[1].items.length).toEqual(6);
expect(!!data[1].styles).toEqual(true);
});
it("gets text content, with correct properties (issue 8276)", async function () {