[api-minor] Remove the normalizeWhitespace option in the PDFPageProxy.{getTextContent, streamTextContent} methods (issue 14519, PR 14428 follow-up)

With these changes, we'll now *always* replace all whitespace characters with standard spaces (0x20). This behaviour has already been the default, for many years, in both the viewer and the browser-tests.
This commit is contained in:
Jonas Jenwald 2022-01-31 17:48:35 +01:00
parent 48c8831a79
commit 403baa7bba
8 changed files with 8 additions and 16 deletions

View File

@ -438,7 +438,6 @@ class Page {
extractTextContent({ extractTextContent({
handler, handler,
task, task,
normalizeWhitespace,
includeMarkedContent, includeMarkedContent,
sink, sink,
combineTextItems, combineTextItems,
@ -469,7 +468,6 @@ class Page {
stream: contentStream, stream: contentStream,
task, task,
resources: this.resources, resources: this.resources,
normalizeWhitespace,
includeMarkedContent, includeMarkedContent,
combineTextItems, combineTextItems,
sink, sink,

View File

@ -2163,7 +2163,6 @@ class PartialEvaluator {
task, task,
resources, resources,
stateManager = null, stateManager = null,
normalizeWhitespace = false,
combineTextItems = false, combineTextItems = false,
includeMarkedContent = false, includeMarkedContent = false,
sink, sink,
@ -2642,7 +2641,7 @@ class PartialEvaluator {
textChunk.prevTransform = getCurrentTextTransform(); textChunk.prevTransform = getCurrentTextTransform();
} }
if (glyph.isWhitespace && normalizeWhitespace) { if (glyph.isWhitespace) {
// Replaces all whitespaces with standard spaces (0x20), to avoid // Replaces all whitespaces with standard spaces (0x20), to avoid
// alignment issues between the textLayer and the canvas if the text // alignment issues between the textLayer and the canvas if the text
// contains e.g. tabs (fixes issue6612.pdf). // contains e.g. tabs (fixes issue6612.pdf).
@ -3023,7 +3022,6 @@ class PartialEvaluator {
task, task,
resources: xobj.dict.get("Resources") || resources, resources: xobj.dict.get("Resources") || resources,
stateManager: xObjStateManager, stateManager: xObjStateManager,
normalizeWhitespace,
combineTextItems, combineTextItems,
includeMarkedContent, includeMarkedContent,
sink: sinkWrapper, sink: sinkWrapper,

View File

@ -740,7 +740,6 @@ class WorkerMessageHandler {
handler, handler,
task, task,
sink, sink,
normalizeWhitespace: data.normalizeWhitespace,
includeMarkedContent: data.includeMarkedContent, includeMarkedContent: data.includeMarkedContent,
combineTextItems: data.combineTextItems, combineTextItems: data.combineTextItems,
}) })

View File

@ -1069,8 +1069,6 @@ class PDFDocumentProxy {
* Page getTextContent parameters. * Page getTextContent parameters.
* *
* @typedef {Object} getTextContentParameters * @typedef {Object} getTextContentParameters
* @property {boolean} normalizeWhitespace - Replaces all occurrences of
* whitespace with standard spaces (0x20). The default value is `false`.
* @property {boolean} disableCombineTextItems - Do not attempt to combine * @property {boolean} disableCombineTextItems - Do not attempt to combine
* same line {@link TextItem}'s. The default value is `false`. * same line {@link TextItem}'s. The default value is `false`.
* @property {boolean} [includeMarkedContent] - When true include marked * @property {boolean} [includeMarkedContent] - When true include marked
@ -1585,11 +1583,13 @@ class PDFPageProxy {
} }
/** /**
* NOTE: All occurrences of whitespace will be replaced by
* standard spaces (0x20).
*
* @param {getTextContentParameters} params - getTextContent parameters. * @param {getTextContentParameters} params - getTextContent parameters.
* @returns {ReadableStream} Stream for reading text content chunks. * @returns {ReadableStream} Stream for reading text content chunks.
*/ */
streamTextContent({ streamTextContent({
normalizeWhitespace = false,
disableCombineTextItems = false, disableCombineTextItems = false,
includeMarkedContent = false, includeMarkedContent = false,
} = {}) { } = {}) {
@ -1599,7 +1599,6 @@ class PDFPageProxy {
"GetTextContent", "GetTextContent",
{ {
pageIndex: this._pageIndex, pageIndex: this._pageIndex,
normalizeWhitespace: normalizeWhitespace === true,
combineTextItems: disableCombineTextItems !== true, combineTextItems: disableCombineTextItems !== true,
includeMarkedContent: includeMarkedContent === true, includeMarkedContent: includeMarkedContent === true,
}, },
@ -1613,6 +1612,9 @@ class PDFPageProxy {
} }
/** /**
* NOTE: All occurrences of whitespace will be replaced by
* standard spaces (0x20).
*
* @param {getTextContentParameters} params - getTextContent parameters. * @param {getTextContentParameters} params - getTextContent parameters.
* @returns {Promise<TextContent>} A promise that is resolved with a * @returns {Promise<TextContent>} A promise that is resolved with a
* {@link TextContent} object that represents the page's text content. * {@link TextContent} object that represents the page's text content.

View File

@ -644,7 +644,6 @@ class Driver {
// The text builder will draw its content on the test canvas // The text builder will draw its content on the test canvas
initPromise = page initPromise = page
.getTextContent({ .getTextContent({
normalizeWhitespace: true,
includeMarkedContent: true, includeMarkedContent: true,
}) })
.then(function (textContent) { .then(function (textContent) {

View File

@ -1966,7 +1966,6 @@ describe("api", function () {
it("gets text content", async function () { it("gets text content", async function () {
const defaultPromise = page.getTextContent(); const defaultPromise = page.getTextContent();
const parametersPromise = page.getTextContent({ const parametersPromise = page.getTextContent({
normalizeWhitespace: true,
disableCombineTextItems: true, disableCombineTextItems: true,
}); });

View File

@ -551,9 +551,7 @@ class PDFFindController {
return this._pdfDocument return this._pdfDocument
.getPage(i + 1) .getPage(i + 1)
.then(pdfPage => { .then(pdfPage => {
return pdfPage.getTextContent({ return pdfPage.getTextContent();
normalizeWhitespace: true,
});
}) })
.then( .then(
textContent => { textContent => {

View File

@ -701,7 +701,6 @@ class PDFPageView {
return finishPaintTask(null).then(() => { return finishPaintTask(null).then(() => {
if (textLayer) { if (textLayer) {
const readableStream = pdfPage.streamTextContent({ const readableStream = pdfPage.streamTextContent({
normalizeWhitespace: true,
includeMarkedContent: true, includeMarkedContent: true,
}); });
textLayer.setTextContentStream(readableStream); textLayer.setTextContentStream(readableStream);