[api-minor] Remove the normalizeWhitespace
option in the PDFPageProxy.{getTextContent, streamTextContent}
methods (issue 14519, PR 14428 follow-up)
With these changes, we'll now *always* replace all whitespace characters with standard spaces (0x20). This behaviour has already been the default for many years in both the viewer and the browser-tests.
This commit is contained in:
parent
48c8831a79
commit
403baa7bba
@ -438,7 +438,6 @@ class Page {
|
|||||||
extractTextContent({
|
extractTextContent({
|
||||||
handler,
|
handler,
|
||||||
task,
|
task,
|
||||||
normalizeWhitespace,
|
|
||||||
includeMarkedContent,
|
includeMarkedContent,
|
||||||
sink,
|
sink,
|
||||||
combineTextItems,
|
combineTextItems,
|
||||||
@ -469,7 +468,6 @@ class Page {
|
|||||||
stream: contentStream,
|
stream: contentStream,
|
||||||
task,
|
task,
|
||||||
resources: this.resources,
|
resources: this.resources,
|
||||||
normalizeWhitespace,
|
|
||||||
includeMarkedContent,
|
includeMarkedContent,
|
||||||
combineTextItems,
|
combineTextItems,
|
||||||
sink,
|
sink,
|
||||||
|
@ -2163,7 +2163,6 @@ class PartialEvaluator {
|
|||||||
task,
|
task,
|
||||||
resources,
|
resources,
|
||||||
stateManager = null,
|
stateManager = null,
|
||||||
normalizeWhitespace = false,
|
|
||||||
combineTextItems = false,
|
combineTextItems = false,
|
||||||
includeMarkedContent = false,
|
includeMarkedContent = false,
|
||||||
sink,
|
sink,
|
||||||
@ -2642,7 +2641,7 @@ class PartialEvaluator {
|
|||||||
textChunk.prevTransform = getCurrentTextTransform();
|
textChunk.prevTransform = getCurrentTextTransform();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (glyph.isWhitespace && normalizeWhitespace) {
|
if (glyph.isWhitespace) {
|
||||||
// Replaces all whitespaces with standard spaces (0x20), to avoid
|
// Replaces all whitespaces with standard spaces (0x20), to avoid
|
||||||
// alignment issues between the textLayer and the canvas if the text
|
// alignment issues between the textLayer and the canvas if the text
|
||||||
// contains e.g. tabs (fixes issue6612.pdf).
|
// contains e.g. tabs (fixes issue6612.pdf).
|
||||||
@ -3023,7 +3022,6 @@ class PartialEvaluator {
|
|||||||
task,
|
task,
|
||||||
resources: xobj.dict.get("Resources") || resources,
|
resources: xobj.dict.get("Resources") || resources,
|
||||||
stateManager: xObjStateManager,
|
stateManager: xObjStateManager,
|
||||||
normalizeWhitespace,
|
|
||||||
combineTextItems,
|
combineTextItems,
|
||||||
includeMarkedContent,
|
includeMarkedContent,
|
||||||
sink: sinkWrapper,
|
sink: sinkWrapper,
|
||||||
|
@ -740,7 +740,6 @@ class WorkerMessageHandler {
|
|||||||
handler,
|
handler,
|
||||||
task,
|
task,
|
||||||
sink,
|
sink,
|
||||||
normalizeWhitespace: data.normalizeWhitespace,
|
|
||||||
includeMarkedContent: data.includeMarkedContent,
|
includeMarkedContent: data.includeMarkedContent,
|
||||||
combineTextItems: data.combineTextItems,
|
combineTextItems: data.combineTextItems,
|
||||||
})
|
})
|
||||||
|
@ -1069,8 +1069,6 @@ class PDFDocumentProxy {
|
|||||||
* Page getTextContent parameters.
|
* Page getTextContent parameters.
|
||||||
*
|
*
|
||||||
* @typedef {Object} getTextContentParameters
|
* @typedef {Object} getTextContentParameters
|
||||||
* @property {boolean} normalizeWhitespace - Replaces all occurrences of
|
|
||||||
* whitespace with standard spaces (0x20). The default value is `false`.
|
|
||||||
* @property {boolean} disableCombineTextItems - Do not attempt to combine
|
* @property {boolean} disableCombineTextItems - Do not attempt to combine
|
||||||
* same line {@link TextItem}'s. The default value is `false`.
|
* same line {@link TextItem}'s. The default value is `false`.
|
||||||
* @property {boolean} [includeMarkedContent] - When true include marked
|
* @property {boolean} [includeMarkedContent] - When true include marked
|
||||||
@ -1585,11 +1583,13 @@ class PDFPageProxy {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* NOTE: All occurrences of whitespace will be replaced by
|
||||||
|
* standard spaces (0x20).
|
||||||
|
*
|
||||||
* @param {getTextContentParameters} params - getTextContent parameters.
|
* @param {getTextContentParameters} params - getTextContent parameters.
|
||||||
* @returns {ReadableStream} Stream for reading text content chunks.
|
* @returns {ReadableStream} Stream for reading text content chunks.
|
||||||
*/
|
*/
|
||||||
streamTextContent({
|
streamTextContent({
|
||||||
normalizeWhitespace = false,
|
|
||||||
disableCombineTextItems = false,
|
disableCombineTextItems = false,
|
||||||
includeMarkedContent = false,
|
includeMarkedContent = false,
|
||||||
} = {}) {
|
} = {}) {
|
||||||
@ -1599,7 +1599,6 @@ class PDFPageProxy {
|
|||||||
"GetTextContent",
|
"GetTextContent",
|
||||||
{
|
{
|
||||||
pageIndex: this._pageIndex,
|
pageIndex: this._pageIndex,
|
||||||
normalizeWhitespace: normalizeWhitespace === true,
|
|
||||||
combineTextItems: disableCombineTextItems !== true,
|
combineTextItems: disableCombineTextItems !== true,
|
||||||
includeMarkedContent: includeMarkedContent === true,
|
includeMarkedContent: includeMarkedContent === true,
|
||||||
},
|
},
|
||||||
@ -1613,6 +1612,9 @@ class PDFPageProxy {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* NOTE: All occurrences of whitespace will be replaced by
|
||||||
|
* standard spaces (0x20).
|
||||||
|
*
|
||||||
* @param {getTextContentParameters} params - getTextContent parameters.
|
* @param {getTextContentParameters} params - getTextContent parameters.
|
||||||
* @returns {Promise<TextContent>} A promise that is resolved with a
|
* @returns {Promise<TextContent>} A promise that is resolved with a
|
||||||
* {@link TextContent} object that represents the page's text content.
|
* {@link TextContent} object that represents the page's text content.
|
||||||
|
@ -644,7 +644,6 @@ class Driver {
|
|||||||
// The text builder will draw its content on the test canvas
|
// The text builder will draw its content on the test canvas
|
||||||
initPromise = page
|
initPromise = page
|
||||||
.getTextContent({
|
.getTextContent({
|
||||||
normalizeWhitespace: true,
|
|
||||||
includeMarkedContent: true,
|
includeMarkedContent: true,
|
||||||
})
|
})
|
||||||
.then(function (textContent) {
|
.then(function (textContent) {
|
||||||
|
@ -1966,7 +1966,6 @@ describe("api", function () {
|
|||||||
it("gets text content", async function () {
|
it("gets text content", async function () {
|
||||||
const defaultPromise = page.getTextContent();
|
const defaultPromise = page.getTextContent();
|
||||||
const parametersPromise = page.getTextContent({
|
const parametersPromise = page.getTextContent({
|
||||||
normalizeWhitespace: true,
|
|
||||||
disableCombineTextItems: true,
|
disableCombineTextItems: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -551,9 +551,7 @@ class PDFFindController {
|
|||||||
return this._pdfDocument
|
return this._pdfDocument
|
||||||
.getPage(i + 1)
|
.getPage(i + 1)
|
||||||
.then(pdfPage => {
|
.then(pdfPage => {
|
||||||
return pdfPage.getTextContent({
|
return pdfPage.getTextContent();
|
||||||
normalizeWhitespace: true,
|
|
||||||
});
|
|
||||||
})
|
})
|
||||||
.then(
|
.then(
|
||||||
textContent => {
|
textContent => {
|
||||||
|
@ -701,7 +701,6 @@ class PDFPageView {
|
|||||||
return finishPaintTask(null).then(() => {
|
return finishPaintTask(null).then(() => {
|
||||||
if (textLayer) {
|
if (textLayer) {
|
||||||
const readableStream = pdfPage.streamTextContent({
|
const readableStream = pdfPage.streamTextContent({
|
||||||
normalizeWhitespace: true,
|
|
||||||
includeMarkedContent: true,
|
includeMarkedContent: true,
|
||||||
});
|
});
|
||||||
textLayer.setTextContentStream(readableStream);
|
textLayer.setTextContentStream(readableStream);
|
||||||
|
Loading…
Reference in New Issue
Block a user