diff --git a/src/core/evaluator.js b/src/core/evaluator.js index fe407a7fa..48a170016 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -107,6 +107,17 @@ const PatternType = { SHADING: 2, }; +// Optionally avoid sending individual, or very few, text chunks to reduce +// `postMessage` overhead with ReadableStream (see issue 13962). +// +// PLEASE NOTE: This value should *not* be too large (it's used as a lower limit +// in `enqueueChunk`), since that would cause streaming of textContent to become +// essentially useless in practice by sending all (or most) chunks at once. +// Also, a too large value would (indirectly) affect the main-thread `textLayer` +// building negatively by forcing all textContent to be handled at once, which +// could easily end up hurting *overall* performance (e.g. rendering as well). +const TEXT_CHUNK_BATCH_SIZE = 10; + const deferred = Promise.resolve(); // Convert PDF blend mode names to HTML5 blend mode names. @@ -2575,8 +2586,6 @@ class PartialEvaluator { if (textContentItem.initialized) { textContentItem.hasEOL = true; flushTextContentItem(); - } else if (textContent.items.length > 0) { - textContent.items[textContent.items.length - 1].hasEOL = true; } else { textContent.items.push({ str: "", @@ -2658,20 +2667,24 @@ class PartialEvaluator { textContentItem.str.length = 0; } - function enqueueChunk() { + function enqueueChunk(batch = false) { const length = textContent.items.length; - if (length > 0) { - sink.enqueue(textContent, length); - textContent.items = []; - textContent.styles = Object.create(null); + if (length === 0) { + return; } + if (batch && length < TEXT_CHUNK_BATCH_SIZE) { + return; + } + sink.enqueue(textContent, length); + textContent.items = []; + textContent.styles = Object.create(null); } const timeSlotManager = new TimeSlotManager(); return new Promise(function promiseBody(resolve, reject) { const next = function (promise) { - enqueueChunk(); + enqueueChunk(/* batch = */ true); Promise.all([promise, sink.ready]).then(function () { try { promiseBody(resolve, reject); diff --git a/src/core/worker.js b/src/core/worker.js index 023d5307c..9ae394881 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -737,8 +737,6 @@ class WorkerMessageHandler { handler.on("GetTextContent", function wphExtractText(data, sink) { const pageIndex = data.pageIndex; - sink.onPull = function (desiredSize) {}; - sink.onCancel = function (reason) {}; pdfManager.getPage(pageIndex).then(function (page) { const task = new WorkerTask("GetTextContent: page " + pageIndex); diff --git a/src/shared/message_handler.js b/src/shared/message_handler.js index bf4e81f8d..add84305b 100644 --- a/src/shared/message_handler.js +++ b/src/shared/message_handler.js @@ -448,7 +448,7 @@ class MessageHandler { } // Reset desiredSize property of sink on every pull. this.streamSinks[streamId].desiredSize = data.desiredSize; - const { onPull } = this.streamSinks[data.streamId]; + const { onPull } = this.streamSinks[streamId]; new Promise(function (resolve) { resolve(onPull && onPull()); }).then( @@ -518,7 +518,7 @@ class MessageHandler { if (!this.streamSinks[streamId]) { break; } - const { onCancel } = this.streamSinks[data.streamId]; + const { onCancel } = this.streamSinks[streamId]; new Promise(function (resolve) { resolve(onCancel && onCancel(wrapReason(data.reason))); }).then(