Merge pull request #13977 from Snuffleupagus/enqueueChunk-batch

[api-minor] Reduce `postMessage` overhead, in `PartialEvaluator.getTextContent`, by sending text chunks in batches (issue 13962)
2021-09-11 13:34:07 +02:00 · 2021-09-11 13:34:07 +02:00 · e97f01b17c
commit e97f01b17c
parent 9b42ae9612 45ddb12f61
3 changed files with 23 additions and 12 deletions
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -107,6 +107,17 @@ const PatternType = {
  SHADING: 2,
 };

+// Optionally avoid sending individual, or very few, text chunks to reduce
+// `postMessage` overhead with ReadableStream (see issue 13962).
+//
+// PLEASE NOTE: This value should *not* be too large (it's used as a lower limit
+// in `enqueueChunk`), since that would cause streaming of textContent to become
+// essentially useless in practice by sending all (or most) chunks at once.
+// Also, a too large value would (indirectly) affect the main-thread `textLayer`
+// building negatively by forcing all textContent to be handled at once, which
+// could easily end up hurting *overall* performance (e.g. rendering as well).
+const TEXT_CHUNK_BATCH_SIZE = 10;
+
 const deferred = Promise.resolve();

 // Convert PDF blend mode names to HTML5 blend mode names.
@@ -2575,8 +2586,6 @@ class PartialEvaluator {
      if (textContentItem.initialized) {
        textContentItem.hasEOL = true;
        flushTextContentItem();
-      } else if (textContent.items.length > 0) {
-        textContent.items[textContent.items.length - 1].hasEOL = true;
      } else {
        textContent.items.push({
          str: "",
@@ -2658,20 +2667,24 @@ class PartialEvaluator {
      textContentItem.str.length = 0;
    }

-    function enqueueChunk() {
+    function enqueueChunk(batch = false) {
      const length = textContent.items.length;
-      if (length > 0) {
-        sink.enqueue(textContent, length);
-        textContent.items = [];
-        textContent.styles = Object.create(null);
+      if (length === 0) {
+        return;
      }
+      if (batch && length < TEXT_CHUNK_BATCH_SIZE) {
+        return;
+      }
+      sink.enqueue(textContent, length);
+      textContent.items = [];
+      textContent.styles = Object.create(null);
    }

    const timeSlotManager = new TimeSlotManager();

    return new Promise(function promiseBody(resolve, reject) {
      const next = function (promise) {
-        enqueueChunk();
+        enqueueChunk(/* batch = */ true);
        Promise.all([promise, sink.ready]).then(function () {
          try {
            promiseBody(resolve, reject);
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -737,8 +737,6 @@ class WorkerMessageHandler {

    handler.on("GetTextContent", function wphExtractText(data, sink) {
      const pageIndex = data.pageIndex;
-      sink.onPull = function (desiredSize) {};
-      sink.onCancel = function (reason) {};

      pdfManager.getPage(pageIndex).then(function (page) {
        const task = new WorkerTask("GetTextContent: page " + pageIndex);
--- a/src/shared/message_handler.js
+++ b/src/shared/message_handler.js
@@ -448,7 +448,7 @@ class MessageHandler {
        }
        // Reset desiredSize property of sink on every pull.
        this.streamSinks[streamId].desiredSize = data.desiredSize;
-        const { onPull } = this.streamSinks[data.streamId];
+        const { onPull } = this.streamSinks[streamId];
        new Promise(function (resolve) {
          resolve(onPull && onPull());
        }).then(
@@ -518,7 +518,7 @@ class MessageHandler {
        if (!this.streamSinks[streamId]) {
          break;
        }
-        const { onCancel } = this.streamSinks[data.streamId];
+        const { onCancel } = this.streamSinks[streamId];
        new Promise(function (resolve) {
          resolve(onCancel && onCancel(wrapReason(data.reason)));
        }).then(