Merge pull request #13977 from Snuffleupagus/enqueueChunk-batch

[api-minor] Reduce `postMessage` overhead, in `PartialEvaluator.getTextContent`, by sending text chunks in batches (issue 13962)
This commit is contained in:
Tim van der Meij 2021-09-11 13:34:07 +02:00 committed by GitHub
commit e97f01b17c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 23 additions and 12 deletions

View File

@@ -107,6 +107,17 @@ const PatternType = {
   SHADING: 2,
 };
+// Optionally avoid sending individual, or very few, text chunks to reduce
+// `postMessage` overhead with ReadableStream (see issue 13962).
+//
+// PLEASE NOTE: This value should *not* be too large (it's used as a lower limit
+// in `enqueueChunk`), since that would cause streaming of textContent to become
+// essentially useless in practice by sending all (or most) chunks at once.
+// Also, a too large value would (indirectly) affect the main-thread `textLayer`
+// building negatively by forcing all textContent to be handled at once, which
+// could easily end up hurting *overall* performance (e.g. rendering as well).
+const TEXT_CHUNK_BATCH_SIZE = 10;
 const deferred = Promise.resolve();
 // Convert PDF blend mode names to HTML5 blend mode names.
@@ -2575,8 +2586,6 @@ class PartialEvaluator {
       if (textContentItem.initialized) {
         textContentItem.hasEOL = true;
         flushTextContentItem();
-      } else if (textContent.items.length > 0) {
-        textContent.items[textContent.items.length - 1].hasEOL = true;
       } else {
         textContent.items.push({
           str: "",
@@ -2658,20 +2667,24 @@ class PartialEvaluator {
       textContentItem.str.length = 0;
     }
-    function enqueueChunk() {
+    function enqueueChunk(batch = false) {
       const length = textContent.items.length;
-      if (length > 0) {
-        sink.enqueue(textContent, length);
-        textContent.items = [];
-        textContent.styles = Object.create(null);
+      if (length === 0) {
+        return;
       }
+      if (batch && length < TEXT_CHUNK_BATCH_SIZE) {
+        return;
+      }
+      sink.enqueue(textContent, length);
+      textContent.items = [];
+      textContent.styles = Object.create(null);
     }
     const timeSlotManager = new TimeSlotManager();
     return new Promise(function promiseBody(resolve, reject) {
       const next = function (promise) {
-        enqueueChunk();
+        enqueueChunk(/* batch = */ true);
         Promise.all([promise, sink.ready]).then(function () {
           try {
             promiseBody(resolve, reject);

View File

@@ -737,8 +737,6 @@ class WorkerMessageHandler {
     handler.on("GetTextContent", function wphExtractText(data, sink) {
       const pageIndex = data.pageIndex;
-      sink.onPull = function (desiredSize) {};
-      sink.onCancel = function (reason) {};
       pdfManager.getPage(pageIndex).then(function (page) {
         const task = new WorkerTask("GetTextContent: page " + pageIndex);

View File

@@ -448,7 +448,7 @@ class MessageHandler {
       }
       // Reset desiredSize property of sink on every pull.
       this.streamSinks[streamId].desiredSize = data.desiredSize;
-      const { onPull } = this.streamSinks[data.streamId];
+      const { onPull } = this.streamSinks[streamId];
       new Promise(function (resolve) {
         resolve(onPull && onPull());
       }).then(
@@ -518,7 +518,7 @@ class MessageHandler {
       if (!this.streamSinks[streamId]) {
         break;
       }
-      const { onCancel } = this.streamSinks[data.streamId];
+      const { onCancel } = this.streamSinks[streamId];
       new Promise(function (resolve) {
         resolve(onCancel && onCancel(wrapReason(data.reason)));
       }).then(