From b5254f274588839d92b28df3f9d89344351b6d8c Mon Sep 17 00:00:00 2001
From: Jonas Jenwald
Date: Thu, 18 Jul 2019 16:24:25 +0200
Subject: [PATCH] Attempt to significantly reduce the number of
 `ChunkedStream.{ensureByte, ensureRange}` calls by inlining the
 `this.progressiveDataLength` checks at the call-sites

The number of `ChunkedStream.ensureByte` calls in particular is often absolutely *huge* (on the order of millions of calls) when loading and rendering even moderately complicated PDF files, which isn't entirely surprising considering that the `getByte`/`getBytes`/`peekByte`/`peekBytes` methods are used for essentially all data reading/parsing.

The idea implemented in this patch is to inline an inverted `progressiveDataLength` check at all of the `ensureByte`/`ensureRange` call-sites, which in practice will often result in *several* orders of magnitude fewer function calls.

Obviously this patch will only help if the browser supports streaming, which all reasonably modern browsers now do (including the Firefox built-in PDF viewer), and assuming that the user didn't set the `disableStream` option (e.g. for using `disableAutoFetch`). However, I think we should be able to improve performance for the default out-of-the-box use case, without worrying about e.g. older browsers (where this patch will thus incur *one* additional check before calling `ensureByte`/`ensureRange`).

This patch was inspired by the *first* commit in PR 5005, which was subsequently backed out in PR 5145 for causing regressions. Since the general idea of avoiding unnecessary function calls was really nice, I figured that re-attempting this in one way or another wouldn't be a bad idea. Given that streaming is now supported, which it wasn't back then, using `progressiveDataLength` seemed like an easier approach in general, since it also allowed supporting both `ensureByte` and `ensureRange`.

This sort of patch obviously needs data to back it up, hence I've benchmarked the changes using the following manifest file (with the default `tracemonkey` file):

```
[
  { "id": "tracemonkey-eq",
    "file": "pdfs/tracemonkey.pdf",
    "md5": "9a192d8b1a7dc652a19835f6f08098bd",
    "rounds": 250,
    "type": "eq"
  }
]
```

I get the following complete results when comparing this patch against the `master` branch:

```
-- Grouped By browser, stat --
browser | stat         | Count | Baseline(ms) | Current(ms) | +/- |    %  | Result(P<.05)
------- | ------------ | ----- | ------------ | ----------- | --- | ----- | -------------
Firefox | Overall      |  3500 |          140 |         134 |  -6 | -4.46 |        faster
Firefox | Page Request |  3500 |            2 |           2 |   0 | -0.10 |
Firefox | Rendering    |  3500 |          138 |         131 |  -6 | -4.54 |        faster
```

Here it's pretty clear that the patch does have a positive net effect, even for a PDF file of fairly moderate size and complexity.
However, in this case it's probably interesting to also look at the results per page:

```
-- Grouped By page, stat --
page | stat         | Count | Baseline(ms) | Current(ms) | +/- |    %   | Result(P<.05)
---- | ------------ | ----- | ------------ | ----------- | --- | ------ | -------------
0    | Overall      |   250 |           74 |          75 |   1 |   0.69 |
0    | Page Request |   250 |            1 |           1 |   0 |  33.20 |
0    | Rendering    |   250 |           73 |          74 |   0 |   0.25 |
1    | Overall      |   250 |          123 |         121 |  -2 |  -1.87 |        faster
1    | Page Request |   250 |            3 |           2 |   0 | -11.73 |
1    | Rendering    |   250 |          121 |         119 |  -2 |  -1.67 |
2    | Overall      |   250 |           64 |          63 |  -1 |  -1.91 |
2    | Page Request |   250 |            1 |           1 |   0 |   8.81 |
2    | Rendering    |   250 |           63 |          62 |  -1 |  -2.13 |        faster
3    | Overall      |   250 |           97 |          97 |   0 |  -0.06 |
3    | Page Request |   250 |            1 |           1 |   0 |  25.37 |
3    | Rendering    |   250 |           96 |          95 |   0 |  -0.34 |
4    | Overall      |   250 |           97 |          97 |   0 |  -0.38 |
4    | Page Request |   250 |            1 |           1 |   0 |  -5.97 |
4    | Rendering    |   250 |           96 |          96 |   0 |  -0.27 |
5    | Overall      |   250 |           99 |          97 |  -3 |  -2.92 |
5    | Page Request |   250 |            2 |           1 |   0 | -17.20 |
5    | Rendering    |   250 |           98 |          95 |  -3 |  -2.68 |
6    | Overall      |   250 |           99 |          99 |   0 |  -0.14 |
6    | Page Request |   250 |            2 |           2 |   0 | -16.49 |
6    | Rendering    |   250 |           97 |          98 |   0 |   0.16 |
7    | Overall      |   250 |           96 |          95 |  -1 |  -0.55 |
7    | Page Request |   250 |            1 |           2 |   1 |  66.67 |        slower
7    | Rendering    |   250 |           95 |          94 |  -1 |  -1.19 |
8    | Overall      |   250 |           92 |          92 |  -1 |  -0.69 |
8    | Page Request |   250 |            1 |           1 |   0 | -17.60 |
8    | Rendering    |   250 |           91 |          91 |   0 |  -0.52 |
9    | Overall      |   250 |          112 |         112 |   0 |   0.29 |
9    | Page Request |   250 |            2 |           1 |   0 |  -7.92 |
9    | Rendering    |   250 |          110 |         111 |   0 |   0.37 |
10   | Overall      |   250 |          589 |         522 | -67 | -11.38 |        faster
10   | Page Request |   250 |           14 |          13 |   0 |  -1.26 |
10   | Rendering    |   250 |          575 |         508 | -67 | -11.62 |        faster
11   | Overall      |   250 |           66 |          66 |  -1 |  -0.86 |
11   | Page Request |   250 |            1 |           1 |   0 | -16.48 |
11   | Rendering    |   250 |           65 |          65 |   0 |  -0.62 |
12   | Overall      |   250 |          303 |         291 | -12 |  -4.07 |        faster
12   | Page Request |   250 |            2 |           2 |   0 |  12.93 |
12   | Rendering    |   250 |          301 |         289 | -13 |  -4.19 |        faster
13   | Overall      |   250 |           48 |          47 |   0 |  -0.45 |
13   | Page Request |   250 |            1 |           1 |   0 |   1.59 |
13   | Rendering    |   250 |           47 |          46 |   0 |  -0.52 |
```

Here it's clear that this patch *significantly* improves the rendering performance of the slowest pages, while not causing any big regressions elsewhere. As expected, this patch thus helps larger and/or more complex pages the most (which is also where even small improvements will be most beneficial). There's obviously the question of whether this is *slightly* regressing simpler pages, but given just how short the times are in most cases, it's not inconceivable that the page results above are simply caused by e.g. limited `Date.now()` accuracy and/or limited numerical precision.
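To make the general idea easier to follow outside of the diff context, here's a minimal sketch of the pattern; the `SimplifiedChunkedStream` class and the `getByteOld`/`getByteNew` names are illustrative stand-ins, not the actual `ChunkedStream` implementation:

```
// A minimal, self-contained sketch of the inlined-check idea; this is *not*
// the real `ChunkedStream` class, only a simplified stand-in.
class SimplifiedChunkedStream {
  constructor(bytes, progressiveDataLength) {
    this.bytes = bytes; // The data received so far (a Uint8Array).
    this.pos = 0; // The current read position.
    this.end = bytes.length; // The logical end of the stream.
    this.progressiveDataLength = progressiveDataLength;
  }

  ensureByte(pos) {
    // In the real implementation this throws a `MissingDataException` when the
    // chunk containing `pos` hasn't been loaded yet; stubbed out here.
  }

  // Before the patch: every call pays for an `ensureByte` function call.
  getByteOld() {
    const pos = this.pos;
    if (pos >= this.end) {
      return -1;
    }
    this.ensureByte(pos);
    return this.bytes[this.pos++];
  }

  // After the patch: the inverted `progressiveDataLength` check is inlined,
  // so `ensureByte` is only called for data beyond what has been received.
  getByteNew() {
    const pos = this.pos;
    if (pos >= this.end) {
      return -1;
    }
    if (pos >= this.progressiveDataLength) {
      this.ensureByte(pos);
    }
    return this.bytes[this.pos++];
  }
}
```

The `ensureRange` call-sites in the diff below are handled in the same way, with the inverted check instead being e.g. `end > this.progressiveDataLength`.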
---
 src/core/chunked_stream.js | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/core/chunked_stream.js b/src/core/chunked_stream.js
index 39c1cb677..a0f29ec74 100644
--- a/src/core/chunked_stream.js
+++ b/src/core/chunked_stream.js
@@ -159,7 +159,9 @@ class ChunkedStream {
     if (pos >= this.end) {
       return -1;
     }
-    this.ensureByte(pos);
+    if (pos >= this.progressiveDataLength) {
+      this.ensureByte(pos);
+    }
     return this.bytes[this.pos++];
   }
 
@@ -187,7 +189,9 @@ class ChunkedStream {
     const strEnd = this.end;
 
     if (!length) {
-      this.ensureRange(pos, strEnd);
+      if (strEnd > this.progressiveDataLength) {
+        this.ensureRange(pos, strEnd);
+      }
       const subarray = bytes.subarray(pos, strEnd);
       // `this.bytes` is always a `Uint8Array` here.
       return (forceClamped ? new Uint8ClampedArray(subarray) : subarray);
@@ -197,7 +201,9 @@ class ChunkedStream {
     if (end > strEnd) {
       end = strEnd;
     }
-    this.ensureRange(pos, end);
+    if (end > this.progressiveDataLength) {
+      this.ensureRange(pos, end);
+    }
 
     this.pos = end;
     const subarray = bytes.subarray(pos, end);
@@ -224,7 +230,9 @@ class ChunkedStream {
     if (end > this.end) {
       end = this.end;
     }
-    this.ensureRange(begin, end);
+    if (end > this.progressiveDataLength) {
+      this.ensureRange(begin, end);
+    }
     return this.bytes.subarray(begin, end);
   }
 
@@ -245,7 +253,9 @@ class ChunkedStream {
 
   makeSubStream(start, length, dict) {
     if (length) {
-      this.ensureRange(start, start + length);
+      if (start + length > this.progressiveDataLength) {
+        this.ensureRange(start, start + length);
+      }
     } else {
       // When the `length` is undefined you do *not*, under any circumstances,
       // want to fallback on calling `this.ensureRange(start, this.end)` since
       // time/resources during e.g. parsing, since `MissingDataException`s will
       // require data to be re-parsed, which we attempt to minimize by at least
       // checking that the *beginning* of the data is available here.
-      this.ensureByte(start);
+      if (start >= this.progressiveDataLength) {
+        this.ensureByte(start);
+      }
     }
 
     function ChunkedStreamSubstream() {}