/* Copyright 2012 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* eslint no-var: error */

import {
  arrayByteLength, arraysToBytes, createPromiseCapability, isEmptyObj
} from '../shared/util';
import { MissingDataException } from './core_utils';
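
// A ChunkedStream exposes the file's data as one contiguous buffer, while
// keeping track of which fixed-size chunks of that buffer have actually been
// loaded; reading data that belongs to a chunk which is not loaded yet throws
// a MissingDataException.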
class ChunkedStream {
  constructor(length, chunkSize, manager) {
    this.bytes = new Uint8Array(length);
    this.start = 0;
    this.pos = 0;
    this.end = length;
    this.chunkSize = chunkSize;
    this.loadedChunks = [];
    this.numChunksLoaded = 0;
    this.numChunks = Math.ceil(length / chunkSize);
    this.manager = manager;
    this.progressiveDataLength = 0;
    this.lastSuccessfulEnsureByteChunk = -1; // Single-entry cache
  }

  // If a particular stream does not implement one or more of these methods,
  // an error should be thrown.
  getMissingChunks() {
    const chunks = [];
    for (let chunk = 0, n = this.numChunks; chunk < n; ++chunk) {
      if (!this.loadedChunks[chunk]) {
        chunks.push(chunk);
      }
    }
    return chunks;
  }

  getBaseStreams() {
    return [this];
  }

  allChunksLoaded() {
    return this.numChunksLoaded === this.numChunks;
  }

  onReceiveData(begin, chunk) {
    const chunkSize = this.chunkSize;
    if (begin % chunkSize !== 0) {
      throw new Error(`Bad begin offset: ${begin}`);
    }

    // Using `this.length` is inaccurate here since `this.start` can be moved
    // (see the `moveStart` method).
    const end = begin + chunk.byteLength;
    if (end % chunkSize !== 0 && end !== this.bytes.length) {
      throw new Error(`Bad end offset: ${end}`);
    }

    this.bytes.set(new Uint8Array(chunk), begin);
    const beginChunk = Math.floor(begin / chunkSize);
    const endChunk = Math.floor((end - 1) / chunkSize) + 1;

    for (let curChunk = beginChunk; curChunk < endChunk; ++curChunk) {
      if (!this.loadedChunks[curChunk]) {
        this.loadedChunks[curChunk] = true;
        ++this.numChunksLoaded;
      }
    }
  }
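
  // Appends data received via streaming to the internal buffer, and marks as
  // loaded only those chunks that the streamed data now covers completely.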
  onReceiveProgressiveData(data) {
    let position = this.progressiveDataLength;
    const beginChunk = Math.floor(position / this.chunkSize);

    this.bytes.set(new Uint8Array(data), position);
    position += data.byteLength;
    this.progressiveDataLength = position;
    const endChunk = position >= this.end ? this.numChunks :
                     Math.floor(position / this.chunkSize);

    for (let curChunk = beginChunk; curChunk < endChunk; ++curChunk) {
      if (!this.loadedChunks[curChunk]) {
        this.loadedChunks[curChunk] = true;
        ++this.numChunksLoaded;
      }
    }
  }
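
  // Throws a MissingDataException if the chunk containing `pos` has not been
  // loaded yet; the single-entry cache keeps repeated calls for the same
  // chunk cheap.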
  ensureByte(pos) {
    if (pos < this.progressiveDataLength) {
      return;
    }

    const chunk = Math.floor(pos / this.chunkSize);
    if (chunk === this.lastSuccessfulEnsureByteChunk) {
      return;
    }

    if (!this.loadedChunks[chunk]) {
      throw new MissingDataException(pos, pos + 1);
    }
    this.lastSuccessfulEnsureByteChunk = chunk;
  }
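
  // Throws a MissingDataException if any chunk overlapping [begin, end) has
  // not been loaded yet.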
  ensureRange(begin, end) {
    if (begin >= end) {
      return;
    }
    if (end <= this.progressiveDataLength) {
      return;
    }

    const chunkSize = this.chunkSize;
    const beginChunk = Math.floor(begin / chunkSize);
    const endChunk = Math.floor((end - 1) / chunkSize) + 1;
    for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
      if (!this.loadedChunks[chunk]) {
        throw new MissingDataException(begin, end);
      }
    }
  }
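
  // Returns the index of the first unloaded chunk at, or after, `beginChunk`,
  // wrapping around to the beginning of the file, or `null` when every chunk
  // has already been loaded.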
  nextEmptyChunk(beginChunk) {
    const numChunks = this.numChunks;
    for (let i = 0; i < numChunks; ++i) {
      const chunk = (beginChunk + i) % numChunks; // Wrap around to beginning.
      if (!this.loadedChunks[chunk]) {
        return chunk;
      }
    }
    return null;
  }

  hasChunk(chunk) {
    return !!this.loadedChunks[chunk];
  }

  get length() {
    return this.end - this.start;
  }

  get isEmpty() {
    return this.length === 0;
  }

  getByte() {
    const pos = this.pos;
    if (pos >= this.end) {
      return -1;
    }
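    // The `progressiveDataLength` check is inlined here (and at the other
    // `ensureByte`/`ensureRange` call-sites below) so that the call is
    // skipped entirely for data that has already been streamed in; these
    // getters are used for essentially all data reading/parsing, so this
    // avoids a huge number of otherwise unnecessary function calls.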
    if (pos >= this.progressiveDataLength) {
      this.ensureByte(pos);
    }
    return this.bytes[this.pos++];
  }

  getUint16() {
    const b0 = this.getByte();
    const b1 = this.getByte();
    if (b0 === -1 || b1 === -1) {
      return -1;
    }
    return (b0 << 8) + b1;
  }

  getInt32() {
    const b0 = this.getByte();
    const b1 = this.getByte();
    const b2 = this.getByte();
    const b3 = this.getByte();
    return (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
  }

  // Returns subarray of original buffer, should only be read.
  getBytes(length, forceClamped = false) {
    const bytes = this.bytes;
    const pos = this.pos;
    const strEnd = this.end;

    if (!length) {
      if (strEnd > this.progressiveDataLength) {
        this.ensureRange(pos, strEnd);
      }
      const subarray = bytes.subarray(pos, strEnd);
      // `this.bytes` is always a `Uint8Array` here.
      return (forceClamped ? new Uint8ClampedArray(subarray) : subarray);
    }

    let end = pos + length;
    if (end > strEnd) {
      end = strEnd;
    }
    if (end > this.progressiveDataLength) {
      this.ensureRange(pos, end);
    }

    this.pos = end;
    const subarray = bytes.subarray(pos, end);
    // `this.bytes` is always a `Uint8Array` here.
    return (forceClamped ? new Uint8ClampedArray(subarray) : subarray);
  }

  peekByte() {
    const peekedByte = this.getByte();
    this.pos--;
    return peekedByte;
  }

  peekBytes(length, forceClamped = false) {
    const bytes = this.getBytes(length, forceClamped);
    this.pos -= bytes.length;
    return bytes;
  }
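
  // Returns the bytes in [begin, end), clamped to the stream boundaries,
  // without advancing the current read position.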
  getByteRange(begin, end) {
    if (begin < 0) {
      begin = 0;
    }
    if (end > this.end) {
      end = this.end;
    }
    if (end > this.progressiveDataLength) {
      this.ensureRange(begin, end);
    }
    return this.bytes.subarray(begin, end);
  }

  skip(n) {
    if (!n) {
      n = 1;
    }
    this.pos += n;
  }

  reset() {
    this.pos = this.start;
  }

  moveStart() {
    this.start = this.pos;
  }
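
  // Creates a sub-stream over [start, start + length) that shares this
  // stream's underlying buffer and loaded-chunk bookkeeping.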
  makeSubStream(start, length, dict) {
    if (length) {
      if (start + length > this.progressiveDataLength) {
        this.ensureRange(start, start + length);
      }
    } else {
      // When the `length` is undefined you do *not*, under any circumstances,
      // want to fallback on calling `this.ensureRange(start, this.end)` since
      // that would force the *entire* PDF file to be loaded, thus completely
      // breaking the whole purpose of using streaming and/or range requests.
      //
      // However, not doing any checking here could very easily lead to wasted
      // time/resources during e.g. parsing, since `MissingDataException`s will
      // require data to be re-parsed, which we attempt to minimize by at least
      // checking that the *beginning* of the data is available here.
      if (start >= this.progressiveDataLength) {
        this.ensureByte(start);
      }
    }

    function ChunkedStreamSubstream() {}
    ChunkedStreamSubstream.prototype = Object.create(this);
    ChunkedStreamSubstream.prototype.getMissingChunks = function() {
      const chunkSize = this.chunkSize;
      const beginChunk = Math.floor(this.start / chunkSize);
      const endChunk = Math.floor((this.end - 1) / chunkSize) + 1;
      const missingChunks = [];
      for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
        if (!this.loadedChunks[chunk]) {
          missingChunks.push(chunk);
        }
      }
      return missingChunks;
    };

    const subStream = new ChunkedStreamSubstream();
    subStream.pos = subStream.start = start;
    subStream.end = start + length || this.end;
    subStream.dict = dict;
    return subStream;
  }
}
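
// The ChunkedStreamManager owns a ChunkedStream and fills it by issuing
// grouped range requests against the network stream; the promise returned by
// `onLoadedStream` resolves once the entire file has been loaded.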
class ChunkedStreamManager {
  constructor(pdfNetworkStream, args) {
    this.length = args.length;
    this.chunkSize = args.rangeChunkSize;
    this.stream = new ChunkedStream(this.length, this.chunkSize, this);
    this.pdfNetworkStream = pdfNetworkStream;
    this.disableAutoFetch = args.disableAutoFetch;
    this.msgHandler = args.msgHandler;

    this.currRequestId = 0;

    this.chunksNeededByRequest = Object.create(null);
    this.requestsByChunk = Object.create(null);
    this.promisesByRequest = Object.create(null);
    this.progressiveDataLength = 0;
    this.aborted = false;

    this._loadedStreamCapability = createPromiseCapability();
  }

  onLoadedStream() {
    return this._loadedStreamCapability.promise;
  }
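
  /**
   * Fetches the [begin, end) range from the network, concatenating the pieces
   * as they stream in, and hands the complete range over to `onReceiveData`.
   */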
  sendRequest(begin, end) {
    const rangeReader = this.pdfNetworkStream.getRangeReader(begin, end);
    if (!rangeReader.isStreamingSupported) {
      rangeReader.onProgress = this.onProgress.bind(this);
    }

    let chunks = [], loaded = 0;
    const promise = new Promise((resolve, reject) => {
      const readChunk = (chunk) => {
        try {
          if (!chunk.done) {
            const data = chunk.value;
            chunks.push(data);
            loaded += arrayByteLength(data);
            if (rangeReader.isStreamingSupported) {
              this.onProgress({ loaded, });
            }
            rangeReader.read().then(readChunk, reject);
            return;
          }
          const chunkData = arraysToBytes(chunks);
          chunks = null;
          resolve(chunkData);
        } catch (e) {
          reject(e);
        }
      };
      rangeReader.read().then(readChunk, reject);
    });
    promise.then((data) => {
      if (this.aborted) {
        return; // Ignoring any data after abort.
      }
      this.onReceiveData({ chunk: data, begin, });
    });
    // TODO check errors
  }

  /**
   * Get all the chunks that are not yet loaded and group them into
   * contiguous ranges to load in as few requests as possible.
   */
  requestAllChunks() {
    const missingChunks = this.stream.getMissingChunks();
    this._requestChunks(missingChunks);
    return this._loadedStreamCapability.promise;
  }
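
  /**
   * Requests the given chunks, skipping any that are already loaded or
   * already in flight, and returns a promise that resolves once every
   * requested chunk has been received.
   */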
  _requestChunks(chunks) {
    const requestId = this.currRequestId++;

    const chunksNeeded = Object.create(null);
    this.chunksNeededByRequest[requestId] = chunksNeeded;
    for (const chunk of chunks) {
      if (!this.stream.hasChunk(chunk)) {
        chunksNeeded[chunk] = true;
      }
    }

    if (isEmptyObj(chunksNeeded)) {
      return Promise.resolve();
    }

    const capability = createPromiseCapability();
    this.promisesByRequest[requestId] = capability;

    const chunksToRequest = [];
    for (let chunk in chunksNeeded) {
      chunk = chunk | 0;
      if (!(chunk in this.requestsByChunk)) {
        this.requestsByChunk[chunk] = [];
        chunksToRequest.push(chunk);
      }
      this.requestsByChunk[chunk].push(requestId);
    }

    if (!chunksToRequest.length) {
      return capability.promise;
    }

    const groupedChunksToRequest = this.groupChunks(chunksToRequest);
    for (const groupedChunk of groupedChunksToRequest) {
      const begin = groupedChunk.beginChunk * this.chunkSize;
      const end = Math.min(groupedChunk.endChunk * this.chunkSize, this.length);
      this.sendRequest(begin, end);
    }

    return capability.promise;
  }

  getStream() {
    return this.stream;
  }

  /**
   * Loads any chunks in the requested range that are not yet loaded.
   */
  requestRange(begin, end) {
    end = Math.min(end, this.length);

    const beginChunk = this.getBeginChunk(begin);
    const endChunk = this.getEndChunk(end);

    const chunks = [];
    for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
      chunks.push(chunk);
    }
    return this._requestChunks(chunks);
  }

  requestRanges(ranges = []) {
    const chunksToRequest = [];
    for (const range of ranges) {
      const beginChunk = this.getBeginChunk(range.begin);
      const endChunk = this.getEndChunk(range.end);
      for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
        if (!chunksToRequest.includes(chunk)) {
          chunksToRequest.push(chunk);
        }
      }
    }

    chunksToRequest.sort(function(a, b) {
      return a - b;
    });
    return this._requestChunks(chunksToRequest);
  }

  /**
   * Groups a sorted array of chunks into as few contiguous larger
   * chunks as possible.
   */
  groupChunks(chunks) {
    const groupedChunks = [];
    let beginChunk = -1;
    let prevChunk = -1;

    for (let i = 0, ii = chunks.length; i < ii; ++i) {
      const chunk = chunks[i];
      if (beginChunk < 0) {
        beginChunk = chunk;
      }

      if (prevChunk >= 0 && prevChunk + 1 !== chunk) {
        groupedChunks.push({ beginChunk,
                             endChunk: prevChunk + 1, });
        beginChunk = chunk;
      }
      if (i + 1 === chunks.length) {
        groupedChunks.push({ beginChunk,
                             endChunk: chunk + 1, });
      }

      prevChunk = chunk;
    }
    return groupedChunks;
  }

  onProgress(args) {
    this.msgHandler.send('DocProgress', {
      loaded: this.stream.numChunksLoaded * this.chunkSize + args.loaded,
      total: this.length,
    });
  }
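
  /**
   * Stores received data (either a requested range or progressively streamed
   * data) in the stream, resolves any requests whose chunks are now all
   * loaded and, unless auto-fetching is disabled, schedules the next
   * unloaded chunk.
   */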
  onReceiveData(args) {
    let chunk = args.chunk;
    const isProgressive = args.begin === undefined;
    const begin = isProgressive ? this.progressiveDataLength : args.begin;
    const end = begin + chunk.byteLength;

    const beginChunk = Math.floor(begin / this.chunkSize);
    const endChunk = end < this.length ? Math.floor(end / this.chunkSize) :
                                         Math.ceil(end / this.chunkSize);

    if (isProgressive) {
      this.stream.onReceiveProgressiveData(chunk);
      this.progressiveDataLength = end;
    } else {
      this.stream.onReceiveData(begin, chunk);
    }

    if (this.stream.allChunksLoaded()) {
      this._loadedStreamCapability.resolve(this.stream);
    }

    const loadedRequests = [];
    for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
      // The server might return more chunks than requested.
      const requestIds = this.requestsByChunk[chunk] || [];
      delete this.requestsByChunk[chunk];

      for (const requestId of requestIds) {
        const chunksNeeded = this.chunksNeededByRequest[requestId];
        if (chunk in chunksNeeded) {
          delete chunksNeeded[chunk];
        }

        if (!isEmptyObj(chunksNeeded)) {
          continue;
        }
        loadedRequests.push(requestId);
      }
    }

    // If there are no pending requests, automatically fetch the next
    // unfetched chunk of the PDF file.
    if (!this.disableAutoFetch && isEmptyObj(this.requestsByChunk)) {
      let nextEmptyChunk;
      if (this.stream.numChunksLoaded === 1) {
        // This is a special optimization so that after fetching the first
        // chunk, rather than fetching the second chunk, we fetch the last
        // chunk.
        const lastChunk = this.stream.numChunks - 1;
        if (!this.stream.hasChunk(lastChunk)) {
          nextEmptyChunk = lastChunk;
        }
      } else {
        nextEmptyChunk = this.stream.nextEmptyChunk(endChunk);
      }
      if (Number.isInteger(nextEmptyChunk)) {
        this._requestChunks([nextEmptyChunk]);
      }
    }

    for (const requestId of loadedRequests) {
      const capability = this.promisesByRequest[requestId];
      delete this.promisesByRequest[requestId];
      capability.resolve();
    }

    this.msgHandler.send('DocProgress', {
      loaded: this.stream.numChunksLoaded * this.chunkSize,
      total: this.length,
    });
  }

  onError(err) {
    this._loadedStreamCapability.reject(err);
  }

  getBeginChunk(begin) {
    return Math.floor(begin / this.chunkSize);
  }

  getEndChunk(end) {
    return Math.floor((end - 1) / this.chunkSize) + 1;
  }
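
  /**
   * Cancels all outstanding network requests and rejects every pending
   * chunk-request promise.
   */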
  abort() {
    this.aborted = true;
    if (this.pdfNetworkStream) {
      this.pdfNetworkStream.cancelAllRequests('abort');
    }
    for (const requestId in this.promisesByRequest) {
      this.promisesByRequest[requestId].reject(
        new Error('Request was aborted'));
    }
  }
}

export {
  ChunkedStream,
  ChunkedStreamManager,
};