Make the ObjectLoader use more efficient methods when determining if data needs to be loaded

Currently, for data in `ChunkedStream` instances, the `getMissingChunks` method is used in a couple of places to determine if data is already available or if it needs to be loaded.

When looking at how `ChunkedStream.getMissingChunks` is being used in the `ObjectLoader` you'll notice that we don't actually care about which *specific* chunks are missing, but rather only want essentially a yes/no answer to the "Is the data available?" question.
Furthermore, when looking at how `ChunkedStream.getMissingChunks` itself is implemented you'll notice that it (somewhat expectedly) always iterates over *all* chunks.

All in all, using `ChunkedStream.getMissingChunks` in the `ObjectLoader` seems like an unnecessary "heavy" and roundabout way to obtain a boolean value. However, it turns out there already exists a `ChunkedStream.allChunksLoaded` method, consisting of a *single* simple check, which seems like a perfect fit for the `ObjectLoader` use cases.
In particular, once the *entire* PDF document has been loaded (which is usually fairly quick with streaming enabled), you'd really want the `ObjectLoader` to be as simple/quick as possible (similar to e.g. loading a local files) which this patch should help with.

Note that I wouldn't expect this patch to have a huge effect on performance, but it will nonetheless save some CPU/memory resources when the `ObjectLoader` is used. (As usual this should help larger PDF documents, w.r.t. both file size and number of pages, the most.)
This commit is contained in:
Jonas Jenwald 2019-10-28 21:22:59 +01:00
parent 16fb5437c7
commit 1133dbac33
2 changed files with 14 additions and 9 deletions

View File

@ -285,6 +285,12 @@ class ChunkedStream {
} }
return missingChunks; return missingChunks;
}; };
ChunkedStreamSubstream.prototype.allChunksLoaded = function() {
if (this.numChunksLoaded === this.numChunks) {
return true;
}
return this.getMissingChunks().length === 0;
};
const subStream = new ChunkedStreamSubstream(); const subStream = new ChunkedStreamSubstream();
subStream.pos = subStream.start = start; subStream.pos = subStream.start = start;

View File

@ -27,7 +27,6 @@ import { Lexer, Parser } from './parser';
import { import {
MissingDataException, toRomanNumerals, XRefEntryException, XRefParseException MissingDataException, toRomanNumerals, XRefEntryException, XRefParseException
} from './core_utils'; } from './core_utils';
import { ChunkedStream } from './chunked_stream';
import { CipherTransformFactory } from './crypto'; import { CipherTransformFactory } from './crypto';
import { ColorSpace } from './colorspace'; import { ColorSpace } from './colorspace';
@ -2072,14 +2071,14 @@ let ObjectLoader = (function() {
} }
ObjectLoader.prototype = { ObjectLoader.prototype = {
load() { async load() {
this.capability = createPromiseCapability(); // Don't walk the graph if all the data is already loaded; note that only
// Don't walk the graph if all the data is already loaded. // `ChunkedStream` instances have a `allChunksLoaded` method.
if (!(this.xref.stream instanceof ChunkedStream) || if (!this.xref.stream.allChunksLoaded ||
this.xref.stream.getMissingChunks().length === 0) { this.xref.stream.allChunksLoaded()) {
this.capability.resolve(); return undefined;
return this.capability.promise;
} }
this.capability = createPromiseCapability();
let { keys, dict, } = this; let { keys, dict, } = this;
this.refSet = new RefSet(); this.refSet = new RefSet();
@ -2126,7 +2125,7 @@ let ObjectLoader = (function() {
let foundMissingData = false; let foundMissingData = false;
for (let i = 0, ii = baseStreams.length; i < ii; i++) { for (let i = 0, ii = baseStreams.length; i < ii; i++) {
let stream = baseStreams[i]; let stream = baseStreams[i];
if (stream.getMissingChunks && stream.getMissingChunks().length) { if (stream.allChunksLoaded && !stream.allChunksLoaded()) {
foundMissingData = true; foundMissingData = true;
pendingRequests.push({ begin: stream.start, end: stream.end, }); pendingRequests.push({ begin: stream.start, end: stream.end, });
} }