From 49e8a270c443f9b9f3c2d2afc73b19f1e89c599f Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 29 Mar 2019 16:03:29 +0100 Subject: [PATCH] Update `ChunkedStream.makeSubStream` to actually check if (some) data exists when the `length` parameter is undefined Note that `XRef.fetchUncompressed`, which is used *a lot* for most PDF documents, calls the `makeSubStream` method without providing a `length` argument. In practice this means that the `makeSubStream` method, on the `ChunkedStream` instance, calls the `ensureRange` method with `NaN` as the end position (since `start + undefined` evaluates to `NaN`), so no data is requested even though it may be necessary. This may be quite bad, since in this particular case it will lead to a new `ChunkedStream` being created *and* also a new `Parser`/`Lexer` instance. Given that it's quite possible that even the very first `Parser.getObj` call could throw `MissingDataException`, this can lead to wasted time/resources (since re-parsing is necessary once the data finally arrives). You obviously need to be very careful not to have `ChunkedStream.makeSubStream` accidentally request the *entire* file, hence its `this.end` property is of no use here, but it should be possible to at least check that the `start` of the data is present before any potentially expensive parsing occurs. 
--- src/core/chunked_stream.js | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/core/chunked_stream.js b/src/core/chunked_stream.js index 920fc9af0..0be3140b0 100644 --- a/src/core/chunked_stream.js +++ b/src/core/chunked_stream.js @@ -234,7 +234,20 @@ class ChunkedStream { } makeSubStream(start, length, dict) { - this.ensureRange(start, start + length); + if (length) { + this.ensureRange(start, start + length); + } else { + // When the `length` is undefined you do *not*, under any circumstances, + // want to fallback on calling `this.ensureRange(start, this.end)` since + // that would force the *entire* PDF file to be loaded, thus completely + // breaking the whole purpose of using streaming and/or range requests. + // + // However, not doing any checking here could very easily lead to wasted + // time/resources during e.g. parsing, since `MissingDataException`s will + // require data to be re-parsed, which we attempt to minimize by at least + // checking that the *beginning* of the data is available here. + this.ensureByte(start); + } function ChunkedStreamSubstream() {} ChunkedStreamSubstream.prototype = Object.create(this);