From 0d591719d9482f255782dea1c1e0d741574f7b88 Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Tue, 9 Feb 2016 14:55:11 -0600 Subject: [PATCH] Makes PDF data reading Streams API friendly. --- src/core/chunked_stream.js | 83 +++--- src/core/network.js | 342 +++++++++++++++++++++- src/core/pdf_manager.js | 17 +- src/core/worker.js | 567 ++++++++++++++++++++++++++++--------- src/shared/util.js | 51 ++++ test/unit/api_spec.js | 93 +++++- test/unit/network_spec.js | 169 +++++++++++ test/unit/unit_test.html | 9 +- 8 files changed, 1153 insertions(+), 178 deletions(-) create mode 100644 test/unit/network_spec.js diff --git a/src/core/chunked_stream.js b/src/core/chunked_stream.js index b76ecf405..58716e7e5 100644 --- a/src/core/chunked_stream.js +++ b/src/core/chunked_stream.js @@ -28,6 +28,8 @@ }(this, function (exports, sharedUtil) { var MissingDataException = sharedUtil.MissingDataException; +var arrayByteLength = sharedUtil.arrayByteLength; +var arraysToBytes = sharedUtil.arraysToBytes; var assert = sharedUtil.assert; var createPromiseCapability = sharedUtil.createPromiseCapability; var isInt = sharedUtil.isInt; @@ -279,37 +281,16 @@ var ChunkedStream = (function ChunkedStreamClosure() { var ChunkedStreamManager = (function ChunkedStreamManagerClosure() { - function ChunkedStreamManager(length, chunkSize, url, args) { + function ChunkedStreamManager(pdfNetworkStream, args) { + var chunkSize = args.rangeChunkSize; + var length = args.length; this.stream = new ChunkedStream(length, chunkSize, this); this.length = length; this.chunkSize = chunkSize; - this.url = url; + this.pdfNetworkStream = pdfNetworkStream; + this.url = args.url; this.disableAutoFetch = args.disableAutoFetch; - var msgHandler = this.msgHandler = args.msgHandler; - - if (args.chunkedViewerLoading) { - msgHandler.on('OnDataRange', this.onReceiveData.bind(this)); - msgHandler.on('OnDataProgress', this.onProgress.bind(this)); - this.sendRequest = function ChunkedStreamManager_sendRequest(begin, end) { - 
msgHandler.send('RequestDataRange', { begin: begin, end: end }); - }; - } else { - - var getXhr = function getXhr() { - return new XMLHttpRequest(); - }; - this.networkManager = new NetworkManager(this.url, { - getXhr: getXhr, - httpHeaders: args.httpHeaders, - withCredentials: args.withCredentials - }); - this.sendRequest = function ChunkedStreamManager_sendRequest(begin, end) { - this.networkManager.requestRange(begin, end, { - onDone: this.onReceiveData.bind(this), - onProgress: this.onProgress.bind(this) - }); - }; - } + this.msgHandler = args.msgHandler; this.currRequestId = 0; @@ -317,12 +298,9 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() { this.requestsByChunk = Object.create(null); this.promisesByRequest = Object.create(null); this.progressiveDataLength = 0; + this.aborted = false; this._loadedStreamCapability = createPromiseCapability(); - - if (args.initialData) { - this.onReceiveData({chunk: args.initialData}); - } } ChunkedStreamManager.prototype = { @@ -330,6 +308,44 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() { return this._loadedStreamCapability.promise; }, + sendRequest: function ChunkedStreamManager_sendRequest(begin, end) { + var rangeReader = this.pdfNetworkStream.getRangeReader(begin, end); + if (!rangeReader.isStreamingSupported) { + rangeReader.onProgress = this.onProgress.bind(this); + } + var chunks = [], loaded = 0; + var manager = this; + var promise = new Promise(function (resolve, reject) { + var readChunk = function (chunk) { + try { + if (!chunk.done) { + var data = chunk.value; + chunks.push(data); + loaded += arrayByteLength(data); + if (rangeReader.isStreamingSupported) { + manager.onProgress({loaded: loaded}); + } + rangeReader.read().then(readChunk, reject); + return; + } + var chunkData = arraysToBytes(chunks); + chunks = null; + resolve(chunkData); + } catch (e) { + reject(e); + } + }; + rangeReader.read().then(readChunk, reject); + }); + promise.then(function (data) { + if 
(this.aborted) { + return; // ignoring any data after abort + } + this.onReceiveData({chunk: data, begin: begin}); + }.bind(this)); + // TODO check errors + }, + // Get all the chunks that are not yet loaded and groups them into // contiguous ranges to load in as few requests as possible requestAllChunks: function ChunkedStreamManager_requestAllChunks() { @@ -549,8 +565,9 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() { }, abort: function ChunkedStreamManager_abort() { - if (this.networkManager) { - this.networkManager.abortAllRequests(); + this.aborted = true; + if (this.pdfNetworkStream) { + this.pdfNetworkStream.cancelAllRequests('abort'); } for(var requestId in this.promisesByRequest) { var capability = this.promisesByRequest[requestId]; diff --git a/src/core/network.js b/src/core/network.js index 221ebd722..f84026d41 100644 --- a/src/core/network.js +++ b/src/core/network.js @@ -295,13 +295,347 @@ var NetworkManager = (function NetworkManagerClosure() { //#if !(FIREFOX || MOZCENTRAL) (function (root, factory) { if (typeof define === 'function' && define.amd) { - define('pdfjs/core/network', ['exports'], factory); + define('pdfjs/core/network', ['exports', 'pdfjs/shared/util', + 'pdfjs/core/worker'], factory); } else if (typeof exports !== 'undefined') { - factory(exports); + factory(exports, require('../shared/util.js'), require('./worker.js')); } else { - factory((root.pdfjsCoreNetwork = {})); + factory((root.pdfjsCoreNetwork = {}), root.pdfjsSharedUtil, + root.pdfjsCoreWorker); } -}(this, function (exports) { +}(this, function (exports, sharedUtil, coreWorker) { + + var assert = sharedUtil.assert; + var createPromiseCapability = sharedUtil.createPromiseCapability; + var isInt = sharedUtil.isInt; + var MissingPDFException = sharedUtil.MissingPDFException; + var UnexpectedResponseException = sharedUtil.UnexpectedResponseException; + + /** @implements {IPDFStream} */ + function PDFNetworkStream(options) { + this._options = options; + var 
source = options.source; + this._manager = new NetworkManager(source.url, { + httpHeaders: source.httpHeaders, + withCredentials: source.withCredentials + }); + this._rangeChunkSize = source.rangeChunkSize; + this._fullRequestReader = null; + this._rangeRequestReaders = []; + } + + PDFNetworkStream.prototype = { + _onRangeRequestReaderClosed: + function PDFNetworkStream_onRangeRequestReaderClosed(reader) { + var i = this._rangeRequestReaders.indexOf(reader); + if (i >= 0) { + this._rangeRequestReaders.splice(i, 1); + } + }, + + getFullReader: function PDFNetworkStream_getFullReader() { + assert(!this._fullRequestReader); + this._fullRequestReader = + new PDFNetworkStreamFullRequestReader(this._manager, this._options); + return this._fullRequestReader; + }, + + getRangeReader: function PDFNetworkStream_getRangeReader(begin, end) { + var reader = new PDFNetworkStreamRangeRequestReader(this._manager, + begin, end); + reader.onClosed = this._onRangeRequestReaderClosed.bind(this); + this._rangeRequestReaders.push(reader); + return reader; + }, + + cancelAllRequests: function PDFNetworkStream_cancelAllRequests(reason) { + if (this._fullRequestReader) { + this._fullRequestReader.cancel(reason); + } + var readers = this._rangeRequestReaders.slice(0); + readers.forEach(function (reader) { + reader.cancel(reason); + }); + } + }; + + /** @implements {IPDFStreamReader} */ + function PDFNetworkStreamFullRequestReader(manager, options) { + this._manager = manager; + + var source = options.source; + var args = { + onHeadersReceived: this._onHeadersReceived.bind(this), + onProgressiveData: source.disableStream ? 
null : + this._onProgressiveData.bind(this), + onDone: this._onDone.bind(this), + onError: this._onError.bind(this), + onProgress: this._onProgress.bind(this) + }; + this._url = source.url; + this._fullRequestId = manager.requestFull(args); + this._headersReceivedCapability = createPromiseCapability(); + this._disableRange = options.disableRange || false; + this._contentLength = source.length; // optional + this._rangeChunkSize = source.rangeChunkSize; + if (!this._rangeChunkSize && !this._disableRange) { + this._disableRange = true; + } + + this._isStreamingSupported = false; + this._isRangeSupported = false; + + this._cachedChunks = []; + this._requests = []; + this._done = false; + this._storedError = undefined; + + this.onProgress = null; + } + + PDFNetworkStreamFullRequestReader.prototype = { + _validateRangeRequestCapabilities: function + PDFNetworkStreamFullRequestReader_validateRangeRequestCapabilities() { + + if (this._disableRange) { + return false; + } + + var networkManager = this._manager; + var fullRequestXhrId = this._fullRequestId; + var fullRequestXhr = networkManager.getRequestXhr(fullRequestXhrId); + if (fullRequestXhr.getResponseHeader('Accept-Ranges') !== 'bytes') { + return false; + } + + var contentEncoding = + fullRequestXhr.getResponseHeader('Content-Encoding') || 'identity'; + if (contentEncoding !== 'identity') { + return false; + } + + var length = fullRequestXhr.getResponseHeader('Content-Length'); + length = parseInt(length, 10); + if (!isInt(length)) { + return false; + } + + this._contentLength = length; // setting right content length + + if (length <= 2 * this._rangeChunkSize) { + // The file size is smaller than the size of two chunks, so it does + // not make any sense to abort the request and retry with a range + // request. 
+ return false; + } + + return true; + }, + + _onHeadersReceived: + function PDFNetworkStreamFullRequestReader_onHeadersReceived() { + + if (this._validateRangeRequestCapabilities()) { + this._isRangeSupported = true; + } + + var networkManager = this._manager; + var fullRequestXhrId = this._fullRequestId; + if (networkManager.isStreamingRequest(fullRequestXhrId)) { + // We can continue fetching when progressive loading is enabled, + // and we don't need the autoFetch feature. + this._isStreamingSupported = true; + } else if (this._isRangeSupported) { + // NOTE: by cancelling the full request, and then issuing range + // requests, there will be an issue for sites where you can only + // request the pdf once. However, if this is the case, then the + // server should not be returning that it can support range + // requests. + networkManager.abortRequest(fullRequestXhrId); + } + + this._headersReceivedCapability.resolve(); + }, + + _onProgressiveData: + function PDFNetworkStreamFullRequestReader_onProgressiveData(chunk) { + if (this._requests.length > 0) { + var requestCapability = this._requests.shift(); + requestCapability.resolve({value: chunk, done: false}); + } else { + this._cachedChunks.push(chunk); + } + }, + + _onDone: function PDFNetworkStreamFullRequestReader_onDone(args) { + if (args) { + this._onProgressiveData(args.chunk); + } + this._done = true; + if (this._cachedChunks.length > 0) { + return; + } + this._requests.forEach(function (requestCapability) { + requestCapability.resolve({value: undefined, done: true}); + }); + this._requests = []; + }, + + _onError: function PDFNetworkStreamFullRequestReader_onError(status) { + var url = this._url; + var exception; + if (status === 404 || status === 0 && /^file:/.test(url)) { + exception = new MissingPDFException('Missing PDF "' + url + '".'); + } else { + exception = new UnexpectedResponseException( + 'Unexpected server response (' + status + + ') while retrieving PDF "' + url + '".', status); + } + 
this._storedError = exception; + this._headersReceivedCapability.reject(exception); + this._requests.forEach(function (requestCapability) { + requestCapability.reject(exception); + }); + this._requests = []; + this._cachedChunks = []; + }, + + _onProgress: function PDFNetworkStreamFullRequestReader_onProgress(data) { + if (this.onProgress) { + this.onProgress({ + loaded: data.loaded, + total: data.lengthComputable ? data.total : this._contentLength + }); + } + }, + + get isRangeSupported() { + return this._isRangeSupported; + }, + + get isStreamingSupported() { + return this._isStreamingSupported; + }, + + get contentLength() { + return this._contentLength; + }, + + get headersReady() { + return this._headersReceivedCapability.promise; + }, + + read: function PDFNetworkStreamFullRequestReader_read() { + if (this._storedError) { + return Promise.reject(this._storedError); + } + if (this._cachedChunks.length > 0) { + var chunk = this._cachedChunks.shift(); + return Promise.resolve(chunk); + } + if (this._done) { + return Promise.resolve({value: undefined, done: true}); + } + var requestCapability = createPromiseCapability(); + this._requests.push(requestCapability); + return requestCapability.promise; + }, + + cancel: function PDFNetworkStreamFullRequestReader_cancel(reason) { + this._done = true; + this._headersReceivedCapability.reject(reason); + this._requests.forEach(function (requestCapability) { + requestCapability.resolve({value: undefined, done: true}); + }); + this._requests = []; + if (this._manager.isPendingRequest(this._fullRequestId)) { + this._manager.abortRequest(this._fullRequestId); + } + this._fullRequestReader = null; + } + }; + + /** @implements {IPDFStreamRangeReader} */ + function PDFNetworkStreamRangeRequestReader(manager, begin, end) { + this._manager = manager; + var args = { + onDone: this._onDone.bind(this), + onProgress: this._onProgress.bind(this) + }; + this._requestId = manager.requestRange(begin, end, args); + this._requests = []; + 
this._queuedChunk = null; + this._done = false; + + this.onProgress = null; + this.onClosed = null; + } + + PDFNetworkStreamRangeRequestReader.prototype = { + _close: function PDFNetworkStreamRangeRequestReader_close() { + if (this.onClosed) { + this.onClosed(this); + } + }, + + _onDone: function PDFNetworkStreamRangeRequestReader_onDone(data) { + var chunk = data.chunk; + if (this._requests.length > 0) { + var requestCapability = this._requests.shift(); + requestCapability.resolve({value: chunk, done: false}); + } else { + this._queuedChunk = chunk; + } + this._done = true; + this._requests.forEach(function (requestCapability) { + requestCapability.resolve({value: undefined, done: true}); + }); + this._requests = []; + this._close(); + }, + + _onProgress: function PDFNetworkStreamRangeRequestReader_onProgress(evt) { + if (!this.isStreamingSupported && this.onProgress) { + this.onProgress({ + loaded: evt.loaded + }); + } + }, + + get isStreamingSupported() { + return false; // TODO allow progressive range bytes loading + }, + + read: function PDFNetworkStreamRangeRequestReader_read() { + if (this._queuedChunk !== null) { + var chunk = this._queuedChunk; + this._queuedChunk = null; + return Promise.resolve({value: chunk, done: false}); + } + if (this._done) { + return Promise.resolve({value: undefined, done: true}); + } + var requestCapability = createPromiseCapability(); + this._requests.push(requestCapability); + return requestCapability.promise; + }, + + cancel: function PDFNetworkStreamRangeRequestReader_cancel(reason) { + this._done = true; + this._requests.forEach(function (requestCapability) { + requestCapability.resolve({value: undefined, done: true}); + }); + this._requests = []; + if (this._manager.isPendingRequest(this._requestId)) { + this._manager.abortRequest(this._requestId); + } + this._close(); + } + }; + + coreWorker.setPDFNetworkStreamClass(PDFNetworkStream); + + exports.PDFNetworkStream = PDFNetworkStream; exports.NetworkManager = NetworkManager; 
})); //#endif diff --git a/src/core/pdf_manager.js b/src/core/pdf_manager.js index 564e7d2cd..8c6fd9b12 100644 --- a/src/core/pdf_manager.js +++ b/src/core/pdf_manager.js @@ -157,21 +157,18 @@ var LocalPdfManager = (function LocalPdfManagerClosure() { })(); var NetworkPdfManager = (function NetworkPdfManagerClosure() { - function NetworkPdfManager(docId, args, msgHandler) { + function NetworkPdfManager(docId, pdfNetworkStream, args) { this._docId = docId; - this.msgHandler = msgHandler; + this.msgHandler = args.msgHandler; var params = { - msgHandler: msgHandler, - httpHeaders: args.httpHeaders, - withCredentials: args.withCredentials, - chunkedViewerLoading: args.chunkedViewerLoading, + msgHandler: args.msgHandler, + url: args.url, + length: args.length, disableAutoFetch: args.disableAutoFetch, - initialData: args.initialData + rangeChunkSize: args.rangeChunkSize }; - this.streamManager = new ChunkedStreamManager(args.length, - args.rangeChunkSize, - args.url, params); + this.streamManager = new ChunkedStreamManager(pdfNetworkStream, params); this.pdfDocument = new PDFDocument(this, this.streamManager.getStream(), args.password); } diff --git a/src/core/worker.js b/src/core/worker.js index f71e42b93..c78b52eda 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -41,10 +41,12 @@ var PasswordException = sharedUtil.PasswordException; var PasswordResponses = sharedUtil.PasswordResponses; var UnknownErrorException = sharedUtil.UnknownErrorException; var XRefParseException = sharedUtil.XRefParseException; +var arrayByteLength = sharedUtil.arrayByteLength; +var arraysToBytes = sharedUtil.arraysToBytes; +var assert = sharedUtil.assert; var createPromiseCapability = sharedUtil.createPromiseCapability; var error = sharedUtil.error; var info = sharedUtil.info; -var isInt = sharedUtil.isInt; var warn = sharedUtil.warn; var Ref = corePrimitives.Ref; var LocalPdfManager = corePdfManager.LocalPdfManager; @@ -82,6 +84,350 @@ var WorkerTask = (function WorkerTaskClosure() 
{ return WorkerTask; })(); +//#if !PRODUCTION +/** + * Interface that represents PDF data transport. If possible, it allows + * progressively load entire or fragment of the PDF binary data. + * + * @interface + * */ +function IPDFStream() {} +IPDFStream.prototype = { + /** + * Gets a reader for the entire PDF data. + * @returns {IPDFStreamReader} + */ + getFullReader: function () { return null; }, + + /** + * Gets a reader for the range of the PDF data. + * @param {number} begin - the start offset of the data. + * @param {number} end - the end offset of the data. + * @returns {IPDFStreamRangeReader} + */ + getRangeReader: function (begin, end) { return null; }, + + /** + * Cancels all opened reader and closes all their opened requests. + * @param {Object} reason - the reason for cancelling + */ + cancelAllRequests: function (reason) {}, +}; + +/** + * Interface for a PDF binary data reader. + * + * @interface + */ +function IPDFStreamReader() {} +IPDFStreamReader.prototype = { + /** + * Gets a promise that is resolved when the headers and other metadata of + * the PDF data stream are available. + * @returns {Promise} + */ + get headersReady() { return null; }, + + /** + * Gets PDF binary data length. It is defined after the headersReady promise + * is resolved. + * @returns {number} The data length (or 0 if unknown). + */ + get contentLength() { return 0; }, + + /** + * Gets ability of the stream to handle range requests. It is defined after + * the headersReady promise is resolved. Rejected when the reader is cancelled + * or an error occurs. + * @returns {boolean} + */ + get isRangeSupported() { return false; }, + + /** + * Gets ability of the stream to progressively load binary data. It is defined + * after the headersReady promise is resolved. + * @returns {boolean} + */ + get isStreamingSupported() { return false; }, + + /** + * Requests a chunk of the binary data. 
The method returns the promise, which + * is resolved into object with properties "value" and "done". If the done + * is set to true, then the stream has reached its end, otherwise the value + * contains binary data. Cancelled requests will be resolved with the done is + * set to true. + * @returns {Promise} + */ + read: function () {}, + + /** + * Cancels all pending read requests and closes the stream. + * @param {Object} reason + */ + cancel: function (reason) {}, + + /** + * Sets or gets the progress callback. The callback can be useful when the + * isStreamingSupported property of the object is defined as false. + * The callback is called with one parameter: an object with the loaded and + * total properties. + */ + onProgress: null, +}; + +/** + * Interface for a PDF binary data fragment reader. + * + * @interface + */ +function IPDFStreamRangeReader() {} +IPDFStreamRangeReader.prototype = { + /** + * Gets ability of the stream to progressively load binary data. + * @returns {boolean} + */ + get isStreamingSupported() { return false; }, + + /** + * Requests a chunk of the binary data. The method returns the promise, which + * is resolved into object with properties "value" and "done". If the done + * is set to true, then the stream has reached its end, otherwise the value + * contains binary data. Cancelled requests will be resolved with the done is + * set to true. + * @returns {Promise} + */ + read: function () {}, + + /** + * Cancels all pending read requests and closes the stream. + * @param {Object} reason + */ + cancel: function (reason) {}, + + /** + * Sets or gets the progress callback. The callback can be useful when the + * isStreamingSupported property of the object is defined as false. + * The callback is called with one parameter: an object with the loaded + * property. 
+ */ + onProgress: null, +}; +//#endif + +/** @implements {IPDFStream} */ +var PDFWorkerStream = (function PDFWorkerStreamClosure() { + function PDFWorkerStream(params, msgHandler) { + this._queuedChunks = []; + var initialData = params.initialData; + if (initialData && initialData.length > 0) { + this._queuedChunks.push(initialData); + } + this._msgHandler = msgHandler; + + this._isRangeSupported = !(params.disableRange); + this._isStreamingSupported = !(params.disableStream); + this._contentLength = params.length; + + this._fullRequestReader = null; + this._rangeReaders = []; + + msgHandler.on('OnDataRange', this._onReceiveData.bind(this)); + msgHandler.on('OnDataProgress', this._onProgress.bind(this)); + } + PDFWorkerStream.prototype = { + _onReceiveData: function PDFWorkerStream_onReceiveData(args) { + if (args.begin === undefined) { + if (this._fullRequestReader) { + this._fullRequestReader._enqueue(args.chunk); + } else { + this._queuedChunks.push(args.chunk); + } + } else { + var found = this._rangeReaders.some(function (rangeReader) { + if (rangeReader._begin !== args.begin) { + return false; + } + rangeReader._enqueue(args.chunk); + return true; + }); + assert(found); + } + }, + + _onProgress: function PDFWorkerStream_onProgress(evt) { + if (this._rangeReaders.length > 0) { + // Reporting to first range reader. 
+ var firstReader = this._rangeReaders[0]; + if (firstReader.onProgress) { + firstReader.onProgress({loaded: evt.loaded}); + } + } + }, + + _removeRangeReader: function PDFWorkerStream_removeRangeReader(reader) { + var i = this._rangeReaders.indexOf(reader); + if (i >= 0) { + this._rangeReaders.splice(i, 1); + } + }, + + getFullReader: function PDFWorkerStream_getFullReader() { + assert(!this._fullRequestReader); + var queuedChunks = this._queuedChunks; + this._queuedChunks = null; + return new PDFWorkerStreamReader(this, queuedChunks); + }, + + getRangeReader: function PDFWorkerStream_getRangeReader(begin, end) { + var reader = new PDFWorkerStreamRangeReader(this, begin, end); + this._msgHandler.send('RequestDataRange', { begin: begin, end: end }); + this._rangeReaders.push(reader); + return reader; + }, + + cancelAllRequests: function PDFWorkerStream_cancelAllRequests(reason) { + if (this._fullRequestReader) { + this._fullRequestReader.cancel(reason); + } + var readers = this._rangeReaders.slice(0); + readers.forEach(function (rangeReader) { + rangeReader.cancel(reason); + }); + } + }; + + /** @implements {IPDFStreamReader} */ + function PDFWorkerStreamReader(stream, queuedChunks) { + this._stream = stream; + this._done = false; + this._queuedChunks = queuedChunks || []; + this._requests = []; + this._headersReady = Promise.resolve(); + stream._fullRequestReader = this; + + this.onProgress = null; // not used + } + PDFWorkerStreamReader.prototype = { + _enqueue: function PDFWorkerStreamReader_enqueue(chunk) { + if (this._done) { + return; // ignore new data + } + if (this._requests.length > 0) { + var requestCapability = this._requests.shift(); + requestCapability.resolve({value: chunk, done: false}); + return; + } + this._queuedChunks.push(chunk); + }, + + get headersReady() { + return this._headersReady; + }, + + get isRangeSupported() { + return this._stream._isRangeSupported; + }, + + get isStreamingSupported() { + return this._stream._isStreamingSupported; + 
}, + + get contentLength() { + return this._stream._contentLength; + }, + + read: function PDFWorkerStreamReader_read() { + if (this._queuedChunks.length > 0) { + var chunk = this._queuedChunks.shift(); + return Promise.resolve({value: chunk, done: false}); + } + if (this._done) { + return Promise.resolve({value: undefined, done: true}); + } + var requestCapability = createPromiseCapability(); + this._requests.push(requestCapability); + return requestCapability.promise; + }, + + cancel: function PDFWorkerStreamReader_cancel(reason) { + this._done = true; + this._requests.forEach(function (requestCapability) { + requestCapability.resolve({value: undefined, done: true}); + }); + this._requests = []; + } + }; + + /** @implements {IPDFStreamRangeReader} */ + function PDFWorkerStreamRangeReader(stream, begin, end) { + this._stream = stream; + this._begin = begin; + this._end = end; + this._queuedChunk = null; + this._requests = []; + this._done = false; + + this.onProgress = null; + } + PDFWorkerStreamRangeReader.prototype = { + _enqueue: function PDFWorkerStreamRangeReader_enqueue(chunk) { + if (this._done) { + return; // ignore new data + } + if (this._requests.length === 0) { + this._queuedChunk = chunk; + } else { + var requestsCapability = this._requests.shift(); + requestsCapability.resolve({value: chunk, done: false}); + this._requests.forEach(function (requestCapability) { + requestCapability.resolve({value: undefined, done: true}); + }); + this._requests = []; + } + this._done = true; + this._stream._removeRangeReader(this); + }, + + get isStreamingSupported() { + return false; + }, + + read: function PDFWorkerStreamRangeReader_read() { + if (this._queuedChunk) { + return Promise.resolve({value: this._queuedChunk, done: false}); + } + if (this._done) { + return Promise.resolve({value: undefined, done: true}); + } + var requestCapability = createPromiseCapability(); + this._requests.push(requestCapability); + return requestCapability.promise; + }, + + cancel: 
function PDFWorkerStreamRangeReader_cancel(reason) { + this._done = true; + this._requests.forEach(function (requestCapability) { + requestCapability.resolve({value: undefined, done: true}); + }); + this._requests = []; + this._stream._removeRangeReader(this); + } + }; + + return PDFWorkerStream; +})(); + +/** @type IPDFStream */ +var PDFNetworkStream; + +/** + * Sets PDFNetworkStream class to be used as alternative PDF data transport. + * @param {IPDFStream} cls - the PDF data transport. + */ +function setPDFNetworkStreamClass(cls) { + PDFNetworkStream = cls; +} + var WorkerMessageHandler = PDFJS.WorkerMessageHandler = { setup: function wphSetup(handler, port) { var testMessageProcessed = false; @@ -188,7 +534,6 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = { var pdfManager; var source = data.source; - var disableRange = data.disableRange; if (source.data) { try { pdfManager = new LocalPdfManager(docId, source.data, source.password); @@ -196,144 +541,113 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = { } catch (ex) { pdfManagerCapability.reject(ex); } + } + + var pdfStream; + try { + if (source.chunkedViewerLoading) { + pdfStream = new PDFWorkerStream(source, handler); + } else { + assert(PDFNetworkStream, 'pdfjs/core/network module is not loaded'); + pdfStream = new PDFNetworkStream(data); + } + } catch (ex) { + pdfManagerCapability.reject(ex); return pdfManagerCapability.promise; - } else if (source.chunkedViewerLoading) { + } + + var fullRequest = pdfStream.getFullReader(); + fullRequest.headersReady.then(function () { + if (!fullRequest.isStreamingSupported || + !fullRequest.isRangeSupported) { + // If stream or range are disabled, it's our only way to report + // loading progress. + fullRequest.onProgress = function (evt) { + handler.send('DocProgress', { + loaded: evt.loaded, + total: evt.total + }); + }; + } + + if (!fullRequest.isRangeSupported) { + return; + } + + // We don't need auto-fetch when streaming is enabled. 
+ var disableAutoFetch = source.disableAutoFetch || + fullRequest.isStreamingSupported; + pdfManager = new NetworkPdfManager(docId, pdfStream, { + msgHandler: handler, + url: source.url, + password: source.password, + length: fullRequest.contentLength, + disableAutoFetch: disableAutoFetch, + rangeChunkSize: source.rangeChunkSize + }); + pdfManagerCapability.resolve(pdfManager); + cancelXHRs = null; + }).catch(function (reason) { + pdfManagerCapability.reject(reason); + cancelXHRs = null; + }); + + var cachedChunks = [], loaded = 0; + var flushChunks = function () { + var pdfFile = arraysToBytes(cachedChunks); + if (source.length && pdfFile.length !== source.length) { + warn('reported HTTP length is different from actual'); + } + // the data is array, instantiating directly from it try { - pdfManager = new NetworkPdfManager(docId, source, handler); + pdfManager = new LocalPdfManager(docId, pdfFile, source.password); pdfManagerCapability.resolve(pdfManager); } catch (ex) { pdfManagerCapability.reject(ex); } - return pdfManagerCapability.promise; - } - - var networkManager = new NetworkManager(source.url, { - httpHeaders: source.httpHeaders, - withCredentials: source.withCredentials - }); - var cachedChunks = []; - var fullRequestXhrId = networkManager.requestFull({ - onHeadersReceived: function onHeadersReceived() { - if (disableRange) { - return; - } - - var fullRequestXhr = networkManager.getRequestXhr(fullRequestXhrId); - if (fullRequestXhr.getResponseHeader('Accept-Ranges') !== 'bytes') { - return; - } - - var contentEncoding = - fullRequestXhr.getResponseHeader('Content-Encoding') || 'identity'; - if (contentEncoding !== 'identity') { - return; - } - - var length = fullRequestXhr.getResponseHeader('Content-Length'); - length = parseInt(length, 10); - if (!isInt(length)) { - return; - } - source.length = length; - if (length <= 2 * source.rangeChunkSize) { - // The file size is smaller than the size of two chunks, so it does - // not make any sense to abort the 
request and retry with a range - // request. - return; - } - - if (networkManager.isStreamingRequest(fullRequestXhrId)) { - // We can continue fetching when progressive loading is enabled, - // and we don't need the autoFetch feature. - source.disableAutoFetch = true; - } else { - // NOTE: by cancelling the full request, and then issuing range - // requests, there will be an issue for sites where you can only - // request the pdf once. However, if this is the case, then the - // server should not be returning that it can support range - // requests. - networkManager.abortRequest(fullRequestXhrId); - } - + cachedChunks = []; + }; + var readPromise = new Promise(function (resolve, reject) { + var readChunk = function (chunk) { try { - pdfManager = new NetworkPdfManager(docId, source, handler); - pdfManagerCapability.resolve(pdfManager); - } catch (ex) { - pdfManagerCapability.reject(ex); - } - cancelXHRs = null; - }, - - onProgressiveData: source.disableStream ? null : - function onProgressiveData(chunk) { - if (!pdfManager) { - cachedChunks.push(chunk); - return; - } - pdfManager.sendProgressiveData(chunk); - }, - - onDone: function onDone(args) { - if (pdfManager) { - return; // already processed - } - - var pdfFile; - if (args === null) { - // TODO add some streaming manager, e.g. for unknown length files. - // The data was returned in the onProgressiveData, combining... 
- var pdfFileLength = 0, pos = 0; - cachedChunks.forEach(function (chunk) { - pdfFileLength += chunk.byteLength; - }); - if (source.length && pdfFileLength !== source.length) { - warn('reported HTTP length is different from actual'); + ensureNotTerminated(); + if (chunk.done) { + if (!pdfManager) { + flushChunks(); + } + cancelXHRs = null; + return; } - var pdfFileArray = new Uint8Array(pdfFileLength); - cachedChunks.forEach(function (chunk) { - pdfFileArray.set(new Uint8Array(chunk), pos); - pos += chunk.byteLength; - }); - pdfFile = pdfFileArray.buffer; - } else { - pdfFile = args.chunk; - } - // the data is array, instantiating directly from it - try { - pdfManager = new LocalPdfManager(docId, pdfFile, source.password); - pdfManagerCapability.resolve(pdfManager); - } catch (ex) { - pdfManagerCapability.reject(ex); - } - cancelXHRs = null; - }, + var data = chunk.value; + loaded += arrayByteLength(data); + if (!fullRequest.isStreamingSupported) { + handler.send('DocProgress', { + loaded: loaded, + total: Math.max(loaded, fullRequest.contentLength || 0) + }); + } - onError: function onError(status) { - var exception; - if (status === 404 || status === 0 && /^file:/.test(source.url)) { - exception = new MissingPDFException('Missing PDF "' + - source.url + '".'); - handler.send('MissingPDF', exception); - } else { - exception = new UnexpectedResponseException( - 'Unexpected server response (' + status + - ') while retrieving PDF "' + source.url + '".', status); - handler.send('UnexpectedResponse', exception); - } - cancelXHRs = null; - }, + if (pdfManager) { + pdfManager.sendProgressiveData(data); + } else { + cachedChunks.push(data); + } - onProgress: function onProgress(evt) { - handler.send('DocProgress', { - loaded: evt.loaded, - total: evt.lengthComputable ? 
evt.total : source.length - }); - } + fullRequest.read().then(readChunk, reject); + } catch (e) { + reject(e); + } + }; + fullRequest.read().then(readChunk, reject); + }); + readPromise.catch(function (e) { + pdfManagerCapability.reject(e); + cancelXHRs = null; }); cancelXHRs = function () { - networkManager.abortRequest(fullRequestXhrId); + pdfStream.cancelAllRequests('abort'); }; return pdfManagerCapability.promise; @@ -676,6 +990,7 @@ if (typeof window === 'undefined' && initializeWorker(); } +exports.setPDFNetworkStreamClass = setPDFNetworkStreamClass; exports.WorkerTask = WorkerTask; exports.WorkerMessageHandler = WorkerMessageHandler; })); diff --git a/src/shared/util.js b/src/shared/util.js index 3a6b4f2c6..a13254ec4 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -567,6 +567,55 @@ function stringToBytes(str) { return bytes; } +/** + * Gets length of the array (Array, Uint8Array, or string) in bytes. + * @param {Array|Uint8Array|string} arr + * @returns {number} + */ +function arrayByteLength(arr) { + if (arr.length !== undefined) { + return arr.length; + } + assert(arr.byteLength !== undefined); + return arr.byteLength; +} + +/** + * Combines array items (arrays) into single Uint8Array object. + * @param {Array} arr - the array of the arrays (Array, Uint8Array, or string). + * @returns {Uint8Array} + */ +function arraysToBytes(arr) { + // Shortcut: if first and only item is Uint8Array, return it. 
+ if (arr.length === 1 && (arr[0] instanceof Uint8Array)) { + return arr[0]; + } + var resultLength = 0; + var i, ii = arr.length; + var item, itemLength ; + for (i = 0; i < ii; i++) { + item = arr[i]; + itemLength = arrayByteLength(item); + resultLength += itemLength; + } + var pos = 0; + var data = new Uint8Array(resultLength); + for (i = 0; i < ii; i++) { + item = arr[i]; + if (!(item instanceof Uint8Array)) { + if (typeof item === 'string') { + item = stringToBytes(item); + } else { + item = new Uint8Array(item); + } + } + itemLength = item.byteLength; + data.set(item, pos); + pos += itemLength; + } + return data; +} + function string32(value) { return String.fromCharCode((value >> 24) & 0xff, (value >> 16) & 0xff, (value >> 8) & 0xff, value & 0xff); @@ -2361,6 +2410,8 @@ exports.UnexpectedResponseException = UnexpectedResponseException; exports.UnknownErrorException = UnknownErrorException; exports.Util = Util; exports.XRefParseException = XRefParseException; +exports.arrayByteLength = arrayByteLength; +exports.arraysToBytes = arraysToBytes; exports.assert = assert; exports.bytesToString = bytesToString; exports.combineUrl = combineUrl; diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 94f8e270c..a1ce75140 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -8,6 +8,7 @@ describe('api', function() { var basicApiUrl = combineUrl(window.location.href, '../pdfs/basicapi.pdf'); var basicApiFileLength = 105779; // bytes + var TEST_TIMEOUT = 20000; function waitsForPromiseResolved(promise, successCallback) { var resolved = false; promise.then(function(val) { @@ -20,7 +21,7 @@ describe('api', function() { }); waitsFor(function() { return resolved; - }, 20000); + }, TEST_TIMEOUT); } function waitsForPromiseRejected(promise, failureCallback) { var rejected = false; @@ -34,7 +35,13 @@ describe('api', function() { }); waitsFor(function() { return rejected; - }, 20000); + }, TEST_TIMEOUT); + } + function waitSome(callback) { + var WAIT_TIMEOUT 
= 10; + setTimeout(function () { + callback(); + }, WAIT_TIMEOUT); } describe('PDFJS', function() { @@ -710,4 +717,86 @@ describe('api', function() { waitsForPromiseResolved(promiseDone, function() {}); }); }); + describe('PDFDataRangeTransport', function () { + var pdfPath = combineUrl(window.location.href, '../pdfs/tracemonkey.pdf'); + var loadPromise; + function getDocumentData() { + if (loadPromise) { + return loadPromise; + } + loadPromise = new Promise(function (resolve, reject) { + var xhr = new XMLHttpRequest(pdfPath); + xhr.open('GET', pdfPath); + xhr.responseType = 'arraybuffer'; + xhr.onload = function () { resolve(new Uint8Array(xhr.response)); }; + xhr.onerror = function () { reject(new Error('PDF is not loaded')); }; + xhr.send(); + }); + return loadPromise; + } + it('should fetch document info and page using ranges', function () { + var transport; + var initialDataLength = 4000; + var fetches = 0; + var getDocumentPromise = getDocumentData().then(function (data) { + var initialData = data.subarray(0, initialDataLength); + transport = new PDFJS.PDFDataRangeTransport(data.length, initialData); + transport.requestDataRange = function (begin, end) { + fetches++; + waitSome(function () { + transport.onDataProgress(4000); + transport.onDataRange(begin, data.subarray(begin, end)); + }); + }; + var loadingTask = PDFJS.getDocument(transport); + return loadingTask.promise; + }); + var pdfDocument; + var getPagePromise = getDocumentPromise.then(function (pdfDocument_) { + pdfDocument = pdfDocument_; + var pagePromise = pdfDocument.getPage(10); + return pagePromise; + }); + + waitsForPromiseResolved(getPagePromise, function (page) { + expect(pdfDocument.numPages).toEqual(14); + expect(page.rotate).toEqual(0); + expect(fetches).toBeGreaterThan(2); + }); + }); + it('should fetch document info and page using range and streaming', + function () { + var transport; + var initialDataLength = 4000; + var fetches = 0; + var getDocumentPromise = 
getDocumentData().then(function (data) { + var initialData = data.subarray(0, initialDataLength); + transport = new PDFJS.PDFDataRangeTransport(data.length, initialData); + transport.requestDataRange = function (begin, end) { + fetches++; + if (fetches === 1) { + // send rest of the data on first range request. + transport.onDataProgressiveRead(data.subarray(initialDataLength)); + } + waitSome(function () { + transport.onDataRange(begin, data.subarray(begin, end)); + }); + }; + var loadingTask = PDFJS.getDocument(transport); + return loadingTask.promise; + }); + var pdfDocument; + var getPagePromise = getDocumentPromise.then(function (pdfDocument_) { + pdfDocument = pdfDocument_; + var pagePromise = pdfDocument.getPage(10); + return pagePromise; + }); + + waitsForPromiseResolved(getPagePromise, function (page) { + expect(pdfDocument.numPages).toEqual(14); + expect(page.rotate).toEqual(0); + expect(fetches).toEqual(1); + }); + }); + }); }); diff --git a/test/unit/network_spec.js b/test/unit/network_spec.js new file mode 100644 index 000000000..16e53715e --- /dev/null +++ b/test/unit/network_spec.js @@ -0,0 +1,169 @@ +/* globals expect, it, describe, waitsFor, combineUrl, PDFNetworkStream */ + +'use strict'; + +describe('network', function() { + var pdf1 = combineUrl(window.location.href, '../pdfs/tracemonkey.pdf'); + var pdf1Length = 1016315; + var pdf2 = combineUrl(window.location.href, '../pdfs/pdf.pdf'); + var pdf2Length = 32472771; + + function waitsForPromiseResolved(promise, successCallback) { + var TEST_TIMEOUT = 20000; + var resolved = false; + promise.then(function(val) { + resolved = true; + successCallback(val); + }, + function(error) { + // Shouldn't get here. 
+ expect(error).toEqual('the promise should not have been rejected'); + }); + waitsFor(function() { + return resolved; + }, TEST_TIMEOUT); + } + + it('read without stream and range', function() { + var stream = new PDFNetworkStream({ + source: { + url: pdf1, + rangeChunkSize: 65536, + disableStream: true, + }, + disableRange: true + }); + + var fullReader = stream.getFullReader(); + + var isStreamingSupported, isRangeSupported; + var promise = fullReader.headersReady.then(function () { + isStreamingSupported = fullReader.isStreamingSupported; + isRangeSupported = fullReader.isRangeSupported; + }); + + var len = 0, count = 0; + var read = function () { + return fullReader.read().then(function (result) { + if (result.done) { + return; + } + count++; + len += result.value.byteLength; + return read(); + }); + }; + + var readPromise = read(); + + waitsForPromiseResolved(readPromise, function (page) { + expect(len).toEqual(pdf1Length); + expect(count).toEqual(1); + expect(isStreamingSupported).toEqual(false); + expect(isRangeSupported).toEqual(false); + }); + }); + + it('read with streaming', function() { + var userAgent = window.navigator.userAgent; + // The test is valid for Firefox only: its XHR supports the + // 'moz-chunked-arraybuffer' response type. + // TODO enable for other browsers, e.g. when fetch/streams API is supported. + var m = /Mozilla\/5.0.*?rv:(\d+).*? 
Gecko/.exec(userAgent); + if (!m || m[1] < 9) { + return; + } + + var stream = new PDFNetworkStream({ + source: { + url: pdf2, + rangeChunkSize: 65536, + disableStream: false, + }, + disableRange: false + }); + + var fullReader = stream.getFullReader(); + + var isStreamingSupported, isRangeSupported; + var promise = fullReader.headersReady.then(function () { + isStreamingSupported = fullReader.isStreamingSupported; + isRangeSupported = fullReader.isRangeSupported; + }); + + var len = 0, count = 0; + var read = function () { + return fullReader.read().then(function (result) { + if (result.done) { + return; + } + count++; + len += result.value.byteLength; + return read(); + }); + }; + + var readPromise = read(); + + waitsForPromiseResolved(readPromise, function (page) { + expect(len).toEqual(pdf2Length); + expect(count).toBeGreaterThan(1); + expect(isStreamingSupported).toEqual(true); + }); + }); + + it('read custom ranges', function () { + // We don't test on browsers that don't support range requests, so + // this test is required to pass. + var rangeSize = 32768; + var stream = new PDFNetworkStream({ + source: { + url: pdf1, + length: pdf1Length, + rangeChunkSize: rangeSize, + disableStream: true, + }, + disableRange: false + }); + + var fullReader = stream.getFullReader(); + + var isStreamingSupported, isRangeSupported, fullReaderCancelled; + var promise = fullReader.headersReady.then(function () { + isStreamingSupported = fullReader.isStreamingSupported; + isRangeSupported = fullReader.isRangeSupported; + // we shall be able to close the full reader without issues + fullReader.cancel('Don\'t need full reader'); + fullReaderCancelled = true; + }); + + // Skipping fullReader results, requesting something from the PDF end. 
+ var tailSize = (pdf1Length % rangeSize) || rangeSize; + + var range1Reader = stream.getRangeReader(pdf1Length - tailSize - rangeSize, + pdf1Length - tailSize); + var range2Reader = stream.getRangeReader(pdf1Length - tailSize, pdf1Length); + + var result1 = {value: 0}, result2 = {value: 0}; + var read = function (reader, lenResult) { + return reader.read().then(function (result) { + if (result.done) { + return; + } + lenResult.value += result.value.byteLength; + return read(reader, lenResult); + }); + }; + + var readPromises = Promise.all([read(range1Reader, result1), + read(range2Reader, result2), + promise]); + + waitsForPromiseResolved(readPromises, function (page) { + expect(result1.value).toEqual(rangeSize); + expect(result2.value).toEqual(tailSize); + expect(isRangeSupported).toEqual(true); + expect(fullReaderCancelled).toEqual(true); + }); + }); +}); diff --git a/test/unit/unit_test.html b/test/unit/unit_test.html index 6ff77d3e6..3927ab7a4 100644 --- a/test/unit/unit_test.html +++ b/test/unit/unit_test.html @@ -36,6 +36,7 @@ +