Makes PDF data reading Streams API friendly.

This commit is contained in:
Yury Delendik 2016-02-09 14:55:11 -06:00
parent 8cdb69634f
commit 0d591719d9
8 changed files with 1153 additions and 178 deletions

View File

@ -28,6 +28,8 @@
}(this, function (exports, sharedUtil) {
var MissingDataException = sharedUtil.MissingDataException;
var arrayByteLength = sharedUtil.arrayByteLength;
var arraysToBytes = sharedUtil.arraysToBytes;
var assert = sharedUtil.assert;
var createPromiseCapability = sharedUtil.createPromiseCapability;
var isInt = sharedUtil.isInt;
@ -279,37 +281,16 @@ var ChunkedStream = (function ChunkedStreamClosure() {
var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {
function ChunkedStreamManager(length, chunkSize, url, args) {
function ChunkedStreamManager(pdfNetworkStream, args) {
var chunkSize = args.rangeChunkSize;
var length = args.length;
this.stream = new ChunkedStream(length, chunkSize, this);
this.length = length;
this.chunkSize = chunkSize;
this.url = url;
this.pdfNetworkStream = pdfNetworkStream;
this.url = args.url;
this.disableAutoFetch = args.disableAutoFetch;
var msgHandler = this.msgHandler = args.msgHandler;
if (args.chunkedViewerLoading) {
msgHandler.on('OnDataRange', this.onReceiveData.bind(this));
msgHandler.on('OnDataProgress', this.onProgress.bind(this));
this.sendRequest = function ChunkedStreamManager_sendRequest(begin, end) {
msgHandler.send('RequestDataRange', { begin: begin, end: end });
};
} else {
var getXhr = function getXhr() {
return new XMLHttpRequest();
};
this.networkManager = new NetworkManager(this.url, {
getXhr: getXhr,
httpHeaders: args.httpHeaders,
withCredentials: args.withCredentials
});
this.sendRequest = function ChunkedStreamManager_sendRequest(begin, end) {
this.networkManager.requestRange(begin, end, {
onDone: this.onReceiveData.bind(this),
onProgress: this.onProgress.bind(this)
});
};
}
this.msgHandler = args.msgHandler;
this.currRequestId = 0;
@ -317,12 +298,9 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {
this.requestsByChunk = Object.create(null);
this.promisesByRequest = Object.create(null);
this.progressiveDataLength = 0;
this.aborted = false;
this._loadedStreamCapability = createPromiseCapability();
if (args.initialData) {
this.onReceiveData({chunk: args.initialData});
}
}
ChunkedStreamManager.prototype = {
@ -330,6 +308,44 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {
return this._loadedStreamCapability.promise;
},
sendRequest: function ChunkedStreamManager_sendRequest(begin, end) {
var rangeReader = this.pdfNetworkStream.getRangeReader(begin, end);
if (!rangeReader.isStreamingSupported) {
rangeReader.onProgress = this.onProgress.bind(this);
}
var chunks = [], loaded = 0;
var manager = this;
var promise = new Promise(function (resolve, reject) {
var readChunk = function (chunk) {
try {
if (!chunk.done) {
var data = chunk.value;
chunks.push(data);
loaded += arrayByteLength(data);
if (rangeReader.isStreamingSupported) {
manager.onProgress({loaded: loaded});
}
rangeReader.read().then(readChunk, reject);
return;
}
var chunkData = arraysToBytes(chunks);
chunks = null;
resolve(chunkData);
} catch (e) {
reject(e);
}
};
rangeReader.read().then(readChunk, reject);
});
promise.then(function (data) {
if (this.aborted) {
return; // ignoring any data after abort
}
this.onReceiveData({chunk: data, begin: begin});
}.bind(this));
// TODO check errors
},
// Get all the chunks that are not yet loaded and groups them into
// contiguous ranges to load in as few requests as possible
requestAllChunks: function ChunkedStreamManager_requestAllChunks() {
@ -549,8 +565,9 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {
},
abort: function ChunkedStreamManager_abort() {
if (this.networkManager) {
this.networkManager.abortAllRequests();
this.aborted = true;
if (this.pdfNetworkStream) {
this.pdfNetworkStream.cancelAllRequests('abort');
}
for(var requestId in this.promisesByRequest) {
var capability = this.promisesByRequest[requestId];

View File

@ -295,13 +295,347 @@ var NetworkManager = (function NetworkManagerClosure() {
//#if !(FIREFOX || MOZCENTRAL)
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define('pdfjs/core/network', ['exports'], factory);
define('pdfjs/core/network', ['exports', 'pdfjs/shared/util',
'pdfjs/core/worker'], factory);
} else if (typeof exports !== 'undefined') {
factory(exports);
factory(exports, require('../shared/util.js'), require('./worker.js'));
} else {
factory((root.pdfjsCoreNetwork = {}));
factory((root.pdfjsCoreNetwork = {}), root.pdfjsSharedUtil,
root.pdfjsCoreWorker);
}
}(this, function (exports) {
}(this, function (exports, sharedUtil, coreWorker) {
var assert = sharedUtil.assert;
var createPromiseCapability = sharedUtil.createPromiseCapability;
var isInt = sharedUtil.isInt;
var MissingPDFException = sharedUtil.MissingPDFException;
var UnexpectedResponseException = sharedUtil.UnexpectedResponseException;
/**
 * PDF data transport backed by a NetworkManager: hands out one reader for
 * the full document and any number of readers for byte ranges.
 *
 * @implements {IPDFStream}
 * @param {Object} options - the `source` property supplies url, httpHeaders,
 *   withCredentials and rangeChunkSize; the whole object is forwarded to the
 *   full request reader.
 */
function PDFNetworkStream(options) {
  this._options = options;
  var src = options.source;
  var managerArgs = {
    httpHeaders: src.httpHeaders,
    withCredentials: src.withCredentials
  };
  this._manager = new NetworkManager(src.url, managerArgs);
  this._rangeChunkSize = src.rangeChunkSize;
  this._fullRequestReader = null;
  this._rangeRequestReaders = [];
}

PDFNetworkStream.prototype = {
  // Forgets a range reader once it reports that it has been closed.
  _onRangeRequestReaderClosed:
      function PDFNetworkStream_onRangeRequestReaderClosed(reader) {
    var position = this._rangeRequestReaders.indexOf(reader);
    if (position < 0) {
      return;
    }
    this._rangeRequestReaders.splice(position, 1);
  },

  // Creates the (single) reader for the entire document.
  getFullReader: function PDFNetworkStream_getFullReader() {
    assert(!this._fullRequestReader);
    var fullReader =
      new PDFNetworkStreamFullRequestReader(this._manager, this._options);
    this._fullRequestReader = fullReader;
    return this._fullRequestReader;
  },

  // Creates and tracks a reader for the given byte range.
  getRangeReader: function PDFNetworkStream_getRangeReader(begin, end) {
    var rangeReader = new PDFNetworkStreamRangeRequestReader(this._manager,
                                                             begin, end);
    rangeReader.onClosed = this._onRangeRequestReaderClosed.bind(this);
    this._rangeRequestReaders.push(rangeReader);
    return rangeReader;
  },

  // Cancels the full reader (if any) and every tracked range reader.
  cancelAllRequests: function PDFNetworkStream_cancelAllRequests(reason) {
    if (this._fullRequestReader) {
      this._fullRequestReader.cancel(reason);
    }
    // Iterate over a snapshot: cancelling a reader removes it from the
    // tracked list through its onClosed callback.
    var snapshot = this._rangeRequestReaders.slice(0);
    for (var k = 0, kk = snapshot.length; k < kk; k++) {
      snapshot[k].cancel(reason);
    }
  }
};
/**
 * Reader for the entire PDF: issues one full request on the given manager
 * and exposes the received data through promise-based read() calls.
 *
 * @implements {IPDFStreamReader}
 * @param {Object} manager - the network manager used to issue/abort the
 *   request (requestFull, getRequestXhr, isStreamingRequest,
 *   isPendingRequest and abortRequest are called on it).
 * @param {Object} options - `options.source` supplies url, length,
 *   rangeChunkSize and disableStream; `options.disableRange` turns range
 *   support off.
 */
function PDFNetworkStreamFullRequestReader(manager, options) {
  this._manager = manager;

  var source = options.source;
  var args = {
    onHeadersReceived: this._onHeadersReceived.bind(this),
    onProgressiveData: source.disableStream ? null :
                       this._onProgressiveData.bind(this),
    onDone: this._onDone.bind(this),
    onError: this._onError.bind(this),
    onProgress: this._onProgress.bind(this)
  };
  this._url = source.url;
  this._fullRequestId = manager.requestFull(args);
  this._headersReceivedCapability = createPromiseCapability();
  this._disableRange = options.disableRange || false;
  this._contentLength = source.length; // optional
  this._rangeChunkSize = source.rangeChunkSize;
  if (!this._rangeChunkSize && !this._disableRange) {
    // Without a chunk size range requests cannot be used.
    this._disableRange = true;
  }

  this._isStreamingSupported = false;
  this._isRangeSupported = false;

  // Raw chunks received while no read() was pending.
  this._cachedChunks = [];
  // Promise capabilities of read() calls waiting for data.
  this._requests = [];
  this._done = false;
  this._storedError = undefined;

  this.onProgress = null;
}

PDFNetworkStreamFullRequestReader.prototype = {
  /**
   * Inspects the response headers of the full request to decide whether
   * range requests can be used; also records the reported content length.
   * @returns {boolean}
   */
  _validateRangeRequestCapabilities: function
      PDFNetworkStreamFullRequestReader_validateRangeRequestCapabilities() {

    if (this._disableRange) {
      return false;
    }

    var networkManager = this._manager;
    var fullRequestXhrId = this._fullRequestId;
    var fullRequestXhr = networkManager.getRequestXhr(fullRequestXhrId);
    if (fullRequestXhr.getResponseHeader('Accept-Ranges') !== 'bytes') {
      return false;
    }

    var contentEncoding =
      fullRequestXhr.getResponseHeader('Content-Encoding') || 'identity';
    if (contentEncoding !== 'identity') {
      return false;
    }

    var length = fullRequestXhr.getResponseHeader('Content-Length');
    length = parseInt(length, 10);
    if (!isInt(length)) {
      return false;
    }

    this._contentLength = length; // setting right content length

    if (length <= 2 * this._rangeChunkSize) {
      // The file size is smaller than the size of two chunks, so it does
      // not make any sense to abort the request and retry with a range
      // request.
      return false;
    }

    return true;
  },

  // Called by the manager once the response headers are available.
  _onHeadersReceived:
      function PDFNetworkStreamFullRequestReader_onHeadersReceived() {

    if (this._validateRangeRequestCapabilities()) {
      this._isRangeSupported = true;
    }

    var networkManager = this._manager;
    var fullRequestXhrId = this._fullRequestId;
    if (networkManager.isStreamingRequest(fullRequestXhrId)) {
      // We can continue fetching when progressive loading is enabled,
      // and we don't need the autoFetch feature.
      this._isStreamingSupported = true;
    } else if (this._isRangeSupported) {
      // NOTE: by cancelling the full request, and then issuing range
      // requests, there will be an issue for sites where you can only
      // request the pdf once. However, if this is the case, then the
      // server should not be returning that it can support range
      // requests.
      networkManager.abortRequest(fullRequestXhrId);
    }

    this._headersReceivedCapability.resolve();
  },

  // Hands a received chunk to the oldest pending read(), or caches it.
  _onProgressiveData:
      function PDFNetworkStreamFullRequestReader_onProgressiveData(chunk) {
    if (this._requests.length > 0) {
      var requestCapability = this._requests.shift();
      requestCapability.resolve({value: chunk, done: false});
    } else {
      this._cachedChunks.push(chunk);
    }
  },

  // Called when the request finished; a truthy `args` carries the final
  // chunk in `args.chunk`.
  _onDone: function PDFNetworkStreamFullRequestReader_onDone(args) {
    if (args) {
      this._onProgressiveData(args.chunk);
    }
    this._done = true;
    if (this._cachedChunks.length > 0) {
      // Cached data is still to be consumed; read() reports the end once
      // the cache is drained.
      return;
    }
    this._requests.forEach(function (requestCapability) {
      requestCapability.resolve({value: undefined, done: true});
    });
    this._requests = [];
  },

  // Converts a failed request status into an exception and rejects the
  // headers promise and every pending read() with it.
  _onError: function PDFNetworkStreamFullRequestReader_onError(status) {
    var url = this._url;
    var exception;
    if (status === 404 || status === 0 && /^file:/.test(url)) {
      exception = new MissingPDFException('Missing PDF "' + url + '".');
    } else {
      exception = new UnexpectedResponseException(
        'Unexpected server response (' + status +
        ') while retrieving PDF "' + url + '".', status);
    }
    this._storedError = exception;
    this._headersReceivedCapability.reject(exception);
    this._requests.forEach(function (requestCapability) {
      requestCapability.reject(exception);
    });
    this._requests = [];
    this._cachedChunks = [];
  },

  // Relays manager progress events to the public onProgress callback.
  _onProgress: function PDFNetworkStreamFullRequestReader_onProgress(data) {
    if (this.onProgress) {
      this.onProgress({
        loaded: data.loaded,
        total: data.lengthComputable ? data.total : this._contentLength
      });
    }
  },

  get isRangeSupported() {
    return this._isRangeSupported;
  },

  get isStreamingSupported() {
    return this._isStreamingSupported;
  },

  get contentLength() {
    return this._contentLength;
  },

  get headersReady() {
    return this._headersReceivedCapability.promise;
  },

  /**
   * Requests the next chunk of data.
   * @returns {Promise} resolved with an object having the "value" and
   *   "done" properties; rejected with the stored error after a failure.
   */
  read: function PDFNetworkStreamFullRequestReader_read() {
    if (this._storedError) {
      return Promise.reject(this._storedError);
    }
    if (this._cachedChunks.length > 0) {
      var chunk = this._cachedChunks.shift();
      // The cache holds raw chunks; wrap them in the same {value, done}
      // shape that every other code path of read() produces.
      return Promise.resolve({value: chunk, done: false});
    }
    if (this._done) {
      return Promise.resolve({value: undefined, done: true});
    }
    var requestCapability = createPromiseCapability();
    this._requests.push(requestCapability);
    return requestCapability.promise;
  },

  /**
   * Marks the reader as done, rejects the headers promise with the reason,
   * resolves pending reads as done and aborts the request if it is still
   * pending.
   * @param {Object} reason
   */
  cancel: function PDFNetworkStreamFullRequestReader_cancel(reason) {
    this._done = true;
    this._headersReceivedCapability.reject(reason);
    this._requests.forEach(function (requestCapability) {
      requestCapability.resolve({value: undefined, done: true});
    });
    this._requests = [];
    if (this._manager.isPendingRequest(this._fullRequestId)) {
      this._manager.abortRequest(this._fullRequestId);
    }
    // NOTE(review): no other code in this reader uses this property; it
    // looks like it was meant for the owning stream - confirm.
    this._fullRequestReader = null;
  }
};
/**
 * Reader for one byte range: issues a range request on the manager and
 * delivers the data, which arrives as a single chunk, through read().
 *
 * @implements {IPDFStreamRangeReader}
 * @param {Object} manager - the network manager used to issue/abort the
 *   range request.
 * @param {number} begin - the start offset of the data.
 * @param {number} end - the end offset of the data.
 */
function PDFNetworkStreamRangeRequestReader(manager, begin, end) {
  this._manager = manager;
  var callbacks = {
    onDone: this._onDone.bind(this),
    onProgress: this._onProgress.bind(this)
  };
  this._requestId = manager.requestRange(begin, end, callbacks);
  this._requests = [];
  this._queuedChunk = null;
  this._done = false;

  this.onProgress = null;
  this.onClosed = null;
}

PDFNetworkStreamRangeRequestReader.prototype = {
  // Notifies the owner (if it registered onClosed) that this reader ended.
  _close: function PDFNetworkStreamRangeRequestReader_close() {
    if (!this.onClosed) {
      return;
    }
    this.onClosed(this);
  },

  // Called by the manager with the data of the whole range.
  _onDone: function PDFNetworkStreamRangeRequestReader_onDone(data) {
    var received = data.chunk;
    if (this._requests.length === 0) {
      // Nobody is waiting yet: keep the chunk for the next read().
      this._queuedChunk = received;
    } else {
      var firstWaiting = this._requests.shift();
      firstWaiting.resolve({value: received, done: false});
    }
    this._done = true;
    // Any other waiting reads get the end-of-data marker.
    var waiting = this._requests;
    for (var k = 0, kk = waiting.length; k < kk; k++) {
      waiting[k].resolve({value: undefined, done: true});
    }
    this._requests = [];
    this._close();
  },

  // Relays manager progress events to the public onProgress callback.
  _onProgress: function PDFNetworkStreamRangeRequestReader_onProgress(evt) {
    if (this.isStreamingSupported || !this.onProgress) {
      return;
    }
    this.onProgress({
      loaded: evt.loaded
    });
  },

  get isStreamingSupported() {
    return false; // TODO allow progressive range bytes loading
  },

  // Returns a promise for the next {value, done} result.
  read: function PDFNetworkStreamRangeRequestReader_read() {
    var queued = this._queuedChunk;
    if (queued !== null) {
      this._queuedChunk = null;
      return Promise.resolve({value: queued, done: false});
    }
    if (this._done) {
      return Promise.resolve({value: undefined, done: true});
    }
    var capability = createPromiseCapability();
    this._requests.push(capability);
    return capability.promise;
  },

  // Ends the reader: pending reads resolve as done, the request is aborted
  // if still pending and the owner is notified.
  cancel: function PDFNetworkStreamRangeRequestReader_cancel(reason) {
    this._done = true;
    var waiting = this._requests;
    for (var k = 0, kk = waiting.length; k < kk; k++) {
      waiting[k].resolve({value: undefined, done: true});
    }
    this._requests = [];
    var requestId = this._requestId;
    if (this._manager.isPendingRequest(requestId)) {
      this._manager.abortRequest(requestId);
    }
    this._close();
  }
};
coreWorker.setPDFNetworkStreamClass(PDFNetworkStream);
exports.PDFNetworkStream = PDFNetworkStream;
exports.NetworkManager = NetworkManager;
}));
//#endif

View File

@ -157,21 +157,18 @@ var LocalPdfManager = (function LocalPdfManagerClosure() {
})();
var NetworkPdfManager = (function NetworkPdfManagerClosure() {
function NetworkPdfManager(docId, args, msgHandler) {
function NetworkPdfManager(docId, pdfNetworkStream, args) {
this._docId = docId;
this.msgHandler = msgHandler;
this.msgHandler = args.msgHandler;
var params = {
msgHandler: msgHandler,
httpHeaders: args.httpHeaders,
withCredentials: args.withCredentials,
chunkedViewerLoading: args.chunkedViewerLoading,
msgHandler: args.msgHandler,
url: args.url,
length: args.length,
disableAutoFetch: args.disableAutoFetch,
initialData: args.initialData
rangeChunkSize: args.rangeChunkSize
};
this.streamManager = new ChunkedStreamManager(args.length,
args.rangeChunkSize,
args.url, params);
this.streamManager = new ChunkedStreamManager(pdfNetworkStream, params);
this.pdfDocument = new PDFDocument(this, this.streamManager.getStream(),
args.password);
}

View File

@ -41,10 +41,12 @@ var PasswordException = sharedUtil.PasswordException;
var PasswordResponses = sharedUtil.PasswordResponses;
var UnknownErrorException = sharedUtil.UnknownErrorException;
var XRefParseException = sharedUtil.XRefParseException;
var arrayByteLength = sharedUtil.arrayByteLength;
var arraysToBytes = sharedUtil.arraysToBytes;
var assert = sharedUtil.assert;
var createPromiseCapability = sharedUtil.createPromiseCapability;
var error = sharedUtil.error;
var info = sharedUtil.info;
var isInt = sharedUtil.isInt;
var warn = sharedUtil.warn;
var Ref = corePrimitives.Ref;
var LocalPdfManager = corePdfManager.LocalPdfManager;
@ -82,6 +84,350 @@ var WorkerTask = (function WorkerTaskClosure() {
return WorkerTask;
})();
//#if !PRODUCTION
/**
 * Interface that represents a PDF data transport. If possible, it allows
 * progressive loading of the entire PDF binary data or of a fragment of it.
 *
 * @interface
 */
function IPDFStream() {}
IPDFStream.prototype = {
/**
 * Gets a reader for the entire PDF data.
 * @returns {IPDFStreamReader}
 */
getFullReader: function () { return null; },
/**
 * Gets a reader for a range of the PDF data.
 * @param {number} begin - the start offset of the data.
 * @param {number} end - the end offset of the data.
 * @returns {IPDFStreamRangeReader}
 */
getRangeReader: function (begin, end) { return null; },
/**
 * Cancels all opened readers and closes all their opened requests.
 * @param {Object} reason - the reason for cancelling.
 */
cancelAllRequests: function (reason) {},
};
/**
 * Interface for a reader of the entire PDF binary data.
 *
 * @interface
 */
function IPDFStreamReader() {}
IPDFStreamReader.prototype = {
/**
 * Gets a promise that is resolved when the headers and other metadata of
 * the PDF data stream are available. The promise is rejected when the
 * reader is cancelled or an error occurs.
 * @returns {Promise}
 */
get headersReady() { return null; },
/**
 * Gets the PDF binary data length. It is defined after the headersReady
 * promise is resolved.
 * @returns {number} The data length (or 0 if unknown).
 */
get contentLength() { return 0; },
/**
 * Gets the ability of the stream to handle range requests. It is defined
 * after the headersReady promise is resolved.
 * @returns {boolean}
 */
get isRangeSupported() { return false; },
/**
 * Gets the ability of the stream to progressively load binary data. It is
 * defined after the headersReady promise is resolved.
 * @returns {boolean}
 */
get isStreamingSupported() { return false; },
/**
 * Requests a chunk of the binary data. The method returns a promise, which
 * is resolved with an object that has the properties "value" and "done".
 * If "done" is set to true, the stream has reached its end; otherwise
 * "value" contains binary data. Cancelled requests are resolved with
 * "done" set to true.
 * @returns {Promise}
 */
read: function () {},
/**
 * Cancels all pending read requests and closes the stream.
 * @param {Object} reason
 */
cancel: function (reason) {},
/**
 * Sets or gets the progress callback. The callback can be useful when the
 * isStreamingSupported property of the object is defined as false.
 * The callback is called with one parameter: an object with the loaded and
 * total properties.
 */
onProgress: null,
};
/**
 * Interface for a reader of a fragment (byte range) of the PDF binary data.
 *
 * @interface
 */
function IPDFStreamRangeReader() {}
IPDFStreamRangeReader.prototype = {
/**
 * Gets the ability of the stream to progressively load binary data.
 * @returns {boolean}
 */
get isStreamingSupported() { return false; },
/**
 * Requests a chunk of the binary data. The method returns a promise, which
 * is resolved with an object that has the properties "value" and "done".
 * If "done" is set to true, the stream has reached its end; otherwise
 * "value" contains binary data. Cancelled requests are resolved with
 * "done" set to true.
 * @returns {Promise}
 */
read: function () {},
/**
 * Cancels all pending read requests and closes the stream.
 * @param {Object} reason
 */
cancel: function (reason) {},
/**
 * Sets or gets the progress callback. The callback can be useful when the
 * isStreamingSupported property of the object is defined as false.
 * The callback is called with one parameter: an object with the loaded
 * property.
 */
onProgress: null,
};
//#endif
/**
 * PDF data transport whose data is delivered through a message handler:
 * chunks arrive in 'OnDataRange' messages, progress in 'OnDataProgress'
 * messages, and byte ranges are requested by sending 'RequestDataRange'.
 *
 * @implements {IPDFStream}
 */
var PDFWorkerStream = (function PDFWorkerStreamClosure() {
  /**
   * @param {Object} params - initialData, disableRange, disableStream and
   *   length are read from it.
   * @param {Object} msgHandler - the message handler; its `on` and `send`
   *   methods are used.
   */
  function PDFWorkerStream(params, msgHandler) {
    // Chunks of the full data received before the full reader exists.
    this._queuedChunks = [];
    var initialData = params.initialData;
    if (initialData && initialData.length > 0) {
      this._queuedChunks.push(initialData);
    }
    this._msgHandler = msgHandler;

    this._isRangeSupported = !(params.disableRange);
    this._isStreamingSupported = !(params.disableStream);
    this._contentLength = params.length;

    this._fullRequestReader = null;
    this._rangeReaders = [];

    msgHandler.on('OnDataRange', this._onReceiveData.bind(this));
    msgHandler.on('OnDataProgress', this._onProgress.bind(this));
  }
  PDFWorkerStream.prototype = {
    // Routes a received chunk: without `args.begin` it belongs to the full
    // data, otherwise to the range reader that starts at that offset.
    _onReceiveData: function PDFWorkerStream_onReceiveData(args) {
      if (args.begin === undefined) {
        if (this._fullRequestReader) {
          this._fullRequestReader._enqueue(args.chunk);
        } else {
          this._queuedChunks.push(args.chunk);
        }
      } else {
        var found = this._rangeReaders.some(function (rangeReader) {
          if (rangeReader._begin !== args.begin) {
            return false;
          }
          rangeReader._enqueue(args.chunk);
          return true;
        });
        assert(found);
      }
    },

    _onProgress: function PDFWorkerStream_onProgress(evt) {
      if (this._rangeReaders.length > 0) {
        // Reporting to first range reader.
        var firstReader = this._rangeReaders[0];
        if (firstReader.onProgress) {
          firstReader.onProgress({loaded: evt.loaded});
        }
      }
    },

    // Stops tracking a range reader that has finished or was cancelled.
    _removeRangeReader: function PDFWorkerStream_removeRangeReader(reader) {
      var i = this._rangeReaders.indexOf(reader);
      if (i >= 0) {
        this._rangeReaders.splice(i, 1);
      }
    },

    // Creates the (single) full reader, handing over the chunks queued so
    // far; the reader registers itself as this._fullRequestReader.
    getFullReader: function PDFWorkerStream_getFullReader() {
      assert(!this._fullRequestReader);
      var queuedChunks = this._queuedChunks;
      this._queuedChunks = null;
      return new PDFWorkerStreamReader(this, queuedChunks);
    },

    // Creates a range reader and asks the other side for the range data.
    getRangeReader: function PDFWorkerStream_getRangeReader(begin, end) {
      var reader = new PDFWorkerStreamRangeReader(this, begin, end);
      this._msgHandler.send('RequestDataRange', { begin: begin, end: end });
      this._rangeReaders.push(reader);
      return reader;
    },

    cancelAllRequests: function PDFWorkerStream_cancelAllRequests(reason) {
      if (this._fullRequestReader) {
        this._fullRequestReader.cancel(reason);
      }
      // Iterate over a copy: cancelling removes readers from the list.
      var readers = this._rangeReaders.slice(0);
      readers.forEach(function (rangeReader) {
        rangeReader.cancel(reason);
      });
    }
  };

  /**
   * Reader for the entire PDF data of a PDFWorkerStream.
   *
   * @implements {IPDFStreamReader}
   * @param {PDFWorkerStream} stream - the owning stream.
   * @param {Array} queuedChunks - chunks received before the reader was
   *   created (optional).
   */
  function PDFWorkerStreamReader(stream, queuedChunks) {
    this._stream = stream;
    this._done = false;
    this._queuedChunks = queuedChunks || [];
    this._requests = [];
    this._headersReady = Promise.resolve();
    stream._fullRequestReader = this;

    this.onProgress = null; // not used
  }
  PDFWorkerStreamReader.prototype = {
    // Hands a chunk to the oldest pending read(), or queues it.
    _enqueue: function PDFWorkerStreamReader_enqueue(chunk) {
      if (this._done) {
        return; // ignore new data
      }
      if (this._requests.length > 0) {
        var requestCapability = this._requests.shift();
        requestCapability.resolve({value: chunk, done: false});
        return;
      }
      this._queuedChunks.push(chunk);
    },

    get headersReady() {
      return this._headersReady;
    },

    get isRangeSupported() {
      return this._stream._isRangeSupported;
    },

    get isStreamingSupported() {
      return this._stream._isStreamingSupported;
    },

    get contentLength() {
      return this._stream._contentLength;
    },

    read: function PDFWorkerStreamReader_read() {
      if (this._queuedChunks.length > 0) {
        var chunk = this._queuedChunks.shift();
        return Promise.resolve({value: chunk, done: false});
      }
      if (this._done) {
        return Promise.resolve({value: undefined, done: true});
      }
      var requestCapability = createPromiseCapability();
      this._requests.push(requestCapability);
      return requestCapability.promise;
    },

    cancel: function PDFWorkerStreamReader_cancel(reason) {
      this._done = true;
      this._requests.forEach(function (requestCapability) {
        requestCapability.resolve({value: undefined, done: true});
      });
      this._requests = [];
    }
  };

  /**
   * Reader for one byte range of a PDFWorkerStream; the range data arrives
   * as a single chunk.
   *
   * @implements {IPDFStreamRangeReader}
   * @param {PDFWorkerStream} stream - the owning stream.
   * @param {number} begin - the start offset of the data.
   * @param {number} end - the end offset of the data.
   */
  function PDFWorkerStreamRangeReader(stream, begin, end) {
    this._stream = stream;
    this._begin = begin;
    this._end = end;
    this._queuedChunk = null;
    this._requests = [];
    this._done = false;

    this.onProgress = null;
  }
  PDFWorkerStreamRangeReader.prototype = {
    // Receives the range data, completes the reader and unregisters it
    // from the stream.
    _enqueue: function PDFWorkerStreamRangeReader_enqueue(chunk) {
      if (this._done) {
        return; // ignore new data
      }
      if (this._requests.length === 0) {
        this._queuedChunk = chunk;
      } else {
        var requestsCapability = this._requests.shift();
        requestsCapability.resolve({value: chunk, done: false});
        this._requests.forEach(function (requestCapability) {
          requestCapability.resolve({value: undefined, done: true});
        });
        this._requests = [];
      }
      this._done = true;
      this._stream._removeRangeReader(this);
    },

    get isStreamingSupported() {
      return false;
    },

    read: function PDFWorkerStreamRangeReader_read() {
      if (this._queuedChunk) {
        // Deliver the queued chunk exactly once; leaving it queued would
        // make every subsequent read() return the same data with
        // done === false, so a read loop would never terminate.
        var chunk = this._queuedChunk;
        this._queuedChunk = null;
        return Promise.resolve({value: chunk, done: false});
      }
      if (this._done) {
        return Promise.resolve({value: undefined, done: true});
      }
      var requestCapability = createPromiseCapability();
      this._requests.push(requestCapability);
      return requestCapability.promise;
    },

    cancel: function PDFWorkerStreamRangeReader_cancel(reason) {
      this._done = true;
      this._requests.forEach(function (requestCapability) {
        requestCapability.resolve({value: undefined, done: true});
      });
      this._requests = [];
      this._stream._removeRangeReader(this);
    }
  };

  return PDFWorkerStream;
})();
/**
 * Constructor of the network-based PDF data transport (instances are
 * created from it with `new`); stays undefined until
 * setPDFNetworkStreamClass is called.
 */
var PDFNetworkStream;
/**
 * Sets the PDFNetworkStream class to be used as alternative PDF data
 * transport.
 * @param {function} cls - the constructor of the PDF data transport, whose
 *   instances implement IPDFStream.
 */
function setPDFNetworkStreamClass(cls) {
PDFNetworkStream = cls;
}
var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
setup: function wphSetup(handler, port) {
var testMessageProcessed = false;
@ -188,7 +534,6 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
var pdfManager;
var source = data.source;
var disableRange = data.disableRange;
if (source.data) {
try {
pdfManager = new LocalPdfManager(docId, source.data, source.password);
@ -196,144 +541,113 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
} catch (ex) {
pdfManagerCapability.reject(ex);
}
}
var pdfStream;
try {
if (source.chunkedViewerLoading) {
pdfStream = new PDFWorkerStream(source, handler);
} else {
assert(PDFNetworkStream, 'pdfjs/core/network module is not loaded');
pdfStream = new PDFNetworkStream(data);
}
} catch (ex) {
pdfManagerCapability.reject(ex);
return pdfManagerCapability.promise;
} else if (source.chunkedViewerLoading) {
}
var fullRequest = pdfStream.getFullReader();
fullRequest.headersReady.then(function () {
if (!fullRequest.isStreamingSupported ||
!fullRequest.isRangeSupported) {
// If stream or range are disabled, it's our only way to report
// loading progress.
fullRequest.onProgress = function (evt) {
handler.send('DocProgress', {
loaded: evt.loaded,
total: evt.total
});
};
}
if (!fullRequest.isRangeSupported) {
return;
}
// We don't need auto-fetch when streaming is enabled.
var disableAutoFetch = source.disableAutoFetch ||
fullRequest.isStreamingSupported;
pdfManager = new NetworkPdfManager(docId, pdfStream, {
msgHandler: handler,
url: source.url,
password: source.password,
length: fullRequest.contentLength,
disableAutoFetch: disableAutoFetch,
rangeChunkSize: source.rangeChunkSize
});
pdfManagerCapability.resolve(pdfManager);
cancelXHRs = null;
}).catch(function (reason) {
pdfManagerCapability.reject(reason);
cancelXHRs = null;
});
var cachedChunks = [], loaded = 0;
var flushChunks = function () {
var pdfFile = arraysToBytes(cachedChunks);
if (source.length && pdfFile.length !== source.length) {
warn('reported HTTP length is different from actual');
}
// the data is array, instantiating directly from it
try {
pdfManager = new NetworkPdfManager(docId, source, handler);
pdfManager = new LocalPdfManager(docId, pdfFile, source.password);
pdfManagerCapability.resolve(pdfManager);
} catch (ex) {
pdfManagerCapability.reject(ex);
}
return pdfManagerCapability.promise;
}
var networkManager = new NetworkManager(source.url, {
httpHeaders: source.httpHeaders,
withCredentials: source.withCredentials
});
var cachedChunks = [];
var fullRequestXhrId = networkManager.requestFull({
onHeadersReceived: function onHeadersReceived() {
if (disableRange) {
return;
}
var fullRequestXhr = networkManager.getRequestXhr(fullRequestXhrId);
if (fullRequestXhr.getResponseHeader('Accept-Ranges') !== 'bytes') {
return;
}
var contentEncoding =
fullRequestXhr.getResponseHeader('Content-Encoding') || 'identity';
if (contentEncoding !== 'identity') {
return;
}
var length = fullRequestXhr.getResponseHeader('Content-Length');
length = parseInt(length, 10);
if (!isInt(length)) {
return;
}
source.length = length;
if (length <= 2 * source.rangeChunkSize) {
// The file size is smaller than the size of two chunks, so it does
// not make any sense to abort the request and retry with a range
// request.
return;
}
if (networkManager.isStreamingRequest(fullRequestXhrId)) {
// We can continue fetching when progressive loading is enabled,
// and we don't need the autoFetch feature.
source.disableAutoFetch = true;
} else {
// NOTE: by cancelling the full request, and then issuing range
// requests, there will be an issue for sites where you can only
// request the pdf once. However, if this is the case, then the
// server should not be returning that it can support range
// requests.
networkManager.abortRequest(fullRequestXhrId);
}
cachedChunks = [];
};
var readPromise = new Promise(function (resolve, reject) {
var readChunk = function (chunk) {
try {
pdfManager = new NetworkPdfManager(docId, source, handler);
pdfManagerCapability.resolve(pdfManager);
} catch (ex) {
pdfManagerCapability.reject(ex);
}
cancelXHRs = null;
},
onProgressiveData: source.disableStream ? null :
function onProgressiveData(chunk) {
if (!pdfManager) {
cachedChunks.push(chunk);
return;
}
pdfManager.sendProgressiveData(chunk);
},
onDone: function onDone(args) {
if (pdfManager) {
return; // already processed
}
var pdfFile;
if (args === null) {
// TODO add some streaming manager, e.g. for unknown length files.
// The data was returned in the onProgressiveData, combining...
var pdfFileLength = 0, pos = 0;
cachedChunks.forEach(function (chunk) {
pdfFileLength += chunk.byteLength;
});
if (source.length && pdfFileLength !== source.length) {
warn('reported HTTP length is different from actual');
ensureNotTerminated();
if (chunk.done) {
if (!pdfManager) {
flushChunks();
}
cancelXHRs = null;
return;
}
var pdfFileArray = new Uint8Array(pdfFileLength);
cachedChunks.forEach(function (chunk) {
pdfFileArray.set(new Uint8Array(chunk), pos);
pos += chunk.byteLength;
});
pdfFile = pdfFileArray.buffer;
} else {
pdfFile = args.chunk;
}
// the data is array, instantiating directly from it
try {
pdfManager = new LocalPdfManager(docId, pdfFile, source.password);
pdfManagerCapability.resolve(pdfManager);
} catch (ex) {
pdfManagerCapability.reject(ex);
}
cancelXHRs = null;
},
var data = chunk.value;
loaded += arrayByteLength(data);
if (!fullRequest.isStreamingSupported) {
handler.send('DocProgress', {
loaded: loaded,
total: Math.max(loaded, fullRequest.contentLength || 0)
});
}
onError: function onError(status) {
var exception;
if (status === 404 || status === 0 && /^file:/.test(source.url)) {
exception = new MissingPDFException('Missing PDF "' +
source.url + '".');
handler.send('MissingPDF', exception);
} else {
exception = new UnexpectedResponseException(
'Unexpected server response (' + status +
') while retrieving PDF "' + source.url + '".', status);
handler.send('UnexpectedResponse', exception);
}
cancelXHRs = null;
},
if (pdfManager) {
pdfManager.sendProgressiveData(data);
} else {
cachedChunks.push(data);
}
onProgress: function onProgress(evt) {
handler.send('DocProgress', {
loaded: evt.loaded,
total: evt.lengthComputable ? evt.total : source.length
});
}
fullRequest.read().then(readChunk, reject);
} catch (e) {
reject(e);
}
};
fullRequest.read().then(readChunk, reject);
});
readPromise.catch(function (e) {
pdfManagerCapability.reject(e);
cancelXHRs = null;
});
cancelXHRs = function () {
networkManager.abortRequest(fullRequestXhrId);
pdfStream.cancelAllRequests('abort');
};
return pdfManagerCapability.promise;
@ -676,6 +990,7 @@ if (typeof window === 'undefined' &&
initializeWorker();
}
exports.setPDFNetworkStreamClass = setPDFNetworkStreamClass;
exports.WorkerTask = WorkerTask;
exports.WorkerMessageHandler = WorkerMessageHandler;
}));

View File

@ -567,6 +567,55 @@ function stringToBytes(str) {
return bytes;
}
/**
 * Returns the size in bytes of an array-like value: its `length` when that
 * is defined (Array, Uint8Array, string), otherwise its `byteLength`.
 * @param {Array|Uint8Array|string} arr
 * @returns {number}
 */
function arrayByteLength(arr) {
  var elementCount = arr.length;
  if (elementCount === undefined) {
    // No length property (e.g. an ArrayBuffer): byteLength is required.
    var byteCount = arr.byteLength;
    assert(byteCount !== undefined);
    return byteCount;
  }
  return elementCount;
}
/**
 * Concatenates the items of the given list into one Uint8Array.
 * @param {Array} arr - the list of items to join; each item is an Array,
 *   a Uint8Array, a string, or anything else the Uint8Array constructor
 *   accepts.
 * @returns {Uint8Array} the joined bytes; when the list consists of exactly
 *   one Uint8Array, that very object is returned without copying.
 */
function arraysToBytes(arr) {
  var count = arr.length;
  // Shortcut: a lone Uint8Array needs no copying.
  if (count === 1 && (arr[0] instanceof Uint8Array)) {
    return arr[0];
  }

  // First pass: size of the result.
  var total = 0;
  var index;
  for (index = 0; index < count; index++) {
    total += arrayByteLength(arr[index]);
  }

  // Second pass: convert every item to bytes and copy it into place.
  var result = new Uint8Array(total);
  var offset = 0;
  for (index = 0; index < count; index++) {
    var part = arr[index];
    if (typeof part === 'string') {
      part = stringToBytes(part);
    } else if (!(part instanceof Uint8Array)) {
      part = new Uint8Array(part);
    }
    result.set(part, offset);
    offset += part.byteLength;
  }
  return result;
}
function string32(value) {
return String.fromCharCode((value >> 24) & 0xff, (value >> 16) & 0xff,
(value >> 8) & 0xff, value & 0xff);
@ -2361,6 +2410,8 @@ exports.UnexpectedResponseException = UnexpectedResponseException;
exports.UnknownErrorException = UnknownErrorException;
exports.Util = Util;
exports.XRefParseException = XRefParseException;
exports.arrayByteLength = arrayByteLength;
exports.arraysToBytes = arraysToBytes;
exports.assert = assert;
exports.bytesToString = bytesToString;
exports.combineUrl = combineUrl;

View File

@ -8,6 +8,7 @@
describe('api', function() {
var basicApiUrl = combineUrl(window.location.href, '../pdfs/basicapi.pdf');
var basicApiFileLength = 105779; // bytes
var TEST_TIMEOUT = 20000;
function waitsForPromiseResolved(promise, successCallback) {
var resolved = false;
promise.then(function(val) {
@ -20,7 +21,7 @@ describe('api', function() {
});
waitsFor(function() {
return resolved;
}, 20000);
}, TEST_TIMEOUT);
}
function waitsForPromiseRejected(promise, failureCallback) {
var rejected = false;
@ -34,7 +35,13 @@ describe('api', function() {
});
waitsFor(function() {
return rejected;
}, 20000);
}, TEST_TIMEOUT);
}
/**
 * Invokes `callback` (without arguments) from a timer after a short delay.
 * @param {function} callback
 */
function waitSome(callback) {
  var DELAY_MS = 10;
  function fire() {
    callback();
  }
  setTimeout(fire, DELAY_MS);
}
describe('PDFJS', function() {
@ -710,4 +717,86 @@ describe('api', function() {
waitsForPromiseResolved(promiseDone, function() {});
});
});
describe('PDFDataRangeTransport', function () {
  var pdfPath = combineUrl(window.location.href, '../pdfs/tracemonkey.pdf');
  // Memoized download of the test PDF; shared by the tests below.
  var loadPromise;

  /**
   * Downloads the test PDF once and caches the resulting promise.
   * @returns {Promise} resolved with the file contents as a Uint8Array, or
   *   rejected when the file cannot be retrieved.
   */
  function getDocumentData() {
    if (loadPromise) {
      return loadPromise;
    }
    loadPromise = new Promise(function (resolve, reject) {
      // The XMLHttpRequest constructor takes no arguments; the URL is
      // provided through open().
      var xhr = new XMLHttpRequest();
      xhr.open('GET', pdfPath);
      xhr.responseType = 'arraybuffer';
      xhr.onload = function () {
        // 'load' fires for HTTP error responses as well; do not hand an
        // error page to the PDF parser.
        if (xhr.status >= 400) {
          reject(new Error('PDF is not loaded'));
          return;
        }
        resolve(new Uint8Array(xhr.response));
      };
      xhr.onerror = function () { reject(new Error('PDF is not loaded')); };
      xhr.send();
    });
    return loadPromise;
  }

  it('should fetch document info and page using ranges', function () {
    var transport;
    var initialDataLength = 4000;
    var fetches = 0;
    var getDocumentPromise = getDocumentData().then(function (data) {
      var initialData = data.subarray(0, initialDataLength);
      transport = new PDFJS.PDFDataRangeTransport(data.length, initialData);
      // Serve every requested range asynchronously from the in-memory copy
      // of the file, counting how many ranges were asked for.
      transport.requestDataRange = function (begin, end) {
        fetches++;
        waitSome(function () {
          transport.onDataProgress(4000);
          transport.onDataRange(begin, data.subarray(begin, end));
        });
      };
      var loadingTask = PDFJS.getDocument(transport);
      return loadingTask.promise;
    });
    var pdfDocument;
    var getPagePromise = getDocumentPromise.then(function (pdfDocument_) {
      pdfDocument = pdfDocument_;
      var pagePromise = pdfDocument.getPage(10);
      return pagePromise;
    });
    waitsForPromiseResolved(getPagePromise, function (page) {
      expect(pdfDocument.numPages).toEqual(14);
      expect(page.rotate).toEqual(0);
      expect(fetches).toBeGreaterThan(2);
    });
  });

  it('should fetch document info and page using range and streaming',
     function () {
    var transport;
    var initialDataLength = 4000;
    var fetches = 0;
    var getDocumentPromise = getDocumentData().then(function (data) {
      var initialData = data.subarray(0, initialDataLength);
      transport = new PDFJS.PDFDataRangeTransport(data.length, initialData);
      transport.requestDataRange = function (begin, end) {
        fetches++;
        if (fetches === 1) {
          // send rest of the data on first range request.
          transport.onDataProgressiveRead(data.subarray(initialDataLength));
        }
        waitSome(function () {
          transport.onDataRange(begin, data.subarray(begin, end));
        });
      };
      var loadingTask = PDFJS.getDocument(transport);
      return loadingTask.promise;
    });
    var pdfDocument;
    var getPagePromise = getDocumentPromise.then(function (pdfDocument_) {
      pdfDocument = pdfDocument_;
      var pagePromise = pdfDocument.getPage(10);
      return pagePromise;
    });
    waitsForPromiseResolved(getPagePromise, function (page) {
      expect(pdfDocument.numPages).toEqual(14);
      expect(page.rotate).toEqual(0);
      // Everything past the initial data was streamed on the first request,
      // so exactly one range request is expected.
      expect(fetches).toEqual(1);
    });
  });
});
});

169
test/unit/network_spec.js Normal file
View File

@ -0,0 +1,169 @@
/* globals expect, it, describe, waitsFor, combineUrl, PDFNetworkStream */
'use strict';
describe('network', function() {
  var pdf1 = combineUrl(window.location.href, '../pdfs/tracemonkey.pdf');
  var pdf1Length = 1016315;
  var pdf2 = combineUrl(window.location.href, '../pdfs/pdf.pdf');
  var pdf2Length = 32472771;

  /**
   * Blocks the current spec until `promise` is resolved, then runs
   * `successCallback` with the resolved value. A rejection is reported as a
   * failed expectation.
   */
  function waitsForPromiseResolved(promise, successCallback) {
    var TEST_TIMEOUT = 20000;
    var resolved = false;
    promise.then(function(val) {
      resolved = true;
      successCallback(val);
    },
    function(error) {
      // Shouldn't get here.
      expect(error).toEqual('the promise should not have been rejected');
    });
    waitsFor(function() {
      return resolved;
    }, TEST_TIMEOUT);
  }

  /**
   * Reads `reader` until it reports `done`, accumulating into `stats` the
   * number of chunks (`stats.count`) and their total size (`stats.length`).
   * @returns {Promise} resolved once the reader is exhausted.
   */
  function readAll(reader, stats) {
    return reader.read().then(function (result) {
      if (result.done) {
        return;
      }
      stats.count++;
      stats.length += result.value.byteLength;
      return readAll(reader, stats);
    });
  }

  it('read without stream and range', function() {
    var stream = new PDFNetworkStream({
      source: {
        url: pdf1,
        rangeChunkSize: 65536,
        disableStream: true
      },
      disableRange: true
    });

    var fullReader = stream.getFullReader();

    var isStreamingSupported, isRangeSupported;
    var headersPromise = fullReader.headersReady.then(function () {
      isStreamingSupported = fullReader.isStreamingSupported;
      isRangeSupported = fullReader.isRangeSupported;
    });

    var stats = { count: 0, length: 0 };
    // Wait for the headers as well, so the capability flags asserted below
    // are known to be set and a headers failure is not silently dropped.
    var readPromise = Promise.all([readAll(fullReader, stats),
                                   headersPromise]);

    waitsForPromiseResolved(readPromise, function () {
      expect(stats.length).toEqual(pdf1Length);
      expect(stats.count).toEqual(1);
      expect(isStreamingSupported).toEqual(false);
      expect(isRangeSupported).toEqual(false);
    });
  });

  it('read with streaming', function() {
    var userAgent = window.navigator.userAgent;
    // The test is valid for FF only: the XHR has support of the
    // 'moz-chunked-array' response type.
    // TODO enable for other browsers, e.g. when fetch/streams API is supported.
    var m = /Mozilla\/5\.0.*?rv:(\d+).*? Gecko/.exec(userAgent);
    if (!m || parseInt(m[1], 10) < 9) {
      return;
    }

    var stream = new PDFNetworkStream({
      source: {
        url: pdf2,
        rangeChunkSize: 65536,
        disableStream: false
      },
      disableRange: false
    });

    var fullReader = stream.getFullReader();

    var isStreamingSupported, isRangeSupported;
    var headersPromise = fullReader.headersReady.then(function () {
      isStreamingSupported = fullReader.isStreamingSupported;
      isRangeSupported = fullReader.isRangeSupported;
    });

    var stats = { count: 0, length: 0 };
    // See above: the headers promise is awaited together with the data.
    var readPromise = Promise.all([readAll(fullReader, stats),
                                   headersPromise]);

    waitsForPromiseResolved(readPromise, function () {
      expect(stats.length).toEqual(pdf2Length);
      expect(stats.count).toBeGreaterThan(1);
      expect(isStreamingSupported).toEqual(true);
    });
  });

  it('read custom ranges', function () {
    // We don't test on browsers that don't support range request, so
    // requiring this test to pass.
    var rangeSize = 32768;
    var stream = new PDFNetworkStream({
      source: {
        url: pdf1,
        length: pdf1Length,
        rangeChunkSize: rangeSize,
        disableStream: true
      },
      disableRange: false
    });

    var fullReader = stream.getFullReader();

    var isStreamingSupported, isRangeSupported, fullReaderCancelled;
    var headersPromise = fullReader.headersReady.then(function () {
      isStreamingSupported = fullReader.isStreamingSupported;
      isRangeSupported = fullReader.isRangeSupported;
      // we shall be able to close the full reader without issues
      fullReader.cancel('Don\'t need full reader');
      fullReaderCancelled = true;
    });

    // Skipping fullReader results, requesting something from the PDF end.
    var tailSize = (pdf1Length % rangeSize) || rangeSize;

    var range1Reader = stream.getRangeReader(pdf1Length - tailSize - rangeSize,
                                             pdf1Length - tailSize);
    var range2Reader = stream.getRangeReader(pdf1Length - tailSize, pdf1Length);

    var stats1 = { count: 0, length: 0 };
    var stats2 = { count: 0, length: 0 };

    var readPromises = Promise.all([readAll(range1Reader, stats1),
                                    readAll(range2Reader, stats2),
                                    headersPromise]);

    waitsForPromiseResolved(readPromises, function () {
      expect(stats1.length).toEqual(rangeSize);
      expect(stats2.length).toEqual(tailSize);
      expect(isRangeSupported).toEqual(true);
      expect(fullReaderCancelled).toEqual(true);
    });
  });
});

View File

@ -36,6 +36,7 @@
<script src="util_spec.js"></script>
<script src="cmap_spec.js"></script>
<script src="annotation_layer_spec.js"></script>
<script src="network_spec.js"></script>
<script>
'use strict';
@ -48,11 +49,12 @@
'pdfjs/core/annotation', 'pdfjs/core/crypto', 'pdfjs/core/stream',
'pdfjs/core/fonts', 'pdfjs/core/ps_parser', 'pdfjs/core/function',
'pdfjs/core/parser', 'pdfjs/core/evaluator', 'pdfjs/core/cmap',
'pdfjs/core/worker', 'pdfjs/display/api', 'pdfjs/display/metadata'],
'pdfjs/core/worker', 'pdfjs/core/network', 'pdfjs/display/api',
'pdfjs/display/metadata'],
function (sharedUtil, sharedGlobal, corePrimitives, coreAnnotation,
coreCrypto, coreStream, coreFonts, corePsParser, coreFunction,
coreParser, coreEvaluator, coreCMap, coreWorker, displayAPI,
displayMetadata) {
coreParser, coreEvaluator, coreCMap, coreWorker,
coreNetwork, displayAPI, displayMetadata) {
pdfjsLibs = {
sharedUtil: sharedUtil,
@ -68,6 +70,7 @@
coreEvaluator: coreEvaluator,
coreCMap: coreCMap,
coreWorker: coreWorker,
coreNetwork: coreNetwork,
displayAPI: displayAPI,
displayMetadata: displayMetadata
};