pdf.js/src/core/worker.js

963 lines
29 KiB
JavaScript
Raw Normal View History

2012-09-01 07:48:21 +09:00
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {
arrayByteLength, arraysToBytes, assert, createPromiseCapability, info,
InvalidPDFException, isNodeJS, MessageHandler, MissingPDFException,
PasswordException, setVerbosityLevel, UnexpectedResponseException,
UnknownErrorException, UNSUPPORTED_FEATURES, warn, XRefParseException
} from '../shared/util';
import { LocalPdfManager, NetworkPdfManager } from './pdf_manager';
import { Ref } from './primitives';
2015-10-21 10:50:32 +09:00
/**
 * Bookkeeping object for one unit of asynchronous worker work. It carries a
 * name, a `terminated` flag, and a promise capability that is resolved when
 * the task is finished.
 */
var WorkerTask = (function WorkerTaskClosure() {
  /**
   * @param {string} name - label stored on the task as `name`.
   */
  function WorkerTask(name) {
    this.name = name;
    this.terminated = false;
    this._capability = createPromiseCapability();
  }

  WorkerTask.prototype = {
    /**
     * Promise that is resolved once `finish()` has been called.
     * @returns {Promise}
     */
    get finished() {
      return this._capability.promise;
    },

    /** Resolves the `finished` promise. */
    finish() {
      this._capability.resolve();
    },

    /**
     * Flags the task as terminated. This only sets the flag; code doing the
     * work is expected to poll it through `ensureNotTerminated()`.
     */
    terminate() {
      this.terminated = true;
    },

    /**
     * @throws {Error} when `terminate()` has been called on this task.
     */
    ensureNotTerminated() {
      if (this.terminated) {
        throw new Error('Worker task was terminated');
      }
    },
  };

  return WorkerTask;
})();
if (typeof PDFJSDev === 'undefined' || !PDFJSDev.test('PRODUCTION')) {
/**
 * Interface that represents a PDF data transport. Where possible, it allows
 * the PDF binary data to be loaded progressively, either in full or as
 * individual fragments.
 *
 * @interface
 */
function IPDFStream() {} // eslint-disable-line no-inner-declarations
IPDFStream.prototype = {
  /**
   * Gets a reader for the entire PDF data.
   * @returns {IPDFStreamReader}
   */
  getFullReader() {
    return null;
  },

  /**
   * Gets a reader for a range of the PDF data.
   * @param {number} begin - the start offset of the data.
   * @param {number} end - the end offset of the data.
   * @returns {IPDFStreamRangeReader}
   */
  getRangeReader(begin, end) {
    return null;
  },

  /**
   * Cancels all opened readers and closes all of their open requests.
   * @param {Object} reason - the reason for cancelling.
   */
  cancelAllRequests(reason) {},
};
/**
 * Interface for a reader of the complete PDF binary data.
 *
 * @interface
 */
function IPDFStreamReader() {} // eslint-disable-line no-inner-declarations
IPDFStreamReader.prototype = {
  /**
   * Gets a promise that is resolved when the headers and other metadata of
   * the PDF data stream are available. The promise is rejected when the
   * reader is cancelled or an error occurs.
   * @returns {Promise}
   */
  get headersReady() {
    return null;
  },

  /**
   * Gets the PDF binary data length. It is defined after the headersReady
   * promise is resolved.
   * @returns {number} The data length (or 0 if unknown).
   */
  get contentLength() {
    return 0;
  },

  /**
   * Gets the ability of the stream to handle range requests. It is defined
   * after the headersReady promise is resolved.
   * @returns {boolean}
   */
  get isRangeSupported() {
    return false;
  },

  /**
   * Gets the ability of the stream to progressively load binary data. It is
   * defined after the headersReady promise is resolved.
   * @returns {boolean}
   */
  get isStreamingSupported() {
    return false;
  },

  /**
   * Requests a chunk of the binary data. The method returns a promise that
   * is resolved with an object with the properties "value" and "done". If
   * "done" is true the stream has reached its end, otherwise "value"
   * contains binary data. Cancelled requests are resolved with "done" set
   * to true.
   * @returns {Promise}
   */
  read() {},

  /**
   * Cancels all pending read requests and closes the stream.
   * @param {Object} reason
   */
  cancel(reason) {},

  /**
   * Sets or gets the progress callback. The callback can be useful when the
   * isStreamingSupported property of the object is defined as false.
   * The callback is called with one parameter: an object with the loaded and
   * total properties.
   */
  onProgress: null,
};
/**
 * Interface for a reader of a fragment (byte range) of the PDF binary data.
 *
 * @interface
 */
function IPDFStreamRangeReader() {} // eslint-disable-line no-inner-declarations
IPDFStreamRangeReader.prototype = {
  /**
   * Gets the ability of the stream to progressively load binary data.
   * @returns {boolean}
   */
  get isStreamingSupported() {
    return false;
  },

  /**
   * Requests a chunk of the binary data. The method returns a promise that
   * is resolved with an object with the properties "value" and "done". If
   * "done" is true the stream has reached its end, otherwise "value"
   * contains binary data. Cancelled requests are resolved with "done" set
   * to true.
   * @returns {Promise}
   */
  read() {},

  /**
   * Cancels all pending read requests and closes the stream.
   * @param {Object} reason
   */
  cancel(reason) {},

  /**
   * Sets or gets the progress callback. The callback can be useful when the
   * isStreamingSupported property of the object is defined as false.
   * The callback is called with one parameter: an object with the loaded
   * property.
   */
  onProgress: null,
};
}
/**
 * PDF data transport whose bytes are delivered as 'OnDataRange' messages on
 * the supplied message handler. Range requests are issued by sending
 * 'RequestDataRange' messages through the same handler.
 *
 * @implements {IPDFStream}
 */
var PDFWorkerStream = (function PDFWorkerStreamClosure() {
  /**
   * @param {Object} params - the properties `initialData`, `disableRange`,
   *   `disableStream` and `length` are read from it.
   * @param {Object} msgHandler - object providing `on(name, callback)` and
   *   `send(name, data)`.
   */
  function PDFWorkerStream(params, msgHandler) {
    // Chunks received before a full reader exists are buffered here.
    this._queuedChunks = [];
    var initialData = params.initialData;
    if (initialData && initialData.length > 0) {
      this._queuedChunks.push(initialData);
    }

    this._msgHandler = msgHandler;
    this._isRangeSupported = !(params.disableRange);
    this._isStreamingSupported = !(params.disableStream);
    this._contentLength = params.length;

    this._fullRequestReader = null;
    this._rangeReaders = [];

    msgHandler.on('OnDataRange', this._onReceiveData.bind(this));
    msgHandler.on('OnDataProgress', this._onProgress.bind(this));
  }

  PDFWorkerStream.prototype = {
    /**
     * Routes an incoming chunk. A message without `begin` belongs to the
     * full-data stream; otherwise it is delivered to the range reader that
     * was created with the same `begin` offset.
     */
    _onReceiveData: function PDFWorkerStream_onReceiveData(args) {
      if (args.begin === undefined) {
        if (this._fullRequestReader) {
          this._fullRequestReader._enqueue(args.chunk);
        } else {
          this._queuedChunks.push(args.chunk);
        }
      } else {
        var found = this._rangeReaders.some(function (rangeReader) {
          if (rangeReader._begin !== args.begin) {
            return false;
          }
          rangeReader._enqueue(args.chunk);
          return true;
        });
        assert(found);
      }
    },

    /** Forwards a progress event to the first active range reader, if any. */
    _onProgress: function PDFWorkerStream_onProgress(evt) {
      if (this._rangeReaders.length > 0) {
        // Reporting to first range reader.
        var firstReader = this._rangeReaders[0];
        if (firstReader.onProgress) {
          firstReader.onProgress({loaded: evt.loaded});
        }
      }
    },

    /** Drops `reader` from the list of active range readers. */
    _removeRangeReader: function PDFWorkerStream_removeRangeReader(reader) {
      var i = this._rangeReaders.indexOf(reader);
      if (i >= 0) {
        this._rangeReaders.splice(i, 1);
      }
    },

    /**
     * Creates the (single) reader for the entire data and hands it every
     * chunk buffered so far.
     * @returns {PDFWorkerStreamReader}
     */
    getFullReader: function PDFWorkerStream_getFullReader() {
      assert(!this._fullRequestReader);
      var queuedChunks = this._queuedChunks;
      this._queuedChunks = null;
      return new PDFWorkerStreamReader(this, queuedChunks);
    },

    /**
     * Creates a reader for the given range and requests that range through
     * the message handler.
     * @param {number} begin - the start offset of the data.
     * @param {number} end - the end offset of the data.
     * @returns {PDFWorkerStreamRangeReader}
     */
    getRangeReader: function PDFWorkerStream_getRangeReader(begin, end) {
      var reader = new PDFWorkerStreamRangeReader(this, begin, end);
      this._msgHandler.send('RequestDataRange', { begin, end, });
      this._rangeReaders.push(reader);
      return reader;
    },

    /**
     * Cancels the full reader (if one exists) and every range reader.
     * @param {Object} reason - the reason for cancelling.
     */
    cancelAllRequests: function PDFWorkerStream_cancelAllRequests(reason) {
      if (this._fullRequestReader) {
        this._fullRequestReader.cancel(reason);
      }
      // Iterate over a copy: cancelling a range reader removes it from
      // `_rangeReaders`.
      var readers = this._rangeReaders.slice(0);
      readers.forEach(function (rangeReader) {
        rangeReader.cancel(reason);
      });
    }
  };

  /**
   * Reader for the entire data of a PDFWorkerStream.
   * @implements {IPDFStreamReader}
   */
  function PDFWorkerStreamReader(stream, queuedChunks) {
    this._stream = stream;
    this._done = false;
    this._queuedChunks = queuedChunks || [];
    this._requests = [];
    this._headersReady = Promise.resolve();
    stream._fullRequestReader = this;

    this.onProgress = null; // not used
  }
  PDFWorkerStreamReader.prototype = {
    /**
     * Delivers a chunk to the oldest pending `read()` request, or buffers it
     * when nobody is waiting.
     */
    _enqueue: function PDFWorkerStreamReader_enqueue(chunk) {
      if (this._done) {
        return; // ignore new data
      }
      if (this._requests.length > 0) {
        var requestCapability = this._requests.shift();
        requestCapability.resolve({value: chunk, done: false});
        return;
      }
      this._queuedChunks.push(chunk);
    },

    get headersReady() {
      return this._headersReady;
    },

    get isRangeSupported() {
      return this._stream._isRangeSupported;
    },

    get isStreamingSupported() {
      return this._stream._isStreamingSupported;
    },

    get contentLength() {
      return this._stream._contentLength;
    },

    /**
     * Resolves with the next buffered chunk, with `done: true` after
     * `cancel()`, or stays pending until a chunk arrives.
     * @returns {Promise}
     */
    read: function PDFWorkerStreamReader_read() {
      if (this._queuedChunks.length > 0) {
        var chunk = this._queuedChunks.shift();
        return Promise.resolve({value: chunk, done: false});
      }
      if (this._done) {
        return Promise.resolve({value: undefined, done: true});
      }
      var requestCapability = createPromiseCapability();
      this._requests.push(requestCapability);
      return requestCapability.promise;
    },

    /** Marks the reader as done and resolves all pending reads as done. */
    cancel: function PDFWorkerStreamReader_cancel(reason) {
      this._done = true;
      this._requests.forEach(function (requestCapability) {
        requestCapability.resolve({value: undefined, done: true});
      });
      this._requests = [];
    }
  };

  /**
   * Reader for one byte range of a PDFWorkerStream. The whole range arrives
   * as a single chunk, after which the reader is done.
   * @implements {IPDFStreamRangeReader}
   */
  function PDFWorkerStreamRangeReader(stream, begin, end) {
    this._stream = stream;
    this._begin = begin;
    this._end = end;
    this._queuedChunk = null;
    this._requests = [];
    this._done = false;

    this.onProgress = null;
  }
  PDFWorkerStreamRangeReader.prototype = {
    /**
     * Accepts the single chunk for this range: the first pending read gets
     * the data, any further pending reads are resolved as done, and the
     * reader unregisters itself from the stream.
     */
    _enqueue: function PDFWorkerStreamRangeReader_enqueue(chunk) {
      if (this._done) {
        return; // ignore new data
      }
      if (this._requests.length === 0) {
        this._queuedChunk = chunk;
      } else {
        var requestsCapability = this._requests.shift();
        requestsCapability.resolve({value: chunk, done: false});
        this._requests.forEach(function (requestCapability) {
          requestCapability.resolve({value: undefined, done: true});
        });
        this._requests = [];
      }
      this._done = true;
      this._stream._removeRangeReader(this);
    },

    get isStreamingSupported() {
      return false;
    },

    /**
     * Resolves with the buffered chunk (once), with `done: true` when the
     * reader is finished, or stays pending until the chunk arrives.
     * @returns {Promise}
     */
    read: function PDFWorkerStreamRangeReader_read() {
      if (this._queuedChunk) {
        // Hand the buffered chunk out exactly once; without clearing it,
        // every subsequent read would return the same data with
        // `done: false` and a consumer loop would never terminate.
        var chunk = this._queuedChunk;
        this._queuedChunk = null;
        return Promise.resolve({value: chunk, done: false});
      }
      if (this._done) {
        return Promise.resolve({value: undefined, done: true});
      }
      var requestCapability = createPromiseCapability();
      this._requests.push(requestCapability);
      return requestCapability.promise;
    },

    /**
     * Marks the reader as done, resolves all pending reads as done and
     * unregisters the reader from the stream.
     */
    cancel: function PDFWorkerStreamRangeReader_cancel(reason) {
      this._done = true;
      this._requests.forEach(function (requestCapability) {
        requestCapability.resolve({value: undefined, done: true});
      });
      this._requests = [];
      this._stream._removeRangeReader(this);
    }
  };

  return PDFWorkerStream;
})();
/**
 * The transport class instantiated for sources that are not loaded through
 * the chunked viewer path; it stays undefined until one is registered.
 * @type IPDFStream
 */
var PDFNetworkStream;

/**
 * Registers the class to use as the alternative PDF data transport.
 * @param {IPDFStream} cls - the PDF data transport.
 */
function setPDFNetworkStreamClass(cls) {
  PDFNetworkStream = cls;
}
var WorkerMessageHandler = {
setup(handler, port) {
2015-12-17 09:37:43 +09:00
var testMessageProcessed = false;
handler.on('test', function wphSetupTest(data) {
2015-12-17 09:37:43 +09:00
if (testMessageProcessed) {
return; // we already processed 'test' message once
}
testMessageProcessed = true;
// check if Uint8Array can be sent to worker
if (!(data instanceof Uint8Array)) {
handler.send('test', 'main', false);
return;
}
// making sure postMessage transfers are working
var supportTransfers = data[0] === 255;
handler.postMessageTransfers = supportTransfers;
// check if the response property is supported by xhr
var xhr = new XMLHttpRequest();
var responseExists = 'response' in xhr;
// check if the property is actually implemented
try {
xhr.responseType; // eslint-disable-line no-unused-expressions
} catch (e) {
responseExists = false;
}
if (!responseExists) {
handler.send('test', false);
return;
}
handler.send('test', {
supportTypedArray: true,
supportTransfers,
});
});
handler.on('configure', function wphConfigure(data) {
setVerbosityLevel(data.verbosity);
});
handler.on('GetDocRequest', function wphSetupDoc(data) {
return WorkerMessageHandler.createDocumentHandler(data, port);
});
},
createDocumentHandler(docParams, port) {
// This closure holds references to pdfManager and handler until the
// latter is destroyed.
var pdfManager;
var terminated = false;
// While a document is being fetched this holds a function that aborts the
// outstanding requests; otherwise it is null.
var cancelXHRs = null;
// Tasks registered via startWorkerTask and not yet finished.
var WorkerTasks = [];

var docId = docParams.docId;
var docBaseUrl = docParams.docBaseUrl;
var workerHandlerName = docParams.docId + '_worker';
var handler = new MessageHandler(workerHandlerName, docId, port);

// Ensure that postMessage transfers are correctly enabled/disabled,
// to prevent "DataCloneError" in older versions of IE (see issue 6957).
handler.postMessageTransfers = docParams.postMessageTransfers;
/**
 * Guard used before continuing asynchronous work for this document.
 * @throws {Error} when the `terminated` flag has been set.
 */
function ensureNotTerminated() {
  if (!terminated) {
    return;
  }
  throw new Error('Worker was terminated');
}
2011-10-09 17:37:53 +09:00
2015-10-21 10:50:32 +09:00
/**
 * Records `task` in the list of running worker tasks.
 * @param {WorkerTask} task - the task that is about to start.
 */
function startWorkerTask(task) {
  WorkerTasks.push(task);
}
/**
 * Marks `task` as finished and removes it from the list of running tasks.
 * @param {WorkerTask} task - the task that has completed.
 */
function finishWorkerTask(task) {
  task.finish();
  var i = WorkerTasks.indexOf(task);
  // A task may be finished more than once (or may never have been
  // registered); `splice(-1, 1)` would then drop the last, unrelated task.
  if (i >= 0) {
    WorkerTasks.splice(i, 1);
  }
}
2013-02-07 08:19:29 +09:00
/**
 * Runs the document parsing steps in order ('checkHeader', 'parseStartXRef',
 * 'parse') and then collects the basic document information.
 *
 * Written as a single promise chain so that any failure, including a
 * synchronous throw while requesting the document properties, rejects the
 * returned promise instead of leaving it pending.
 *
 * @param {boolean} recoveryMode - forwarded as the argument of 'parse'.
 * @returns {Promise} resolved with `{ numPages, fingerprint, encrypted }`.
 */
function loadDocument(recoveryMode) {
  return pdfManager.ensureDoc('checkHeader', []).then(function () {
    return pdfManager.ensureDoc('parseStartXRef', []);
  }).then(function () {
    return pdfManager.ensureDoc('parse', [recoveryMode]);
  }).then(function parseSuccess() {
    return Promise.all([
      pdfManager.ensureDoc('numPages'),
      pdfManager.ensureDoc('fingerprint'),
      pdfManager.ensureXRef('encrypt'),
    ]);
  }).then(function onDocReady(results) {
    return {
      numPages: results[0],
      fingerprint: results[1],
      encrypted: !!results[2],
    };
  });
}
/**
 * Creates the PDF manager for a document.
 *
 * - When `data.source.data` is present, a LocalPdfManager is created from it
 *   right away.
 * - Otherwise a stream is opened (PDFWorkerStream for
 *   `source.chunkedViewerLoading`, else the registered PDFNetworkStream).
 *   If the full reader reports range support, a NetworkPdfManager is created
 *   once the headers are ready and incoming chunks are fed to it; if not,
 *   chunks are cached and a LocalPdfManager is created when the stream ends.
 *
 * While data is being fetched, the enclosing `cancelXHRs` variable holds a
 * function that cancels all stream requests; it is reset to null once the
 * manager exists, the stream ends, or loading fails.
 *
 * @param {Object} data - document parameters; `data.source` is read here.
 * @param {Object} evaluatorOptions - forwarded to the manager constructor.
 * @returns {Promise} resolved with the created PDF manager.
 */
function getPdfManager(data, evaluatorOptions) {
  var pdfManagerCapability = createPromiseCapability();
  var pdfManager;

  var source = data.source;
  if (source.data) {
    try {
      pdfManager = new LocalPdfManager(docId, source.data, source.password,
                                       evaluatorOptions, docBaseUrl);
      pdfManagerCapability.resolve(pdfManager);
    } catch (ex) {
      pdfManagerCapability.reject(ex);
    }
    return pdfManagerCapability.promise;
  }

  var pdfStream;
  try {
    if (source.chunkedViewerLoading) {
      pdfStream = new PDFWorkerStream(source, handler);
    } else {
      assert(PDFNetworkStream, './network module is not loaded');
      pdfStream = new PDFNetworkStream(data);
    }
  } catch (ex) {
    pdfManagerCapability.reject(ex);
    return pdfManagerCapability.promise;
  }

  var fullRequest = pdfStream.getFullReader();
  fullRequest.headersReady.then(function () {
    if (!fullRequest.isStreamingSupported ||
        !fullRequest.isRangeSupported) {
      // If stream or range are disabled, it's our only way to report
      // loading progress.
      fullRequest.onProgress = function (evt) {
        handler.send('DocProgress', {
          loaded: evt.loaded,
          total: evt.total
        });
      };
    }

    if (!fullRequest.isRangeSupported) {
      // Without range support the manager is created by flushChunks() once
      // the whole file has been read.
      return;
    }

    // We don't need auto-fetch when streaming is enabled.
    var disableAutoFetch = source.disableAutoFetch ||
                           fullRequest.isStreamingSupported;
    pdfManager = new NetworkPdfManager(docId, pdfStream, {
      msgHandler: handler,
      url: source.url,
      password: source.password,
      length: fullRequest.contentLength,
      disableAutoFetch,
      rangeChunkSize: source.rangeChunkSize
    }, evaluatorOptions, docBaseUrl);
    pdfManagerCapability.resolve(pdfManager);
    cancelXHRs = null;
  }).catch(function (reason) {
    pdfManagerCapability.reject(reason);
    cancelXHRs = null;
  });

  var cachedChunks = [], loaded = 0;
  // Builds a LocalPdfManager from every chunk cached so far.
  var flushChunks = function () {
    var pdfFile = arraysToBytes(cachedChunks);
    if (source.length && pdfFile.length !== source.length) {
      warn('reported HTTP length is different from actual');
    }
    // the data is array, instantiating directly from it
    try {
      pdfManager = new LocalPdfManager(docId, pdfFile, source.password,
                                       evaluatorOptions, docBaseUrl);
      pdfManagerCapability.resolve(pdfManager);
    } catch (ex) {
      pdfManagerCapability.reject(ex);
    }
    cachedChunks = [];
  };
  // Only the rejection of this promise is used; it is never resolved.
  var readPromise = new Promise(function (resolve, reject) {
    var readChunk = function (chunk) {
      try {
        ensureNotTerminated();
        if (chunk.done) {
          if (!pdfManager) {
            flushChunks();
          }
          cancelXHRs = null;
          return;
        }

        var data = chunk.value;
        loaded += arrayByteLength(data);
        if (!fullRequest.isStreamingSupported) {
          handler.send('DocProgress', {
            loaded,
            total: Math.max(loaded, fullRequest.contentLength || 0)
          });
        }

        if (pdfManager) {
          pdfManager.sendProgressiveData(data);
        } else {
          cachedChunks.push(data);
        }

        fullRequest.read().then(readChunk, reject);
      } catch (e) {
        reject(e);
      }
    };
    fullRequest.read().then(readChunk, reject);
  });
  readPromise.catch(function (e) {
    pdfManagerCapability.reject(e);
    cancelXHRs = null;
  });

  cancelXHRs = function () {
    pdfStream.cancelAllRequests('abort');
  };

  return pdfManagerCapability.promise;
}
/**
 * Creates the PDF manager for the document, loads the document and reports
 * the outcome through `handler`: 'PDFManagerReady', 'DataLoaded' and 'GetDoc'
 * on success, or one of the error messages sent by `onFailure`.
 *
 * @param {Object} data - document parameters; the evaluator options are
 *   read from it and it is forwarded to `getPdfManager`.
 */
function setupDoc(data) {
  function onSuccess(doc) {
    ensureNotTerminated();
    handler.send('GetDoc', { pdfInfo: doc });
  }

  // Maps a load failure onto the corresponding message for the main thread.
  function onFailure(e) {
    if (e instanceof PasswordException) {
      var task = new WorkerTask('PasswordException: response ' + e.code);
      startWorkerTask(task);

      handler.sendWithPromise('PasswordRequest', e).then(function (data) {
        finishWorkerTask(task);
        pdfManager.updatePassword(data.password);
        pdfManagerReady();
      }).catch(function (ex) {
        finishWorkerTask(task);
        handler.send('PasswordException', ex);
      // Binding `e` makes `ex` the original PasswordException rather than
      // the rejection reason.
      }.bind(null, e));
    } else if (e instanceof InvalidPDFException) {
      handler.send('InvalidPDF', e);
    } else if (e instanceof MissingPDFException) {
      handler.send('MissingPDF', e);
    } else if (e instanceof UnexpectedResponseException) {
      handler.send('UnexpectedResponse', e);
    } else {
      handler.send('UnknownError',
                   new UnknownErrorException(e.message, e.toString()));
    }
  }

  // Loads the document; on an XRefParseException the whole stream is
  // requested and loading is retried once in recovery mode.
  function pdfManagerReady() {
    ensureNotTerminated();

    // Promise.prototype.then only accepts two callbacks; the third argument
    // that used to be passed here was ignored and has been dropped.
    loadDocument(false).then(onSuccess, function loadFailure(ex) {
      ensureNotTerminated();

      // Try again with recoveryMode == true
      if (!(ex instanceof XRefParseException)) {
        onFailure(ex);
        return;
      }
      pdfManager.requestLoadedStream();
      pdfManager.onLoadedStream().then(function() {
        ensureNotTerminated();

        loadDocument(true).then(onSuccess, onFailure);
      });
    });
  }

  ensureNotTerminated();

  var evaluatorOptions = {
    forceDataSchema: data.disableCreateObjectURL,
    maxImageSize: data.maxImageSize === undefined ? -1 : data.maxImageSize,
    disableFontFace: data.disableFontFace,
    nativeImageDecoderSupport: data.nativeImageDecoderSupport,
    ignoreErrors: data.ignoreErrors,
  };

  getPdfManager(data, evaluatorOptions).then(function (newPdfManager) {
    if (terminated) {
      // We were in a process of setting up the manager, but it got
      // terminated in the middle.
      newPdfManager.terminate();
      throw new Error('Worker was terminated');
    }

    pdfManager = newPdfManager;
    handler.send('PDFManagerReady', null);
    pdfManager.onLoadedStream().then(function(stream) {
      handler.send('DataLoaded', { length: stream.bytes.byteLength });
    });
  }).then(pdfManagerReady, onFailure);
}
2012-04-12 07:52:15 +09:00
// Resolves with the rotate, ref, userUnit and view properties of the page
// at `data.pageIndex`.
handler.on('GetPage', function wphSetupGetPage(data) {
  return pdfManager.getPage(data.pageIndex).then(function(page) {
    var rotatePromise = pdfManager.ensure(page, 'rotate');
    var refPromise = pdfManager.ensure(page, 'ref');
    var userUnitPromise = pdfManager.ensure(page, 'userUnit');
    var viewPromise = pdfManager.ensure(page, 'view');

    return Promise.all([
      rotatePromise, refPromise, userUnitPromise, viewPromise
    ]).then(function(results) {
      return {
        rotate: results[0],
        ref: results[1],
        userUnit: results[2],
        view: results[3]
      };
    });
  });
});

// Looks up the page index for the reference given as `data.ref`.
handler.on('GetPageIndex', function wphSetupGetPageIndex(data) {
  var ref = new Ref(data.ref.num, data.ref.gen);
  var catalog = pdfManager.pdfDocument.catalog;
  return catalog.getPageIndex(ref);
});

// The handlers below each forward to a single catalog property or method.
handler.on('GetDestinations',
  function wphSetupGetDestinations(data) {
    return pdfManager.ensureCatalog('destinations');
  }
);

handler.on('GetDestination',
  function wphSetupGetDestination(data) {
    return pdfManager.ensureCatalog('getDestination', [data.id]);
  }
);

handler.on('GetPageLabels',
  function wphSetupGetPageLabels(data) {
    return pdfManager.ensureCatalog('pageLabels');
  }
);

handler.on('GetAttachments',
  function wphSetupGetAttachments(data) {
    return pdfManager.ensureCatalog('attachments');
  }
);

handler.on('GetJavaScript',
  function wphSetupGetJavaScript(data) {
    return pdfManager.ensureCatalog('javaScript');
  }
);

handler.on('GetOutline',
  function wphSetupGetOutline(data) {
    return pdfManager.ensureCatalog('documentOutline');
  }
);

// Resolves with a two-element array: [documentInfo, metadata].
handler.on('GetMetadata',
  function wphSetupGetMetadata(data) {
    return Promise.all([pdfManager.ensureDoc('documentInfo'),
                        pdfManager.ensureCatalog('metadata')]);
  }
);

// Requests the complete stream and resolves with its bytes.
handler.on('GetData', function wphSetupGetData(data) {
  pdfManager.requestLoadedStream();
  return pdfManager.onLoadedStream().then(function(stream) {
    return stream.bytes;
  });
});

handler.on('GetStats',
  function wphSetupGetStats(data) {
    return pdfManager.pdfDocument.xref.stats;
  }
);

// Resolves with the annotation data of the page at `data.pageIndex` for the
// given `data.intent`.
handler.on('GetAnnotations', function wphSetupGetAnnotations(data) {
  return pdfManager.getPage(data.pageIndex).then(function(page) {
    return pdfManager.ensure(page, 'getAnnotationsData', [data.intent]);
  });
});
// 'RenderPageRequest': build the operator list for the page at
// `data.pageIndex`. Nothing is returned to the caller; the outcome is
// reported through messages instead:
//   - on success only a timing line is logged via `info`;
//   - on failure an 'UnsupportedFeature' message and then a 'PageError'
//     message (carrying a plain, serializable error object) are sent,
//     unless the task was terminated in the meantime.
handler.on('RenderPageRequest', function wphSetupRenderPage(data) {
  var pageIndex = data.pageIndex;
  pdfManager.getPage(pageIndex).then(function(page) {
    var task = new WorkerTask('RenderPageRequest: page ' + pageIndex);
    startWorkerTask(task);

    var pageNum = pageIndex + 1;
    var start = Date.now();
    // Pre compile the pdf page and fetch the fonts/images.
    page.getOperatorList({
      handler,
      task,
      intent: data.intent,
      renderInteractiveForms: data.renderInteractiveForms,
    }).then(function(operatorList) {
      finishWorkerTask(task);

      info('page=' + pageNum + ' - getOperatorList: time=' +
           (Date.now() - start) + 'ms, len=' + operatorList.totalLength);
    }, function(e) {
      finishWorkerTask(task);
      if (task.terminated) {
        return; // ignoring errors from the terminated thread
      }

      // For compatibility with older behavior, generating unknown
      // unsupported feature notification on errors.
      handler.send('UnsupportedFeature',
                   {featureId: UNSUPPORTED_FEATURES.unknown});

      var minimumStackMessage =
        'worker.js: while trying to getPage() and getOperatorList()';

      var wrappedException;

      // Turn the error into an obj that can be serialized
      if (typeof e === 'string') {
        wrappedException = {
          message: e,
          stack: minimumStackMessage
        };
      } else if (e !== null && typeof e === 'object') {
        // `typeof null` is also 'object'; without the explicit null check
        // a `null` rejection would throw on `e.message` right here and the
        // 'PageError' message below would never be sent.
        wrappedException = {
          message: e.message || e.toString(),
          stack: e.stack || minimumStackMessage
        };
      } else {
        wrappedException = {
          message: 'Unknown exception type: ' + (typeof e),
          stack: minimumStackMessage
        };
      }

      handler.send('PageError', {
        pageNum,
        error: wrappedException,
        intent: data.intent
      });
    });
  });
}, this);
2011-10-09 17:37:53 +09:00
// 'GetTextContent': extract the text content of the page at
// `data.pageIndex` and answer with it. The work is tracked as a
// WorkerTask; a failure is re-thrown to the caller unless the task was
// terminated, in which case it is swallowed.
handler.on('GetTextContent', function wphExtractText(data) {
  var pageIndex = data.pageIndex;
  return pdfManager.getPage(pageIndex).then(function(page) {
    var task = new WorkerTask('GetTextContent: page ' + pageIndex);
    startWorkerTask(task);

    var pageNum = pageIndex + 1;
    var start = Date.now();

    function onExtracted(content) {
      finishWorkerTask(task);
      var elapsed = Date.now() - start;
      info('text indexing: page=' + pageNum + ' - time=' + elapsed + 'ms');
      return content;
    }

    function onFailed(err) {
      finishWorkerTask(task);
      if (!task.terminated) {
        throw err;
      }
      // Errors from a terminated task are ignored on purpose.
    }

    var extraction = page.extractTextContent({
      handler,
      task,
      normalizeWhitespace: data.normalizeWhitespace,
      combineTextItems: data.combineTextItems,
    });
    return extraction.then(onExtracted, onFailed);
  });
});
// 'Cleanup': delegate to `pdfManager.cleanup()` and answer with its result.
handler.on('Cleanup', function wphCleanup(data) {
  var cleanupResult = pdfManager.cleanup();
  return cleanupResult;
});
// 'Terminate': shut the worker-side document handling down.
// Order of operations:
//   1. raise the `terminated` flag;
//   2. terminate and drop the pdfManager, if one exists;
//   3. invoke `cancelXHRs`, if set;
//   4. flag every task in `WorkerTasks` as terminated and wait for all of
//      their `finished` promises before destroying the handler.
handler.on('Terminate', function wphTerminate(data) {
  terminated = true;

  if (pdfManager) {
    pdfManager.terminate();
    pdfManager = null;
  }

  if (cancelXHRs) {
    cancelXHRs();
  }

  var pendingTasks = [];
  WorkerTasks.forEach(function (workerTask) {
    pendingTasks.push(workerTask.finished);
    workerTask.terminate();
  });

  var allTasksDone = Promise.all(pendingTasks);
  return allTasksDone.then(function () {
    // Notice that even if we destroying handler, resolved response promise
    // must be sent back.
    handler.destroy();
    handler = null;
  });
});
// 'Ready': start document setup with the stored `docParams`, then drop
// the reference.
handler.on('Ready', function wphReady(data) {
  setupDoc(docParams);
  // we don't need docParams anymore -- saving memory.
  docParams = null;
});
return workerHandlerName;
},
initializeFromPort(port) {
var handler = new MessageHandler('worker', 'main', port);
WorkerMessageHandler.setup(handler, port);
handler.send('ready', null);
},
2011-10-09 17:37:53 +09:00
};
/**
 * Duck-type check for a message port: the value must expose a
 * `postMessage` function and have an `onmessage` property (own or
 * inherited).
 *
 * @param {*} maybePort - any value.
 * @returns {boolean} `true` when the value looks like a message port;
 *   `false` otherwise, including for `null`, `undefined` and primitives.
 */
function isMessagePort(maybePort) {
  var type = typeof maybePort;
  // The `in` operator throws on non-objects and property access throws on
  // null/undefined, so anything that cannot be a port is rejected up front.
  if (maybePort === null || (type !== 'object' && type !== 'function')) {
    return false;
  }
  return typeof maybePort.postMessage === 'function' &&
         ('onmessage' in maybePort);
}
// Worker thread (and not node.js)?
// Self-initialize only when there is no `window`, `isNodeJS()` is false,
// and a global `self` exists that looks like a message port.
if (typeof window === 'undefined' && !isNodeJS()) {
  if (typeof self !== 'undefined' && isMessagePort(self)) {
    WorkerMessageHandler.initializeFromPort(self);
  }
}
export {
setPDFNetworkStreamClass,
WorkerTask,
WorkerMessageHandler,
};