pdf.js/src/core/worker.js
Mukul Mishra 109106794d Adds Streams API support for networking task of PDF.js project.
network.js file moved to main thread and `PDFNetworkStream` implemented
at worker thread, that is used to ask for data whenever worker needs.
2017-07-28 02:32:30 +05:30

867 lines
26 KiB
JavaScript

/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {
arrayByteLength, arraysToBytes, assert, createPromiseCapability, info,
InvalidPDFException, isNodeJS, MessageHandler, MissingPDFException,
PasswordException, setVerbosityLevel, UnexpectedResponseException,
UnknownErrorException, UNSUPPORTED_FEATURES, warn, XRefParseException
} from '../shared/util';
import { LocalPdfManager, NetworkPdfManager } from './pdf_manager';
import { Ref } from './primitives';
var WorkerTask = (function WorkerTaskClosure() {
function WorkerTask(name) {
this.name = name;
this.terminated = false;
this._capability = createPromiseCapability();
}
WorkerTask.prototype = {
get finished() {
return this._capability.promise;
},
finish() {
this._capability.resolve();
},
terminate() {
this.terminated = true;
},
ensureNotTerminated() {
if (this.terminated) {
throw new Error('Worker task was terminated');
}
},
};
return WorkerTask;
})();
if (typeof PDFJSDev === 'undefined' || !PDFJSDev.test('PRODUCTION')) {
/**
* Interface that represents PDF data transport. If possible, it allows
* progressively load entire or fragment of the PDF binary data.
*
* @interface
*/
function IPDFStream() {} // eslint-disable-line no-inner-declarations
IPDFStream.prototype = {
/**
* Gets a reader for the entire PDF data.
* @returns {IPDFStreamReader}
*/
getFullReader() {
return null;
},
/**
* Gets a reader for the range of the PDF data.
* @param {number} begin - the start offset of the data.
* @param {number} end - the end offset of the data.
* @returns {IPDFStreamRangeReader}
*/
getRangeReader(begin, end) {
return null;
},
/**
* Cancels all opened reader and closes all their opened requests.
* @param {Object} reason - the reason for cancelling
*/
cancelAllRequests(reason) {},
};
/**
* Interface for a PDF binary data reader.
*
* @interface
*/
function IPDFStreamReader() {} // eslint-disable-line no-inner-declarations
IPDFStreamReader.prototype = {
/**
* Gets a promise that is resolved when the headers and other metadata of
* the PDF data stream are available.
* @returns {Promise}
*/
get headersReady() {
return null;
},
/**
* Gets PDF binary data length. It is defined after the headersReady promise
* is resolved.
* @returns {number} The data length (or 0 if unknown).
*/
get contentLength() {
return 0;
},
/**
* Gets ability of the stream to handle range requests. It is defined after
* the headersReady promise is resolved. Rejected when the reader is cancelled
* or an error occurs.
* @returns {boolean}
*/
get isRangeSupported() {
return false;
},
/**
* Gets ability of the stream to progressively load binary data. It is defined
* after the headersReady promise is resolved.
* @returns {boolean}
*/
get isStreamingSupported() {
return false;
},
/**
* Requests a chunk of the binary data. The method returns the promise, which
* is resolved into object with properties "value" and "done". If the done
* is set to true, then the stream has reached its end, otherwise the value
* contains binary data. Cancelled requests will be resolved with the done is
* set to true.
* @returns {Promise}
*/
read() {},
/**
* Cancels all pending read requests and closes the stream.
* @param {Object} reason
*/
cancel(reason) {},
/**
* Sets or gets the progress callback. The callback can be useful when the
* isStreamingSupported property of the object is defined as false.
* The callback is called with one parameter: an object with the loaded and
* total properties.
*/
onProgress: null,
};
/**
* Interface for a PDF binary data fragment reader.
*
* @interface
*/
function IPDFStreamRangeReader() {} // eslint-disable-line no-inner-declarations
IPDFStreamRangeReader.prototype = {
/**
* Gets ability of the stream to progressively load binary data.
* @returns {boolean}
*/
get isStreamingSupported() {
return false;
},
/**
* Requests a chunk of the binary data. The method returns the promise, which
* is resolved into object with properties "value" and "done". If the done
* is set to true, then the stream has reached its end, otherwise the value
* contains binary data. Cancelled requests will be resolved with the done is
* set to true.
* @returns {Promise}
*/
read() {},
/**
* Cancels all pending read requests and closes the stream.
* @param {Object} reason
*/
cancel(reason) {},
/**
* Sets or gets the progress callback. The callback can be useful when the
* isStreamingSupported property of the object is defined as false.
* The callback is called with one parameter: an object with the loaded
* property.
*/
onProgress: null,
};
}
/** @implements {IPDFStream} */
var PDFWorkerStream = (function PDFWorkerStreamClosure() {
function PDFWorkerStream(msgHandler) {
this._msgHandler = msgHandler;
this._contentLength = null;
this._fullRequestReader = null;
this._rangeRequestReaders = [];
}
PDFWorkerStream.prototype = {
getFullReader() {
assert(!this._fullRequestReader);
this._fullRequestReader = new PDFWorkerStreamReader(this._msgHandler);
return this._fullRequestReader;
},
getRangeReader(begin, end) {
let reader = new PDFWorkerStreamRangeReader(begin, end, this._msgHandler);
this._rangeRequestReaders.push(reader);
return reader;
},
cancelAllRequests(reason) {
if (this._fullRequestReader) {
this._fullRequestReader.cancel(reason);
}
let readers = this._rangeRequestReaders.slice(0);
readers.forEach(function (reader) {
reader.cancel(reason);
});
},
};
/** @implements {IPDFStreamReader} */
function PDFWorkerStreamReader(msgHandler) {
this._msgHandler = msgHandler;
this._contentLength = null;
this._isRangeSupported = false;
this._isStreamingSupported = false;
let readableStream = this._msgHandler.sendWithStream('GetReader');
this._reader = readableStream.getReader();
this._headersReady = this._msgHandler.sendWithPromise('ReaderHeadersReady').
then((data) => {
this._isStreamingSupported = data.isStreamingSupported;
this._isRangeSupported = data.isRangeSupported;
this._contentLength = data.contentLength;
});
}
PDFWorkerStreamReader.prototype = {
get headersReady() {
return this._headersReady;
},
get contentLength() {
return this._contentLength;
},
get isStreamingSupported() {
return this._isStreamingSupported;
},
get isRangeSupported() {
return this._isRangeSupported;
},
read() {
return this._reader.read().then(function({ value, done, }) {
if (done) {
return { value: undefined, done: true, };
}
// `value` is wrapped into Uint8Array, we need to
// unwrap it to ArrayBuffer for further processing.
return { value: value.buffer, done: false, };
});
},
cancel(reason) {
this._reader.cancel(reason);
},
};
/** @implements {IPDFStreamRangeReader} */
function PDFWorkerStreamRangeReader(begin, end, msgHandler) {
this._msgHandler = msgHandler;
this.onProgress = null;
let readableStream = this._msgHandler.sendWithStream('GetRangeReader',
{ begin, end, });
this._reader = readableStream.getReader();
}
PDFWorkerStreamRangeReader.prototype = {
get isStreamingSupported() {
return false;
},
read() {
return this._reader.read().then(function({ value, done, }) {
if (done) {
return { value: undefined, done: true, };
}
return { value: value.buffer, done: false, };
});
},
cancel(reason) {
this._reader.cancel(reason);
},
};
return PDFWorkerStream;
})();
var WorkerMessageHandler = {
setup(handler, port) {
var testMessageProcessed = false;
handler.on('test', function wphSetupTest(data) {
if (testMessageProcessed) {
return; // we already processed 'test' message once
}
testMessageProcessed = true;
// check if Uint8Array can be sent to worker
if (!(data instanceof Uint8Array)) {
handler.send('test', 'main', false);
return;
}
// making sure postMessage transfers are working
var supportTransfers = data[0] === 255;
handler.postMessageTransfers = supportTransfers;
// check if the response property is supported by xhr
var xhr = new XMLHttpRequest();
var responseExists = 'response' in xhr;
// check if the property is actually implemented
try {
xhr.responseType; // eslint-disable-line no-unused-expressions
} catch (e) {
responseExists = false;
}
if (!responseExists) {
handler.send('test', false);
return;
}
handler.send('test', {
supportTypedArray: true,
supportTransfers,
});
});
handler.on('configure', function wphConfigure(data) {
setVerbosityLevel(data.verbosity);
});
handler.on('GetDocRequest', function wphSetupDoc(data) {
return WorkerMessageHandler.createDocumentHandler(data, port);
});
},
createDocumentHandler(docParams, port) {
// This context is actually holds references on pdfManager and handler,
// until the latter is destroyed.
var pdfManager;
var terminated = false;
var cancelXHRs = null;
var WorkerTasks = [];
var docId = docParams.docId;
var docBaseUrl = docParams.docBaseUrl;
var workerHandlerName = docParams.docId + '_worker';
var handler = new MessageHandler(workerHandlerName, docId, port);
// Ensure that postMessage transfers are correctly enabled/disabled,
// to prevent "DataCloneError" in older versions of IE (see issue 6957).
handler.postMessageTransfers = docParams.postMessageTransfers;
function ensureNotTerminated() {
if (terminated) {
throw new Error('Worker was terminated');
}
}
function startWorkerTask(task) {
WorkerTasks.push(task);
}
function finishWorkerTask(task) {
task.finish();
var i = WorkerTasks.indexOf(task);
WorkerTasks.splice(i, 1);
}
function loadDocument(recoveryMode) {
var loadDocumentCapability = createPromiseCapability();
var parseSuccess = function parseSuccess() {
var numPagesPromise = pdfManager.ensureDoc('numPages');
var fingerprintPromise = pdfManager.ensureDoc('fingerprint');
var encryptedPromise = pdfManager.ensureXRef('encrypt');
Promise.all([numPagesPromise, fingerprintPromise,
encryptedPromise]).then(function onDocReady(results) {
var doc = {
numPages: results[0],
fingerprint: results[1],
encrypted: !!results[2],
};
loadDocumentCapability.resolve(doc);
},
parseFailure);
};
var parseFailure = function parseFailure(e) {
loadDocumentCapability.reject(e);
};
pdfManager.ensureDoc('checkHeader', []).then(function() {
pdfManager.ensureDoc('parseStartXRef', []).then(function() {
pdfManager.ensureDoc('parse', [recoveryMode]).then(
parseSuccess, parseFailure);
}, parseFailure);
}, parseFailure);
return loadDocumentCapability.promise;
}
function getPdfManager(data, evaluatorOptions) {
var pdfManagerCapability = createPromiseCapability();
var pdfManager;
var source = data.source;
if (source.data) {
try {
pdfManager = new LocalPdfManager(docId, source.data, source.password,
evaluatorOptions, docBaseUrl);
pdfManagerCapability.resolve(pdfManager);
} catch (ex) {
pdfManagerCapability.reject(ex);
}
return pdfManagerCapability.promise;
}
var pdfStream, cachedChunks = [];
try {
pdfStream = new PDFWorkerStream(handler);
} catch (ex) {
pdfManagerCapability.reject(ex);
return pdfManagerCapability.promise;
}
var fullRequest = pdfStream.getFullReader();
fullRequest.headersReady.then(function () {
if (!fullRequest.isRangeSupported) {
return;
}
// We don't need auto-fetch when streaming is enabled.
var disableAutoFetch = source.disableAutoFetch ||
fullRequest.isStreamingSupported;
pdfManager = new NetworkPdfManager(docId, pdfStream, {
msgHandler: handler,
url: source.url,
password: source.password,
length: fullRequest.contentLength,
disableAutoFetch,
rangeChunkSize: source.rangeChunkSize,
}, evaluatorOptions, docBaseUrl);
// There may be a chance that `pdfManager` is not initialized
// for first few runs of `readchunk` block of code. Be sure
// to send all cached chunks, if any, to chunked_stream via
// pdf_manager.
for (let i = 0; i < cachedChunks.length; i++) {
pdfManager.sendProgressiveData(cachedChunks[i]);
}
cachedChunks = [];
pdfManagerCapability.resolve(pdfManager);
cancelXHRs = null;
}).catch(function (reason) {
pdfManagerCapability.reject(reason);
cancelXHRs = null;
});
var loaded = 0;
var flushChunks = function () {
var pdfFile = arraysToBytes(cachedChunks);
if (source.length && pdfFile.length !== source.length) {
warn('reported HTTP length is different from actual');
}
// the data is array, instantiating directly from it
try {
pdfManager = new LocalPdfManager(docId, pdfFile, source.password,
evaluatorOptions, docBaseUrl);
pdfManagerCapability.resolve(pdfManager);
} catch (ex) {
pdfManagerCapability.reject(ex);
}
cachedChunks = [];
};
var readPromise = new Promise(function (resolve, reject) {
var readChunk = function (chunk) {
try {
ensureNotTerminated();
if (chunk.done) {
if (!pdfManager) {
flushChunks();
}
cancelXHRs = null;
return;
}
var data = chunk.value;
loaded += arrayByteLength(data);
if (!fullRequest.isStreamingSupported) {
handler.send('DocProgress', {
loaded,
total: Math.max(loaded, fullRequest.contentLength || 0),
});
}
if (pdfManager) {
pdfManager.sendProgressiveData(data);
} else {
cachedChunks.push(data);
}
fullRequest.read().then(readChunk, reject);
} catch (e) {
reject(e);
}
};
fullRequest.read().then(readChunk, reject);
});
readPromise.catch(function (e) {
pdfManagerCapability.reject(e);
cancelXHRs = null;
});
cancelXHRs = function () {
pdfStream.cancelAllRequests('abort');
};
return pdfManagerCapability.promise;
}
function setupDoc(data) {
function onSuccess(doc) {
ensureNotTerminated();
handler.send('GetDoc', { pdfInfo: doc, });
}
function onFailure(e) {
ensureNotTerminated();
if (e instanceof PasswordException) {
var task = new WorkerTask('PasswordException: response ' + e.code);
startWorkerTask(task);
handler.sendWithPromise('PasswordRequest', e).then(function (data) {
finishWorkerTask(task);
pdfManager.updatePassword(data.password);
pdfManagerReady();
}).catch(function (ex) {
finishWorkerTask(task);
handler.send('PasswordException', ex);
}.bind(null, e));
} else if (e instanceof InvalidPDFException) {
handler.send('InvalidPDF', e);
} else if (e instanceof MissingPDFException) {
handler.send('MissingPDF', e);
} else if (e instanceof UnexpectedResponseException) {
handler.send('UnexpectedResponse', e);
} else {
handler.send('UnknownError',
new UnknownErrorException(e.message, e.toString()));
}
}
function pdfManagerReady() {
ensureNotTerminated();
loadDocument(false).then(onSuccess, function loadFailure(ex) {
ensureNotTerminated();
// Try again with recoveryMode == true
if (!(ex instanceof XRefParseException)) {
onFailure(ex);
return;
}
pdfManager.requestLoadedStream();
pdfManager.onLoadedStream().then(function() {
ensureNotTerminated();
loadDocument(true).then(onSuccess, onFailure);
});
}, onFailure);
}
ensureNotTerminated();
var evaluatorOptions = {
forceDataSchema: data.disableCreateObjectURL,
maxImageSize: data.maxImageSize === undefined ? -1 : data.maxImageSize,
disableFontFace: data.disableFontFace,
nativeImageDecoderSupport: data.nativeImageDecoderSupport,
ignoreErrors: data.ignoreErrors,
};
getPdfManager(data, evaluatorOptions).then(function (newPdfManager) {
if (terminated) {
// We were in a process of setting up the manager, but it got
// terminated in the middle.
newPdfManager.terminate();
throw new Error('Worker was terminated');
}
pdfManager = newPdfManager;
handler.send('PDFManagerReady', null);
pdfManager.onLoadedStream().then(function(stream) {
handler.send('DataLoaded', { length: stream.bytes.byteLength, });
});
}).then(pdfManagerReady, onFailure);
}
handler.on('GetPage', function wphSetupGetPage(data) {
return pdfManager.getPage(data.pageIndex).then(function(page) {
var rotatePromise = pdfManager.ensure(page, 'rotate');
var refPromise = pdfManager.ensure(page, 'ref');
var userUnitPromise = pdfManager.ensure(page, 'userUnit');
var viewPromise = pdfManager.ensure(page, 'view');
return Promise.all([
rotatePromise, refPromise, userUnitPromise, viewPromise
]).then(function(results) {
return {
rotate: results[0],
ref: results[1],
userUnit: results[2],
view: results[3],
};
});
});
});
handler.on('GetPageIndex', function wphSetupGetPageIndex(data) {
var ref = new Ref(data.ref.num, data.ref.gen);
var catalog = pdfManager.pdfDocument.catalog;
return catalog.getPageIndex(ref);
});
handler.on('GetDestinations',
function wphSetupGetDestinations(data) {
return pdfManager.ensureCatalog('destinations');
}
);
handler.on('GetDestination',
function wphSetupGetDestination(data) {
return pdfManager.ensureCatalog('getDestination', [data.id]);
}
);
handler.on('GetPageLabels',
function wphSetupGetPageLabels(data) {
return pdfManager.ensureCatalog('pageLabels');
}
);
handler.on('GetPageMode', function wphSetupGetPageMode(data) {
return pdfManager.ensureCatalog('pageMode');
});
handler.on('GetAttachments',
function wphSetupGetAttachments(data) {
return pdfManager.ensureCatalog('attachments');
}
);
handler.on('GetJavaScript',
function wphSetupGetJavaScript(data) {
return pdfManager.ensureCatalog('javaScript');
}
);
handler.on('GetOutline',
function wphSetupGetOutline(data) {
return pdfManager.ensureCatalog('documentOutline');
}
);
handler.on('GetMetadata',
function wphSetupGetMetadata(data) {
return Promise.all([pdfManager.ensureDoc('documentInfo'),
pdfManager.ensureCatalog('metadata')]);
}
);
handler.on('GetData', function wphSetupGetData(data) {
pdfManager.requestLoadedStream();
return pdfManager.onLoadedStream().then(function(stream) {
return stream.bytes;
});
});
handler.on('GetStats',
function wphSetupGetStats(data) {
return pdfManager.pdfDocument.xref.stats;
}
);
handler.on('GetAnnotations', function wphSetupGetAnnotations(data) {
return pdfManager.getPage(data.pageIndex).then(function(page) {
return pdfManager.ensure(page, 'getAnnotationsData', [data.intent]);
});
});
handler.on('RenderPageRequest', function wphSetupRenderPage(data) {
var pageIndex = data.pageIndex;
pdfManager.getPage(pageIndex).then(function(page) {
var task = new WorkerTask('RenderPageRequest: page ' + pageIndex);
startWorkerTask(task);
var pageNum = pageIndex + 1;
var start = Date.now();
// Pre compile the pdf page and fetch the fonts/images.
page.getOperatorList({
handler,
task,
intent: data.intent,
renderInteractiveForms: data.renderInteractiveForms,
}).then(function(operatorList) {
finishWorkerTask(task);
info('page=' + pageNum + ' - getOperatorList: time=' +
(Date.now() - start) + 'ms, len=' + operatorList.totalLength);
}, function(e) {
finishWorkerTask(task);
if (task.terminated) {
return; // ignoring errors from the terminated thread
}
// For compatibility with older behavior, generating unknown
// unsupported feature notification on errors.
handler.send('UnsupportedFeature',
{ featureId: UNSUPPORTED_FEATURES.unknown, });
var minimumStackMessage =
'worker.js: while trying to getPage() and getOperatorList()';
var wrappedException;
// Turn the error into an obj that can be serialized
if (typeof e === 'string') {
wrappedException = {
message: e,
stack: minimumStackMessage,
};
} else if (typeof e === 'object') {
wrappedException = {
message: e.message || e.toString(),
stack: e.stack || minimumStackMessage,
};
} else {
wrappedException = {
message: 'Unknown exception type: ' + (typeof e),
stack: minimumStackMessage,
};
}
handler.send('PageError', {
pageNum,
error: wrappedException,
intent: data.intent,
});
});
});
}, this);
handler.on('GetTextContent', function wphExtractText(data, sink) {
var pageIndex = data.pageIndex;
sink.onPull = function (desiredSize) { };
sink.onCancel = function (reason) { };
pdfManager.getPage(pageIndex).then(function(page) {
var task = new WorkerTask('GetTextContent: page ' + pageIndex);
startWorkerTask(task);
var pageNum = pageIndex + 1;
var start = Date.now();
page.extractTextContent({
handler,
task,
sink,
normalizeWhitespace: data.normalizeWhitespace,
combineTextItems: data.combineTextItems,
}).then(function() {
finishWorkerTask(task);
info('text indexing: page=' + pageNum + ' - time=' +
(Date.now() - start) + 'ms');
sink.close();
}, function (reason) {
finishWorkerTask(task);
if (task.terminated) {
return; // ignoring errors from the terminated thread
}
sink.error(reason);
throw reason;
});
});
});
handler.on('Cleanup', function wphCleanup(data) {
return pdfManager.cleanup();
});
handler.on('Terminate', function wphTerminate(data) {
terminated = true;
if (pdfManager) {
pdfManager.terminate();
pdfManager = null;
}
if (cancelXHRs) {
cancelXHRs();
}
var waitOn = [];
WorkerTasks.forEach(function (task) {
waitOn.push(task.finished);
task.terminate();
});
return Promise.all(waitOn).then(function () {
// Notice that even if we destroying handler, resolved response promise
// must be sent back.
handler.destroy();
handler = null;
});
});
handler.on('Ready', function wphReady(data) {
setupDoc(docParams);
docParams = null; // we don't need docParams anymore -- saving memory.
});
return workerHandlerName;
},
initializeFromPort(port) {
var handler = new MessageHandler('worker', 'main', port);
WorkerMessageHandler.setup(handler, port);
handler.send('ready', null);
},
};
function isMessagePort(maybePort) {
return typeof maybePort.postMessage === 'function' &&
('onmessage' in maybePort);
}
// Worker thread (and not node.js)?
if (typeof window === 'undefined' && !isNodeJS() &&
typeof self !== 'undefined' && isMessagePort(self)) {
WorkerMessageHandler.initializeFromPort(self);
}
export {
WorkerTask,
WorkerMessageHandler,
};