Fix stream reset; yield the thread between pages while text is being indexed

This commit is contained in:
notmasteryet 2011-12-11 18:14:52 -06:00
parent 94cc2cdb75
commit 4d44eb6184
2 changed files with 19 additions and 8 deletions

View File

@ -205,7 +205,7 @@ var Page = (function PageClosure() {
streams.push(xref.fetchIfRef(content[i]));
content = new StreamsSequenceStream(streams);
} else if (isStream(content))
content.pos = 0;
content.reset();
var pe = this.pe = new PartialEvaluator(
xref, handler, 'p' + this.pageNumber + '_');
@ -236,7 +236,7 @@ var Page = (function PageClosure() {
streams.push(xref.fetchIfRef(content[i]));
content = new StreamsSequenceStream(streams);
} else if (isStream(content))
content.pos = 0;
content.reset();
var pe = new PartialEvaluator(
xref, handler, 'p' + this.pageNumber + '_');

View File

@ -164,23 +164,34 @@ var WorkerMessageHandler = {
handler.on('extract_text', function wphExtractText() {
var numPages = pdfDoc.numPages;
var index = [];
for (var i = 0; i < numPages; i++) {
var start = Date.now();
var start = Date.now();
function indexPage(pageNum) {
if (pageNum > numPages) {
console.log('text indexing=: time=%dms', Date.now() - start);
handler.send('text_extracted', { index: index });
return;
}
var textContent = '';
try {
var page = pdfDoc.getPage(i + 1);
var page = pdfDoc.getPage(pageNum);
textContent = page.extractTextContent();
} catch (e) {
// Skip errored pages
}
index.push(textContent);
// Process one page at a time, then yield the thread so that
// other requests can be handled before the next page is indexed.
setTimeout(function extractTextNextPage() {
indexPage(pageNum + 1);
}, 0);
}
console.log('text indexing=: time=%dms', Date.now() - start);
handler.send('text_extracted', { index: index });
indexPage(1);
});
}
};