Make getOperatorList() calls independent and merge queues at end

This commit is contained in:
Mack Duan 2013-04-08 15:14:56 -07:00
parent 6b2c6fc223
commit dbccbaaa27
10 changed files with 1208 additions and 753 deletions

View File

@ -18,7 +18,7 @@
isArrayBuffer, isDict, isName, isStream, isString, Lexer,
Linearization, NullStream, PartialEvaluator, shadow, Stream,
StreamsSequenceStream, stringToPDFString, TODO, Util, warn, XRef,
MissingDataException */
MissingDataException, PDFJS */
'use strict';
@ -60,7 +60,8 @@ var Page = (function PageClosure() {
return appearance;
}
function Page(xref, pageIndex, pageDict, ref) {
function Page(pdfManager, xref, pageIndex, pageDict, ref) {
this.pdfManager = pdfManager;
this.pageIndex = pageIndex;
this.pageDict = pageDict;
this.xref = xref;
@ -146,28 +147,71 @@ var Page = (function PageClosure() {
}
return content;
},
getOperatorList: function Page_getOperatorList(handler, dependency) {
var xref = this.xref;
var contentStream = this.getContentStream();
var resources = this.resources;
var pe = this.pe = new PartialEvaluator(
xref, handler, this.pageIndex,
'p' + this.pageIndex + '_');
getOperatorList: function Page_getOperatorList(handler) {
var self = this;
var promise = new PDFJS.Promise();
var list = pe.getOperatorList(contentStream, resources, dependency);
var pageListPromise = new PDFJS.Promise();
var annotationListPromise = new PDFJS.Promise();
var annotations = this.getAnnotationsForDraw();
var annotationEvaluator = new PartialEvaluator(
xref, handler, this.pageIndex,
'p' + this.pageIndex + '_annotation');
var annotationsList = annotationEvaluator.getAnnotationsOperatorList(
annotations, dependency);
var pdfManager = this.pdfManager;
var contentStreamPromise = pdfManager.ensure(this, 'getContentStream',
[]);
var resourcesPromise = pdfManager.ensure(this, 'resources');
var dataPromises = PDFJS.Promise.all(
[contentStreamPromise, resourcesPromise]);
dataPromises.then(function(data) {
var contentStream = data[0];
var resources = data[1];
var pe = self.pe = new PartialEvaluator(
self.xref, handler, self.pageIndex,
'p' + self.pageIndex + '_');
Util.concatenateToArray(list.fnArray, annotationsList.fnArray);
Util.concatenateToArray(list.argsArray, annotationsList.argsArray);
pdfManager.ensure(pe, 'getOperatorList',
[contentStream, resources]).then(
function(opListPromise) {
opListPromise.then(function(data) {
pageListPromise.resolve(data);
});
}
);
});
pdfManager.ensure(this, 'getAnnotationsForDraw', []).then(
function(annotations) {
var annotationEvaluator = new PartialEvaluator(
self.xref, handler, self.pageIndex,
'p' + self.pageIndex + '_annotation');
pdfManager.ensure(annotationEvaluator, 'getAnnotationsOperatorList',
[annotations]).then(
function(opListPromise) {
opListPromise.then(function(data) {
annotationListPromise.resolve(data);
});
}
);
}
);
PDFJS.Promise.all([pageListPromise, annotationListPromise]).then(
function(datas) {
var pageData = datas[0];
var pageQueue = pageData.queue;
var annotationData = datas[1];
var annotationQueue = annotationData.queue;
Util.concatenateToArray(pageQueue.fnArray, annotationQueue.fnArray);
Util.concatenateToArray(pageQueue.argsArray,
annotationQueue.argsArray);
PartialEvaluator.optimizeQueue(pageQueue);
Util.extendObj(pageData.dependencies, annotationData.dependencies);
promise.resolve(pageData);
}
);
return promise;
pe.optimizeQueue(list);
return list;
},
extractTextContent: function Page_extractTextContent() {
var handler = {
@ -175,14 +219,39 @@ var Page = (function PageClosure() {
send: function nullHandlerSend() {}
};
var xref = this.xref;
var contentStream = this.getContentStream();
var resources = xref.fetchIfRef(this.resources);
var self = this;
var pe = new PartialEvaluator(
xref, handler, this.pageIndex,
'p' + this.pageIndex + '_');
return pe.getTextContent(contentStream, resources);
var textContentPromise = new PDFJS.Promise();
var pdfManager = this.pdfManager;
var contentStreamPromise = pdfManager.ensure(this, 'getContentStream',
[]);
var resourcesPromise = new PDFJS.Promise();
pdfManager.ensure(this, 'resources').then(function(resources) {
pdfManager.ensure(self.xref, 'fetchIfRef', [resources]).then(
function(resources) {
resourcesPromise.resolve(resources);
}
);
});
var dataPromises = PDFJS.Promise.all([contentStreamPromise,
resourcesPromise]);
dataPromises.then(function(data) {
var contentStream = data[0];
var resources = data[1];
var pe = new PartialEvaluator(
self.xref, handler, self.pageIndex,
'p' + self.pageIndex + '_');
pe.getTextContent(contentStream, resources).then(function(bidiTexts) {
textContentPromise.resolve({
bidiTexts: bidiTexts
});
});
});
return textContentPromise;
},
getLinks: function Page_getLinks() {
var links = [];
@ -422,17 +491,18 @@ var Page = (function PageClosure() {
* `PDFDocument` objects on the main thread created.
*/
var PDFDocument = (function PDFDocumentClosure() {
function PDFDocument(arg, password) {
function PDFDocument(pdfManager, arg, password) {
if (isStream(arg))
init.call(this, arg, password);
init.call(this, pdfManager, arg, password);
else if (isArrayBuffer(arg))
init.call(this, new Stream(arg), password);
init.call(this, pdfManager, new Stream(arg), password);
else
error('PDFDocument: Unknown argument type');
}
function init(stream, password) {
function init(pdfManager, stream, password) {
assertWellFormed(stream.length > 0, 'stream must have data');
this.pdfManager = pdfManager;
this.stream = stream;
var xref = new XRef(this.stream, password);
this.xref = xref;
@ -576,7 +646,7 @@ var PDFDocument = (function PDFDocumentClosure() {
},
setup: function PDFDocument_setup(recoveryMode) {
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.xref);
this.catalog = new Catalog(this.pdfManager, this.xref);
},
get numPages() {
var linearization = this.linearization;

File diff suppressed because it is too large Load Diff

View File

@ -18,7 +18,7 @@
InvalidPDFException, isArray, isCmd, isDict, isInt, isName, isRef,
isStream, JpegStream, Lexer, log, Page, Parser, Promise, shadow,
stringToPDFString, stringToUTF8String, warn, isString, assert, PDFJS,
MissingDataException, XRefParseException */
MissingDataException, XRefParseException, Stream */
'use strict';
@ -151,7 +151,8 @@ var RefSet = (function RefSetClosure() {
})();
var Catalog = (function CatalogClosure() {
function Catalog(xref) {
function Catalog(pdfManager, xref) {
this.pdfManager = pdfManager;
this.xref = xref;
this.catDict = xref.getCatalogObj();
assertWellFormed(isDict(this.catDict),
@ -363,7 +364,8 @@ var Catalog = (function CatalogClosure() {
var kid = this.xref.fetch(kidRef);
if (isDict(kid, 'Page') || (isDict(kid) && !kid.has('Kids'))) {
var pageIndex = this.currPageIndex++;
var page = new Page(this.xref, pageIndex, kid, kidRef);
var page = new Page(this.pdfManager, this.xref, pageIndex, kid,
kidRef);
if (!(pageIndex in this.pagePromises)) {
this.pagePromises[pageIndex] = new PDFJS.Promise();
}
@ -832,10 +834,16 @@ var XRef = (function XRefClosure() {
fetch: function XRef_fetch(ref, suppressEncryption) {
assertWellFormed(isRef(ref), 'ref object is not a reference');
var num = ref.num;
if (num in this.cache)
return this.cache[num];
var e;
if (num in this.cache) {
e = this.cache[num];
if (e instanceof Stream) {
return e.makeSubStream(e.start, e.length, e.dict);
}
return e;
}
var e = this.getEntry(num);
e = this.getEntry(num);
// the referenced entry can be free
if (e === null)
@ -877,9 +885,16 @@ var XRef = (function XRefClosure() {
} else {
e = parser.getObj();
}
// Don't cache streams since they are mutable (except images).
if (!isStream(e) || e instanceof JpegStream)
if (!isStream(e) || e instanceof JpegStream) {
this.cache[num] = e;
} else if (e instanceof Stream) {
e = e.makeSubStream(e.start, e.length, e.dict);
this.cache[num] = e;
} else if ('readBlock' in e) {
e.getBytes();
e = e.makeSubStream(0, e.bufferLength, e.dict);
this.cache[num] = e;
}
return e;
}

View File

@ -36,6 +36,21 @@ var Parser = (function ParserClosure() {
}
Parser.prototype = {
saveState: function Parser_saveState() {
this.state = {
buf1: this.buf1,
buf2: this.buf2,
streamPos: this.lexer.stream.pos
};
},
restoreState: function Parser_restoreState() {
var state = this.state;
this.buf1 = state.buf1;
this.buf2 = state.buf2;
this.lexer.stream.pos = state.streamPos;
},
refill: function Parser_refill() {
this.buf1 = this.lexer.getObj();
this.buf2 = this.lexer.getObj();

View File

@ -67,7 +67,7 @@ var BasePdfManager = (function BasePdfManagerClosure() {
var LocalPdfManager = (function LocalPdfManagerClosure() {
function LocalPdfManager(data, password) {
var stream = new Stream(data);
this.pdfModel = new PDFDocument(stream, password);
this.pdfModel = new PDFDocument(this, stream, password);
this.loadedStream = new PDFJS.Promise();
this.loadedStream.resolve(stream);
}
@ -124,13 +124,14 @@ var NetworkPdfManager = (function NetworkPdfManagerClosure() {
this.streamManager = new ChunkedStreamManager(args.length, CHUNK_SIZE,
args.url, params);
this.pdfModel = new PDFDocument(this.streamManager.getStream(),
this.pdfModel = new PDFDocument(this, this.streamManager.getStream(),
args.password);
}
NetworkPdfManager.prototype = Object.create(BasePdfManager.prototype);
NetworkPdfManager.prototype.constructor = NetworkPdfManager;
// FIXME(mack): Make ensure() use array for all arguments
NetworkPdfManager.prototype.ensure =
function NetworkPdfManager_ensure(obj, prop) {
var promise = new PDFJS.Promise();

View File

@ -26,7 +26,7 @@ var Stream = (function StreamClosure() {
this.start = start || 0;
this.pos = this.start;
this.end = (start + length) || this.bytes.length;
this.dict = dict;
this.parameters = this.dict = dict;
}
// required methods for a stream. if a particular stream does not
@ -645,6 +645,10 @@ var PredictorStream = (function PredictorStreamClosure() {
var colors = this.colors;
var rawBytes = this.stream.getBytes(rowBytes);
this.eof = !rawBytes.length;
if (this.eof) {
return;
}
var inbuf = 0, outbuf = 0;
var inbits = 0, outbits = 0;
@ -705,6 +709,10 @@ var PredictorStream = (function PredictorStreamClosure() {
var predictor = this.stream.getByte();
var rawBytes = this.stream.getBytes(rowBytes);
this.eof = !rawBytes.length;
if (this.eof) {
return;
}
var bufferLength = this.bufferLength;
var buffer = this.ensureBuffer(bufferLength + rowBytes);
@ -853,6 +861,7 @@ var JpegStream = (function JpegStreamClosure() {
var data = jpegImage.getData(width, height);
this.buffer = data;
this.bufferLength = data.length;
this.eof = true;
} catch (e) {
error('JPEG error: ' + e);
}
@ -988,6 +997,7 @@ var JpxStream = (function JpxStreamClosure() {
this.buffer = data;
this.bufferLength = data.length;
this.eof = true;
};
JpxStream.prototype.getChar = function JpxStream_getChar() {
error('internal error: getChar is not valid on JpxStream');
@ -1032,6 +1042,7 @@ var Jbig2Stream = (function Jbig2StreamClosure() {
this.buffer = data;
this.bufferLength = dataLength;
this.eof = true;
};
Jbig2Stream.prototype.getChar = function Jbig2Stream_getChar() {
error('internal error: getChar is not valid on Jbig2Stream');

View File

@ -397,8 +397,19 @@ var Util = PDFJS.Util = (function UtilClosure() {
return num < 0 ? -1 : 1;
};
// TODO(mack): Rename to appendToArray
Util.concatenateToArray = function concatenateToArray(arr1, arr2) {
return Array.prototype.push.apply(arr1, arr2);
Array.prototype.push.apply(arr1, arr2);
};
// Inserts every element of arr2 at the front of arr1, in place, keeping the
// relative order of arr2's elements. Nothing is returned.
// The function expression is named after the property it is assigned to so
// that stack traces and Function.name identify it correctly.
// NOTE: arr2 is spread into call arguments via apply(), so a very large arr2
// may hit the engine's argument-count limit.
Util.prependToArray = function prependToArray(arr1, arr2) {
  Array.prototype.unshift.apply(arr1, arr2);
};
// Shallow-copies every enumerable property of obj2 onto obj1, overwriting
// keys that already exist. The for...in loop is unfiltered, so enumerable
// properties inherited through obj2's prototype chain are copied as well.
// obj1 is mutated in place; nothing is returned.
Util.extendObj = function extendObj(obj1, obj2) {
  var source = obj2;
  var target = obj1;
  for (var name in source) {
    var value = source[name];
    target[name] = value;
  }
};
return Util;

View File

@ -285,6 +285,7 @@ var WorkerMessageHandler = {
};
getPdfManager(data).then(function() {
globalScope.pdfManager = pdfManager;
loadDocument(false).then(onSuccess, function(ex) {
// Try again with recoveryMode == true
if (!(ex instanceof XRefParseException)) {
@ -358,10 +359,11 @@ var WorkerMessageHandler = {
var pageNum = data.pageIndex + 1;
var start = Date.now();
var dependency = [];
// Pre compile the pdf page and fetch the fonts/images.
pdfManager.ensure(page, 'getOperatorList', handler,
dependency).then(function(operatorList) {
page.getOperatorList(handler).then(function(opListData) {
var operatorList = opListData.queue;
var dependency = Object.keys(opListData.dependencies);
// The following code does quite the same as
// Page.prototype.startRendering, but stops at one point and sends the
@ -420,8 +422,7 @@ var WorkerMessageHandler = {
pdfManager.getPage(data.pageIndex).then(function(page) {
var pageNum = data.pageIndex + 1;
var start = Date.now();
pdfManager.ensure(page,
'extractTextContent').then(function(textContent) {
page.extractTextContent().then(function(textContent) {
promise.resolve(textContent);
log('text indexing: page=%d - time=%dms', pageNum,
Date.now() - start);

View File

@ -158,7 +158,6 @@ class TestHandlerBase(BaseHTTPRequestHandler):
elif v[0] == errno.EPIPE:
print 'Detected remote peer disconnected'
elif v[0] == 10053:
# FIXME(mack): Address this issue
print 'An established connection was aborted by the' \
' software in your host machine'
else:

View File

@ -33,23 +33,27 @@ describe('evaluator', function() {
var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(),
'prefix');
var stream = new StringStream('qTT');
var result = evaluator.getOperatorList(stream, new ResourcesMock(), []);
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(1);
expect(result.fnArray[0]).toEqual('save');
expect(result.argsArray[0].length).toEqual(0);
var promise = evaluator.getOperatorList(stream, new ResourcesMock());
promise.then(function(data) {
var result = data.queue;
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(1);
expect(result.fnArray[0]).toEqual('save');
expect(result.argsArray[0].length).toEqual(0);
});
});
it('should handle one operations', function() {
var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(),
'prefix');
var stream = new StringStream('Q');
var result = evaluator.getOperatorList(stream, new ResourcesMock(), []);
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(1);
expect(result.fnArray[0]).toEqual('restore');
var promise = evaluator.getOperatorList(stream, new ResourcesMock());
promise.then(function(data) {
var result = data.queue;
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(1);
expect(result.fnArray[0]).toEqual('restore');
});
});
it('should handle two glued operations', function() {
@ -58,25 +62,29 @@ describe('evaluator', function() {
var resources = new ResourcesMock();
resources.Res1 = {};
var stream = new StringStream('/Res1 DoQ');
var result = evaluator.getOperatorList(stream, resources, []);
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(2);
expect(result.fnArray[0]).toEqual('paintXObject');
expect(result.fnArray[1]).toEqual('restore');
var promise = evaluator.getOperatorList(stream, resources);
promise.then(function(data) {
var result = data.queue;
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(2);
expect(result.fnArray[0]).toEqual('paintXObject');
expect(result.fnArray[1]).toEqual('restore');
});
});
it('should handle tree glued operations', function() {
var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(),
'prefix');
var stream = new StringStream('qqq');
var result = evaluator.getOperatorList(stream, new ResourcesMock(), []);
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(3);
expect(result.fnArray[0]).toEqual('save');
expect(result.fnArray[1]).toEqual('save');
expect(result.fnArray[2]).toEqual('save');
var promise = evaluator.getOperatorList(stream, new ResourcesMock());
promise.then(function(data) {
var result = data.queue;
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(3);
expect(result.fnArray[0]).toEqual('save');
expect(result.fnArray[1]).toEqual('save');
expect(result.fnArray[2]).toEqual('save');
});
});
it('should handle three glued operations #2', function() {
@ -85,47 +93,53 @@ describe('evaluator', function() {
var resources = new ResourcesMock();
resources.Res1 = {};
var stream = new StringStream('B*Bf*');
var result = evaluator.getOperatorList(stream, resources, []);
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(3);
expect(result.fnArray[0]).toEqual('eoFillStroke');
expect(result.fnArray[1]).toEqual('fillStroke');
expect(result.fnArray[2]).toEqual('eoFill');
var promise = evaluator.getOperatorList(stream, resources);
promise.then(function(data) {
var result = data.queue;
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(3);
expect(result.fnArray[0]).toEqual('eoFillStroke');
expect(result.fnArray[1]).toEqual('fillStroke');
expect(result.fnArray[2]).toEqual('eoFill');
});
});
it('should handle glued operations and operands', function() {
var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(),
'prefix');
var stream = new StringStream('q5 Ts');
var result = evaluator.getOperatorList(stream, new ResourcesMock(), []);
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(2);
expect(result.fnArray[0]).toEqual('save');
expect(result.fnArray[1]).toEqual('setTextRise');
expect(result.argsArray.length).toEqual(2);
expect(result.argsArray[1].length).toEqual(1);
expect(result.argsArray[1][0]).toEqual(5);
var promise = evaluator.getOperatorList(stream, new ResourcesMock());
promise.then(function(data) {
var result = data.queue;
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(2);
expect(result.fnArray[0]).toEqual('save');
expect(result.fnArray[1]).toEqual('setTextRise');
expect(result.argsArray.length).toEqual(2);
expect(result.argsArray[1].length).toEqual(1);
expect(result.argsArray[1][0]).toEqual(5);
});
});
it('should handle glued operations and literals', function() {
var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(),
'prefix');
var stream = new StringStream('trueifalserinullq');
var result = evaluator.getOperatorList(stream, new ResourcesMock(), []);
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(3);
expect(result.fnArray[0]).toEqual('setFlatness');
expect(result.fnArray[1]).toEqual('setRenderingIntent');
expect(result.fnArray[2]).toEqual('save');
expect(result.argsArray.length).toEqual(3);
expect(result.argsArray[0].length).toEqual(1);
expect(result.argsArray[0][0]).toEqual(true);
expect(result.argsArray[1].length).toEqual(1);
expect(result.argsArray[1][0]).toEqual(false);
expect(result.argsArray[2].length).toEqual(0);
var promise = evaluator.getOperatorList(stream, new ResourcesMock());
promise.then(function(data) {
var result = data.queue;
expect(!!result.fnArray && !!result.argsArray).toEqual(true);
expect(result.fnArray.length).toEqual(3);
expect(result.fnArray[0]).toEqual('setFlatness');
expect(result.fnArray[1]).toEqual('setRenderingIntent');
expect(result.fnArray[2]).toEqual('save');
expect(result.argsArray.length).toEqual(3);
expect(result.argsArray[0].length).toEqual(1);
expect(result.argsArray[0][0]).toEqual(true);
expect(result.argsArray[1].length).toEqual(1);
expect(result.argsArray[1][0]).toEqual(false);
expect(result.argsArray[2].length).toEqual(0);
});
});
});
@ -134,31 +148,38 @@ describe('evaluator', function() {
var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(),
'prefix');
var stream = new StringStream('5 1 d0');
var result = evaluator.getOperatorList(stream, new ResourcesMock(), []);
expect(result.argsArray[0][0]).toEqual(5);
expect(result.argsArray[0][1]).toEqual(1);
expect(result.fnArray[0]).toEqual('setCharWidth');
console.log('here!');
var promise = evaluator.getOperatorList(stream, new ResourcesMock());
promise.then(function(data) {
var result = data.queue;
expect(result.argsArray[0][0]).toEqual(5);
expect(result.argsArray[0][1]).toEqual(1);
expect(result.fnArray[0]).toEqual('setCharWidth');
});
});
it('should execute if too many arguments', function() {
var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(),
'prefix');
var stream = new StringStream('5 1 4 d0');
var result = evaluator.getOperatorList(stream, new ResourcesMock(), []);
expect(result.argsArray[0][0]).toEqual(5);
expect(result.argsArray[0][1]).toEqual(1);
expect(result.argsArray[0][2]).toEqual(4);
expect(result.fnArray[0]).toEqual('setCharWidth');
var promise = evaluator.getOperatorList(stream, new ResourcesMock());
promise.then(function(data) {
var result = data.queue;
expect(result.argsArray[0][0]).toEqual(5);
expect(result.argsArray[0][1]).toEqual(1);
expect(result.argsArray[0][2]).toEqual(4);
expect(result.fnArray[0]).toEqual('setCharWidth');
});
});
it('should skip if too few arguments', function() {
var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(),
'prefix');
var stream = new StringStream('5 d0');
var result = evaluator.getOperatorList(stream, new ResourcesMock(), []);
expect(result.argsArray).toEqual([]);
expect(result.fnArray).toEqual([]);
var promise = evaluator.getOperatorList(stream, new ResourcesMock());
promise.then(function(data) {
var result = data.queue;
expect(result.argsArray).toEqual([]);
expect(result.fnArray).toEqual([]);
});
});
});
});