diff --git a/src/core/annotation.js b/src/core/annotation.js index d33cd0821..7db88e46d 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -455,8 +455,12 @@ var Annotation = (function AnnotationClosure() { return resourcesPromise.then((resources) => { var opList = new OperatorList(); opList.addOp(OPS.beginAnnotation, [data.rect, transform, matrix]); - return evaluator.getOperatorList(this.appearance, task, - resources, opList).then(() => { + return evaluator.getOperatorList({ + stream: this.appearance, + task, + resources, + operatorList: opList, + }).then(() => { opList.addOp(OPS.endAnnotation, []); this.appearance.reset(); return opList; @@ -755,8 +759,12 @@ var TextWidgetAnnotation = (function TextWidgetAnnotationClosure() { } var stream = new Stream(stringToBytes(this.data.defaultAppearance)); - return evaluator.getOperatorList(stream, task, this.fieldResources, - operatorList).then(function () { + return evaluator.getOperatorList({ + stream, + task, + resources: this.fieldResources, + operatorList, + }).then(function () { return operatorList; }); } diff --git a/src/core/document.js b/src/core/document.js index c338f7045..16615ad3d 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -233,10 +233,9 @@ var Page = (function PageClosure() { }); }, - getOperatorList(handler, task, intent, renderInteractiveForms) { - var pdfManager = this.pdfManager; - var contentStreamPromise = pdfManager.ensure(this, 'getContentStream', - []); + getOperatorList({ handler, task, intent, renderInteractiveForms, }) { + var contentStreamPromise = this.pdfManager.ensure(this, + 'getContentStream'); var resourcesPromise = this.loadResources([ 'ExtGState', 'ColorSpace', @@ -248,12 +247,16 @@ var Page = (function PageClosure() { // Properties ]); - var partialEvaluator = new PartialEvaluator(pdfManager, this.xref, - handler, this.pageIndex, - this.idFactory, - this.fontCache, - this.builtInCMapCache, - this.evaluatorOptions); + var partialEvaluator = new PartialEvaluator({ + pdfManager: this.pdfManager, + xref: this.xref, + handler, + pageIndex: this.pageIndex, + idFactory: this.idFactory, + fontCache: this.fontCache, + builtInCMapCache: this.builtInCMapCache, + options: this.evaluatorOptions, + }); var dataPromises = Promise.all([contentStreamPromise, resourcesPromise]); var pageListPromise = dataPromises.then(([contentStream]) => { @@ -264,15 +267,19 @@ var Page = (function PageClosure() { pageIndex: this.pageIndex, intent, }); - return partialEvaluator.getOperatorList(contentStream, task, - this.resources, opList).then(function () { - return opList; - }); + return partialEvaluator.getOperatorList({ + stream: contentStream, + task, + resources: this.resources, + operatorList: opList, + }).then(function () { + return opList; + }); }); // Fetch the page's annotations and add their operator lists to the // page's operator list to render them. - var annotationsPromise = pdfManager.ensure(this, 'annotations'); + var annotationsPromise = this.pdfManager.ensure(this, 'annotations'); return Promise.all([pageListPromise, annotationsPromise]).then( function ([pageOpList, annotations]) { if (annotations.length === 0) { @@ -303,11 +310,10 @@ var Page = (function PageClosure() { }); }, - extractTextContent(handler, task, normalizeWhitespace, combineTextItems) { - var pdfManager = this.pdfManager; - var contentStreamPromise = pdfManager.ensure(this, 'getContentStream', - []); - + extractTextContent({ handler, task, normalizeWhitespace, + combineTextItems, }) { + var contentStreamPromise = this.pdfManager.ensure(this, + 'getContentStream'); var resourcesPromise = this.loadResources([ 'ExtGState', 'XObject', @@ -316,19 +322,24 @@ var Page = (function PageClosure() { var dataPromises = Promise.all([contentStreamPromise, resourcesPromise]); return dataPromises.then(([contentStream]) => { - var partialEvaluator = new PartialEvaluator(pdfManager, this.xref, - handler, this.pageIndex, - this.idFactory, - this.fontCache, - this.builtInCMapCache, - this.evaluatorOptions); + var partialEvaluator = new PartialEvaluator({ + pdfManager: this.pdfManager, + xref: this.xref, + handler, + pageIndex: this.pageIndex, + idFactory: this.idFactory, + fontCache: this.fontCache, + builtInCMapCache: this.builtInCMapCache, + options: this.evaluatorOptions, + }); - return partialEvaluator.getTextContent(contentStream, - task, - this.resources, - /* stateManager = */ null, - normalizeWhitespace, - combineTextItems); + return partialEvaluator.getTextContent({ + stream: contentStream, + task, + resources: this.resources, + normalizeWhitespace, + combineTextItems, + }); }); }, diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 26022c596..f7014e509 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -109,7 +109,7 @@ var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph; var getGlyphsUnicode = coreGlyphList.getGlyphsUnicode; var PartialEvaluator = (function PartialEvaluatorClosure() { - var DefaultPartialEvaluatorOptions = { + const DefaultPartialEvaluatorOptions = { forceDataSchema: false, maxImageSize: -1, disableFontFace: false, @@ -170,8 +170,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { cs.isDefaultDecode(dict.getArray('Decode', 'D')); }; - function PartialEvaluator(pdfManager, xref, handler, pageIndex, - idFactory, fontCache, builtInCMapCache, options) { + function PartialEvaluator({ pdfManager, xref, handler, pageIndex, idFactory, + fontCache, builtInCMapCache, options = null, }) { this.pdfManager = pdfManager; this.xref = xref; this.handler = handler; @@ -186,7 +186,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if (cachedCMap) { return Promise.resolve(cachedCMap); } - return handler.sendWithPromise('FetchBuiltInCMap', { + return this.handler.sendWithPromise('FetchBuiltInCMap', { name, }).then((data) => { if (data.compressionType !== CMapCompressionType.NONE) { @@ -381,15 +381,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]); - return this.getOperatorList(xobj, task, - (dict.get('Resources') || resources), - operatorList, initialState).then(function () { - operatorList.addOp(OPS.paintFormXObjectEnd, []); + return this.getOperatorList({ + stream: xobj, + task, + resources: dict.get('Resources') || resources, + operatorList, + initialState, + }).then(function () { + operatorList.addOp(OPS.paintFormXObjectEnd, []); - if (group) { - operatorList.addOp(OPS.endGroup, [groupOptions]); - } - }); + if (group) { + operatorList.addOp(OPS.endGroup, [groupOptions]); + } + }); }, buildPaintImageXObject: @@ -543,8 +547,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var resourcesArray = [patternDict.get('Resources'), resources]; var patternResources = Dict.merge(this.xref, resourcesArray); - return this.getOperatorList(pattern, task, patternResources, - tilingOpList).then(function () { + return this.getOperatorList({ + stream: pattern, + task, + resources: patternResources, + operatorList: tilingOpList, + }).then(function () { // Add the dependencies to the parent operator list so they are // resolved before sub operator list is executed synchronously. operatorList.addDependencies(tilingOpList.dependencies); @@ -897,21 +905,22 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return Promise.resolve(); }, - getOperatorList: function PartialEvaluator_getOperatorList(stream, - task, - resources, - operatorList, - initialState) { + getOperatorList({ stream, task, resources, operatorList, + initialState = null, }) { + // Ensure that `resources`/`initialState` is correctly initialized, + // even if the provided parameter is e.g. `null`. + resources = resources || Dict.empty; + initialState = initialState || new EvalState(); + + assert(operatorList, 'getOperatorList: missing "operatorList" parameter'); + var self = this; var xref = this.xref; var imageCache = Object.create(null); - assert(operatorList); - - resources = (resources || Dict.empty); var xobjs = (resources.get('XObject') || Dict.empty); var patterns = (resources.get('Pattern') || Dict.empty); - var stateManager = new StateManager(initialState || new EvalState()); + var stateManager = new StateManager(initialState); var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); var timeSlotManager = new TimeSlotManager(); @@ -1212,13 +1221,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }); }, - getTextContent: - function PartialEvaluator_getTextContent(stream, task, resources, - stateManager, - normalizeWhitespace, - combineTextItems) { - - stateManager = (stateManager || new StateManager(new TextState())); + getTextContent({ stream, task, resources, stateManager = null, + normalizeWhitespace = false, combineTextItems = false, }) { + // Ensure that `resources`/`stateManager` is correctly initialized, + // even if the provided parameter is e.g. `null`. + resources = resources || Dict.empty; + stateManager = stateManager || new StateManager(new TextState()); var WhitespaceRegexp = /\s/g; @@ -1250,8 +1258,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var self = this; var xref = this.xref; - resources = (xref.fetchIfRef(resources) || Dict.empty); - // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. var xobjs = null; var xobjsCache = Object.create(null); @@ -1690,16 +1696,20 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { xObjStateManager.transform(matrix); } - next(self.getTextContent(xobj, task, - xobj.dict.get('Resources') || resources, xObjStateManager, - normalizeWhitespace, combineTextItems).then( - function (formTextContent) { - Util.appendToArray(textContent.items, formTextContent.items); - Util.extendObj(textContent.styles, formTextContent.styles); + next(self.getTextContent({ + stream: xobj, + task, + resources: xobj.dict.get('Resources') || resources, + stateManager: xObjStateManager, + normalizeWhitespace, + combineTextItems, + }).then(function (formTextContent) { + Util.appendToArray(textContent.items, formTextContent.items); + Util.extendObj(textContent.styles, formTextContent.styles); - xobjsCache.key = name; - xobjsCache.texts = formTextContent; - })); + xobjsCache.key = name; + xobjsCache.texts = formTextContent; + })); return; case OPS.setGState: flushTextContentItem(); @@ -2518,9 +2528,12 @@ var TranslatedFont = (function TranslatedFontClosure() { loadCharProcsPromise = loadCharProcsPromise.then(function () { var glyphStream = charProcs.get(key); var operatorList = new OperatorList(); - return type3Evaluator.getOperatorList(glyphStream, task, - fontResources, operatorList). - then(function () { + return type3Evaluator.getOperatorList({ + stream: glyphStream, + task, + resources: fontResources, + operatorList, + }).then(function () { charProcOperatorList[key] = operatorList.getIR(); // Add the dependencies to the parent operator list so they are diff --git a/src/core/worker.js b/src/core/worker.js index eeea840c2..f52cdefbd 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -848,9 +848,12 @@ var WorkerMessageHandler = { var pageNum = pageIndex + 1; var start = Date.now(); // Pre compile the pdf page and fetch the fonts/images. - page.getOperatorList(handler, task, data.intent, - data.renderInteractiveForms).then( - function(operatorList) { + page.getOperatorList({ + handler, + task, + intent: data.intent, + renderInteractiveForms: data.renderInteractiveForms, + }).then(function(operatorList) { finishWorkerTask(task); info('page=' + pageNum + ' - getOperatorList: time=' + @@ -906,10 +909,14 @@ var WorkerMessageHandler = { var pageNum = pageIndex + 1; var start = Date.now(); - return page.extractTextContent(handler, task, data.normalizeWhitespace, - data.combineTextItems).then( - function(textContent) { + return page.extractTextContent({ + handler, + task, + normalizeWhitespace: data.normalizeWhitespace, + combineTextItems: data.combineTextItems, + }).then(function(textContent) { finishWorkerTask(task); + info('text indexing: page=' + pageNum + ' - time=' + (Date.now() - start) + 'ms'); return textContent; diff --git a/test/unit/evaluator_spec.js b/test/unit/evaluator_spec.js index 820f121d3..cc84d878e 100644 --- a/test/unit/evaluator_spec.js +++ b/test/unit/evaluator_spec.js @@ -48,20 +48,36 @@ describe('evaluator', function() { function runOperatorListCheck(evaluator, stream, resources, callback) { var result = new OperatorList(); var task = new WorkerTask('OperatorListCheck'); - evaluator.getOperatorList(stream, task, resources, result).then( - function () { + evaluator.getOperatorList({ + stream, + task, + resources, + operatorList: result, + }).then(function() { callback(result); }); } + var partialEvaluator; + + beforeAll(function(done) { + partialEvaluator = new PartialEvaluator({ + pdfManager: new PdfManagerMock(), + xref: new XrefMock(), + handler: new HandlerMock(), + pageIndex: 0, + }); + done(); + }); + + afterAll(function() { + partialEvaluator = null; + }); + describe('splitCombinedOperations', function() { it('should reject unknown operations', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('fTT'); - - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function(result) { expect(!!result.fnArray && !!result.argsArray).toEqual(true); expect(result.fnArray.length).toEqual(1); @@ -72,11 +88,8 @@ describe('evaluator', function() { }); it('should handle one operations', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('Q'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function(result) { expect(!!result.fnArray && !!result.argsArray).toEqual(true); expect(result.fnArray.length).toEqual(1); @@ -86,13 +99,11 @@ describe('evaluator', function() { }); it('should handle two glued operations', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var resources = new ResourcesMock(); resources.Res1 = {}; var stream = new StringStream('/Res1 DoQ'); - runOperatorListCheck(evaluator, stream, resources, function (result) { + runOperatorListCheck(partialEvaluator, stream, resources, + function(result) { expect(!!result.fnArray && !!result.argsArray).toEqual(true); expect(result.fnArray.length).toEqual(2); expect(result.fnArray[0]).toEqual(OPS.paintXObject); @@ -102,11 +113,8 @@ describe('evaluator', function() { }); it('should handle tree glued operations', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('fff'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function (result) { expect(!!result.fnArray && !!result.argsArray).toEqual(true); expect(result.fnArray.length).toEqual(3); @@ -118,13 +126,11 @@ describe('evaluator', function() { }); it('should handle three glued operations #2', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var resources = new ResourcesMock(); resources.Res1 = {}; var stream = new StringStream('B*Bf*'); - runOperatorListCheck(evaluator, stream, resources, function (result) { + runOperatorListCheck(partialEvaluator, stream, resources, + function(result) { expect(!!result.fnArray && !!result.argsArray).toEqual(true); expect(result.fnArray.length).toEqual(3); expect(result.fnArray[0]).toEqual(OPS.eoFillStroke); @@ -135,11 +141,8 @@ describe('evaluator', function() { }); it('should handle glued operations and operands', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('f5 Ts'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function (result) { expect(!!result.fnArray && !!result.argsArray).toEqual(true); expect(result.fnArray.length).toEqual(2); @@ -153,11 +156,8 @@ describe('evaluator', function() { }); it('should handle glued operations and literals', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('trueifalserinulln'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function (result) { expect(!!result.fnArray && !!result.argsArray).toEqual(true); expect(result.fnArray.length).toEqual(3); @@ -177,11 +177,8 @@ describe('evaluator', function() { describe('validateNumberOfArgs', function() { it('should execute if correct number of arguments', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('5 1 d0'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function (result) { expect(result.argsArray[0][0]).toEqual(5); expect(result.argsArray[0][1]).toEqual(1); @@ -190,11 +187,8 @@ describe('evaluator', function() { }); }); it('should execute if too many arguments', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('5 1 4 d0'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function (result) { expect(result.argsArray[0][0]).toEqual(1); expect(result.argsArray[0][1]).toEqual(4); @@ -203,11 +197,8 @@ describe('evaluator', function() { }); }); it('should execute if nested commands', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('/F2 /GS2 gs 5.711 Tf'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function (result) { expect(result.fnArray.length).toEqual(3); expect(result.fnArray[0]).toEqual(OPS.setGState); @@ -221,11 +212,8 @@ describe('evaluator', function() { }); }); it('should skip if too few arguments', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('5 d0'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function (result) { expect(result.argsArray).toEqual([]); expect(result.fnArray).toEqual([]); @@ -233,11 +221,8 @@ describe('evaluator', function() { }); }); it('should close opened saves', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('qq'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function (result) { expect(!!result.fnArray && !!result.argsArray).toEqual(true); expect(result.fnArray.length).toEqual(4); @@ -249,11 +234,8 @@ describe('evaluator', function() { }); }); it('should skip paintXObject if name is missing', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('/ Do'); - runOperatorListCheck(evaluator, stream, new ResourcesMock(), + runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), function (result) { expect(result.argsArray).toEqual([]); expect(result.fnArray).toEqual([]); @@ -261,9 +243,6 @@ describe('evaluator', function() { }); }); it('should skip paintXObject if subtype is PS', function(done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var xobjStreamDict = new Dict(); xobjStreamDict.set('Subtype', Name.get('PS')); var xobjStream = new Stream([], 0, 0, xobjStreamDict); @@ -275,7 +254,8 @@ describe('evaluator', function() { resources.set('XObject', xobjs); var stream = new StringStream('/Res1 Do'); - runOperatorListCheck(evaluator, stream, resources, function (result) { + runOperatorListCheck(partialEvaluator, stream, resources, + function(result) { expect(result.argsArray).toEqual([]); expect(result.fnArray).toEqual([]); done(); @@ -285,34 +265,35 @@ describe('evaluator', function() { describe('thread control', function() { it('should abort operator list parsing', function (done) { - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('qqQQ'); var resources = new ResourcesMock(); var result = new OperatorList(); var task = new WorkerTask('OperatorListAbort'); task.terminate(); - evaluator.getOperatorList(stream, task, resources, result).catch( - function () { - expect(!!result.fnArray && !!result.argsArray).toEqual(true); - expect(result.fnArray.length).toEqual(0); - done(); - }); + partialEvaluator.getOperatorList({ + stream, + task, + resources, + operatorList: result, + }).catch(function() { + expect(!!result.fnArray && !!result.argsArray).toEqual(true); + expect(result.fnArray.length).toEqual(0); + done(); + }); }); it('should abort text parsing parsing', function (done) { var resources = new ResourcesMock(); - var evaluator = new PartialEvaluator(new PdfManagerMock(), - new XrefMock(), new HandlerMock(), - 'prefix'); var stream = new StringStream('qqQQ'); var task = new WorkerTask('TextContentAbort'); task.terminate(); - evaluator.getTextContent(stream, task, resources).catch( - function () { - expect(true).toEqual(true); - done(); - }); + partialEvaluator.getTextContent({ + stream, + task, + resources, + }).catch(function() { + expect(true).toEqual(true); + done(); + }); }); });