From 01ce3d056c95c0671a0af1dd9427343a1b49d4c6 Mon Sep 17 00:00:00 2001 From: Brendan Dahl Date: Tue, 4 Jun 2013 17:57:52 -0700 Subject: [PATCH] Load all resources before getOperatorList/getTextContent. --- src/annotation.js | 88 +++--- src/chunked_stream.js | 107 ++++--- src/core.js | 61 ++-- src/evaluator.js | 673 ++++++++++++++++++++---------------------- src/obj.js | 203 ++++++++++++- 5 files changed, 684 insertions(+), 448 deletions(-) diff --git a/src/annotation.js b/src/annotation.js index 6cb889dc5..65ed7ffce 100644 --- a/src/annotation.js +++ b/src/annotation.js @@ -16,7 +16,7 @@ */ /* globals Util, isDict, isName, stringToPDFString, TODO, Dict, Stream, stringToBytes, PDFJS, isWorker, assert, NotImplementedException, - Promise, isArray */ + Promise, isArray, ObjectLoader */ 'use strict'; @@ -139,6 +139,20 @@ var Annotation = (function AnnotationClosure() { ); }, + loadResources: function(keys) { + var promise = new Promise(); + this.appearance.dict.getAsync('Resources').then(function(resources) { + var objectLoader = new ObjectLoader(resources.map, + keys, + resources.xref); + objectLoader.load().then(function() { + promise.resolve(resources); + }); + }.bind(this)); + + return promise; + }, + getOperatorList: function Annotation_getToOperatorList(evaluator) { var promise = new Promise(); @@ -157,26 +171,37 @@ var Annotation = (function AnnotationClosure() { var data = this.data; var appearanceDict = this.appearance.dict; - var resources = appearanceDict.get('Resources'); + var resourcesPromise = this.loadResources([ + 'ExtGState', + 'ColorSpace', + 'Pattern', + 'Shading', + 'XObject', + 'Font' + // ProcSet + // Properties + ]); var bbox = appearanceDict.get('BBox') || [0, 0, 1, 1]; var matrix = appearanceDict.get('Matrix') || [1, 0, 0, 1, 0 ,0]; var transform = getTransformMatrix(data.rect, bbox, matrix); var border = data.border; - var listPromise = evaluator.getOperatorList(this.appearance, resources); - listPromise.then(function(appearanceStreamData) { - var fnArray = appearanceStreamData.queue.fnArray; - var argsArray = appearanceStreamData.queue.argsArray; + resourcesPromise.then(function(resources) { + var listPromise = evaluator.getOperatorList(this.appearance, resources); + listPromise.then(function(appearanceStreamData) { + var fnArray = appearanceStreamData.queue.fnArray; + var argsArray = appearanceStreamData.queue.argsArray; - fnArray.unshift('beginAnnotation'); - argsArray.unshift([data.rect, transform, matrix]); + fnArray.unshift('beginAnnotation'); + argsArray.unshift([data.rect, transform, matrix]); - fnArray.push('endAnnotation'); - argsArray.push([]); + fnArray.push('endAnnotation'); + argsArray.push([]); - promise.resolve(appearanceStreamData); - }); + promise.resolve(appearanceStreamData); + }); + }.bind(this)); return promise; } @@ -263,32 +288,27 @@ var Annotation = (function AnnotationClosure() { var annotationsReadyPromise = new Promise(); - var ensurePromises = []; + var annotationPromises = []; for (var i = 0, n = annotations.length; i < n; ++i) { - var ensurePromise = pdfManager.ensure(annotations[i], - 'getOperatorList', - [partialEvaluator]); - ensurePromises.push(ensurePromise); + annotationPromises.push(annotations[i].getOperatorList(partialEvaluator)); } - Promise.all(ensurePromises).then(function(listPromises) { - Promise.all(listPromises).then(function(datas) { - var fnArray = pageQueue.fnArray; - var argsArray = pageQueue.argsArray; - fnArray.push('beginAnnotations'); - argsArray.push([]); - for (var i = 0, n = datas.length; i < n; ++i) { - var annotationData = datas[i]; - var annotationQueue = annotationData.queue; - Util.concatenateToArray(fnArray, annotationQueue.fnArray); - Util.concatenateToArray(argsArray, annotationQueue.argsArray); - Util.extendObj(dependencies, annotationData.dependencies); - } - fnArray.push('endAnnotations'); - argsArray.push([]); + Promise.all(annotationPromises).then(function(datas) { + var fnArray = pageQueue.fnArray; + var argsArray = pageQueue.argsArray; + fnArray.push('beginAnnotations'); + argsArray.push([]); + for (var i = 0, n = datas.length; i < n; ++i) { + var annotationData = datas[i]; + var annotationQueue = annotationData.queue; + Util.concatenateToArray(fnArray, annotationQueue.fnArray); + Util.concatenateToArray(argsArray, annotationQueue.argsArray); + Util.extendObj(dependencies, annotationData.dependencies); + } + fnArray.push('endAnnotations'); + argsArray.push([]); - annotationsReadyPromise.resolve(); - }, reject); + annotationsReadyPromise.resolve(); }, reject); return annotationsReadyPromise; diff --git a/src/chunked_stream.js b/src/chunked_stream.js index 82599a9de..8d8811bd1 100644 --- a/src/chunked_stream.js +++ b/src/chunked_stream.js @@ -20,7 +20,7 @@ 'use strict'; var ChunkedStream = (function ChunkedStreamClosure() { - function ChunkedStream(length, chunkSize) { + function ChunkedStream(length, chunkSize, manager) { this.bytes = new Uint8Array(length); this.start = 0; this.pos = 0; @@ -29,6 +29,7 @@ var ChunkedStream = (function ChunkedStreamClosure() { this.loadedChunks = []; this.numChunksLoaded = 0; this.numChunks = Math.ceil(length / chunkSize); + this.manager = manager; } // required methods for a stream. if a particular stream does not @@ -178,6 +179,18 @@ var ChunkedStream = (function ChunkedStreamClosure() { makeSubStream: function ChunkedStream_makeSubStream(start, length, dict) { function ChunkedStreamSubstream() {} ChunkedStreamSubstream.prototype = Object.create(this); + ChunkedStreamSubstream.prototype.getMissingChunks = function() { + var chunkSize = this.chunkSize; + var beginChunk = Math.floor(this.start / chunkSize); + var endChunk = Math.floor((this.end - 1) / chunkSize) + 1; + var missingChunks = []; + for (var chunk = beginChunk; chunk < endChunk; ++chunk) { + if (!(chunk in this.loadedChunks)) { + missingChunks.push(chunk); + } + } + return missingChunks; + }; var subStream = new ChunkedStreamSubstream(); subStream.pos = subStream.start = start; subStream.end = start + length || this.end; @@ -195,7 +208,7 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() { function ChunkedStreamManager(length, chunkSize, url, args) { var self = this; - this.stream = new ChunkedStream(length, chunkSize); + this.stream = new ChunkedStream(length, chunkSize, this); this.length = length; this.chunkSize = chunkSize; this.url = url; @@ -248,50 +261,26 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() { // contiguous ranges to load in as few requests as possible requestAllChunks: function ChunkedStreamManager_requestAllChunks() { var missingChunks = this.stream.getMissingChunks(); - var chunksToRequest = []; - for (var i = 0, n = missingChunks.length; i < n; ++i) { - var chunk = missingChunks[i]; - if (!(chunk in this.requestsByChunk)) { - this.requestsByChunk[chunk] = []; - chunksToRequest.push(chunk); - } - } - var groupedChunks = this.groupChunks(chunksToRequest); - for (var i = 0, n = groupedChunks.length; i < n; ++i) { - var groupedChunk = groupedChunks[i]; - var begin = groupedChunk.beginChunk * this.chunkSize; - var end = Math.min(groupedChunk.endChunk * this.chunkSize, this.length); - this.sendRequest(begin, end); - } - + this.requestChunks(missingChunks); return this.loadedStream; }, - getStream: function ChunkedStreamManager_getStream() { - return this.stream; - }, - - // Loads any chunks in the requested range that are not yet loaded - requestRange: function ChunkedStreamManager_requestRange( - begin, end, callback) { - - end = Math.min(end, this.length); - - var beginChunk = this.getBeginChunk(begin); - var endChunk = this.getEndChunk(end); - + requestChunks: function ChunkedStreamManager_requestChunks(chunks, + callback) { var requestId = this.currRequestId++; var chunksNeeded; this.chunksNeededByRequest[requestId] = chunksNeeded = {}; - for (var chunk = beginChunk; chunk < endChunk; ++chunk) { - if (!this.stream.hasChunk(chunk)) { - chunksNeeded[chunk] = true; + for (var i = 0, ii = chunks.length; i < ii; i++) { + if (!this.stream.hasChunk(chunks[i])) { + chunksNeeded[chunks[i]] = true; } } if (isEmptyObj(chunksNeeded)) { - callback(); + if (callback) { + callback(); + } return; } @@ -321,6 +310,46 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() { } }, + getStream: function ChunkedStreamManager_getStream() { + return this.stream; + }, + + // Loads any chunks in the requested range that are not yet loaded + requestRange: function ChunkedStreamManager_requestRange( + begin, end, callback) { + + end = Math.min(end, this.length); + + var beginChunk = this.getBeginChunk(begin); + var endChunk = this.getEndChunk(end); + + var chunks = []; + for (var chunk = beginChunk; chunk < endChunk; ++chunk) { + chunks.push(chunk); + } + + this.requestChunks(chunks, callback); + }, + + requestRanges: function ChunkedStreamManager_requestRanges(ranges, + callback) { + ranges = ranges || []; + var chunksToRequest = []; + + for (var i = 0; i < ranges.length; i++) { + var beginChunk = this.getBeginChunk(ranges[i].begin); + var endChunk = this.getEndChunk(ranges[i].end); + for (var chunk = beginChunk; chunk < endChunk; ++chunk) { + if (chunksToRequest.indexOf(chunk) < 0) { + chunksToRequest.push(chunk); + } + } + } + + chunksToRequest.sort(function(a, b) { return a - b; }); + this.requestChunks(chunksToRequest, callback); + }, + // Groups a sorted array of chunks into as few continguous larger // chunks as possible groupChunks: function ChunkedStreamManager_groupChunks(chunks) { @@ -409,9 +438,7 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() { nextEmptyChunk = this.stream.nextEmptyChunk(endChunk); } if (isInt(nextEmptyChunk)) { - var nextEmptyByte = nextEmptyChunk * this.chunkSize; - this.requestRange(nextEmptyByte, nextEmptyByte + this.chunkSize, - function() {}); + this.requestChunks([nextEmptyChunk]); } } @@ -419,7 +446,9 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() { var requestId = loadedRequests[i]; var callback = this.callbacksByRequest[requestId]; delete this.callbacksByRequest[requestId]; - callback(); + if (callback) { + callback(); + } } this.msgHandler.send('DocProgress', { diff --git a/src/core.js b/src/core.js index df2c05d5e..3437b9bfb 100644 --- a/src/core.js +++ b/src/core.js @@ -18,7 +18,7 @@ isArrayBuffer, isDict, isName, isStream, isString, Lexer, Linearization, NullStream, PartialEvaluator, shadow, Stream, StreamsSequenceStream, stringToPDFString, TODO, Util, warn, XRef, - MissingDataException, Promise, Annotation */ + MissingDataException, Promise, Annotation, ObjectLoader */ 'use strict'; @@ -51,6 +51,7 @@ var Page = (function PageClosure() { font: 0, obj: 0 }; + this.resourcesPromise = null; } Page.prototype = { @@ -133,6 +134,22 @@ var Page = (function PageClosure() { } return stream; }, + loadResources: function(keys) { + if (!this.resourcesPromise) { + // TODO: add async inheritPageProp and remove this. + this.resourcesPromise = this.pdfManager.ensure(this, 'resources'); + } + var promise = new Promise(); + this.resourcesPromise.then(function resourceSuccess() { + var objectLoader = new ObjectLoader(this.resources.map, + keys, + this.xref); + objectLoader.load().then(function objectLoaderSuccess() { + promise.resolve(); + }); + }.bind(this)); + return promise; + }, getOperatorList: function Page_getOperatorList(handler) { var self = this; var promise = new Promise(); @@ -146,7 +163,16 @@ var Page = (function PageClosure() { var pdfManager = this.pdfManager; var contentStreamPromise = pdfManager.ensure(this, 'getContentStream', []); - var resourcesPromise = pdfManager.ensure(this, 'resources'); + var resourcesPromise = this.loadResources([ + 'ExtGState', + 'ColorSpace', + 'Pattern', + 'Shading', + 'XObject', + 'Font', + // ProcSet + // Properties + ]); var partialEvaluator = new PartialEvaluator( pdfManager, this.xref, handler, @@ -157,14 +183,10 @@ var Page = (function PageClosure() { [contentStreamPromise, resourcesPromise], reject); dataPromises.then(function(data) { var contentStream = data[0]; - var resources = data[1]; - pdfManager.ensure(partialEvaluator, 'getOperatorList', - [contentStream, resources]).then( - function(opListPromise) { - opListPromise.then(function(data) { - pageListPromise.resolve(data); - }); + partialEvaluator.getOperatorList(contentStream, self.resources).then( + function(data) { + pageListPromise.resolve(data); }, reject ); @@ -175,6 +197,7 @@ var Page = (function PageClosure() { var pageData = datas[0]; var pageQueue = pageData.queue; var annotations = datas[1]; + if (annotations.length === 0) { PartialEvaluator.optimizeQueue(pageQueue); promise.resolve(pageData); @@ -186,6 +209,7 @@ var Page = (function PageClosure() { annotations, pageQueue, pdfManager, dependencies, partialEvaluator); annotationsReadyPromise.then(function () { PartialEvaluator.optimizeQueue(pageQueue); + promise.resolve(pageData); }, reject); }, reject); @@ -205,27 +229,24 @@ var Page = (function PageClosure() { var pdfManager = this.pdfManager; var contentStreamPromise = pdfManager.ensure(this, 'getContentStream', []); - var resourcesPromise = new Promise(); - pdfManager.ensure(this, 'resources').then(function(resources) { - pdfManager.ensure(self.xref, 'fetchIfRef', [resources]).then( - function(resources) { - resourcesPromise.resolve(resources); - } - ); - }); + + var resourcesPromise = this.loadResources([ + 'ExtGState', + 'XObject', + 'Font' + ]); var dataPromises = Promise.all([contentStreamPromise, resourcesPromise]); dataPromises.then(function(data) { var contentStream = data[0]; - var resources = data[1]; var partialEvaluator = new PartialEvaluator( pdfManager, self.xref, handler, self.pageIndex, 'p' + self.pageIndex + '_', self.idCounters); partialEvaluator.getTextContent( - contentStream, resources).then(function(bidiTexts) { + contentStream, self.resources).then(function(bidiTexts) { textContentPromise.resolve({ bidiTexts: bidiTexts }); @@ -282,7 +303,7 @@ var PDFDocument = (function PDFDocumentClosure() { assertWellFormed(stream.length > 0, 'stream must have data'); this.pdfManager = pdfManager; this.stream = stream; - var xref = new XRef(this.stream, password); + var xref = new XRef(this.stream, password, pdfManager); this.xref = xref; } diff --git a/src/evaluator.js b/src/evaluator.js index c1bd5ab52..b031c36a1 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -540,9 +540,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { try { translated = this.translateFont(font, xref); } catch (e) { - if (e instanceof MissingDataException) { - throw e; - } translated = new ErrorFont(e instanceof Error ? e.message : e); } font.translated = translated; @@ -611,221 +608,208 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var parser = new Parser(new Lexer(stream, OP_MAP), false, xref); var promise = new Promise(); - function parseCommands() { - try { - parser.restoreState(); - var args = []; - while (true) { + var args = []; + while (true) { - var obj = parser.getObj(); + var obj = parser.getObj(); - if (isEOF(obj)) { - break; + if (isEOF(obj)) { + break; + } + + if (isCmd(obj)) { + var cmd = obj.cmd; + + // Check that the command is valid + var opSpec = OP_MAP[cmd]; + if (!opSpec) { + warn('Unknown command "' + cmd + '"'); + continue; + } + + var fn = opSpec.fnName; + + // Validate the number of arguments for the command + if (opSpec.variableArgs) { + if (args.length > opSpec.numArgs) { + info('Command ' + fn + ': expected [0,' + opSpec.numArgs + + '] args, but received ' + args.length + ' args'); } + } else { + if (args.length < opSpec.numArgs) { + // If we receive too few args, it's not possible to possible + // to execute the command, so skip the command + info('Command ' + fn + ': because expected ' + + opSpec.numArgs + ' args, but received ' + args.length + + ' args; skipping'); + args = []; + continue; + } else if (args.length > opSpec.numArgs) { + info('Command ' + fn + ': expected ' + opSpec.numArgs + + ' args, but received ' + args.length + ' args'); + } + } - if (isCmd(obj)) { - var cmd = obj.cmd; + // TODO figure out how to type-check vararg functions - // Check that the command is valid - var opSpec = OP_MAP[cmd]; - if (!opSpec) { - warn('Unknown command "' + cmd + '"'); - continue; - } + if ((cmd == 'SCN' || cmd == 'scn') && + !args[args.length - 1].code) { + // compile tiling patterns + var patternName = args[args.length - 1]; + // SCN/scn applies patterns along with normal colors + var pattern; + if (isName(patternName) && + (pattern = patterns.get(patternName.name))) { - var fn = opSpec.fnName; + var dict = isStream(pattern) ? pattern.dict : pattern; + var typeNum = dict.get('PatternType'); - // Validate the number of arguments for the command - if (opSpec.variableArgs) { - if (args.length > opSpec.numArgs) { - info('Command ' + fn + ': expected [0,' + opSpec.numArgs + - '] args, but received ' + args.length + ' args'); - } - } else { - if (args.length < opSpec.numArgs) { - // If we receive too few args, it's not possible to possible - // to execute the command, so skip the command - info('Command ' + fn + ': because expected ' + - opSpec.numArgs + ' args, but received ' + args.length + - ' args; skipping'); - args = []; - continue; - } else if (args.length > opSpec.numArgs) { - info('Command ' + fn + ': expected ' + opSpec.numArgs + - ' args, but received ' + args.length + ' args'); - } - } - - // TODO figure out how to type-check vararg functions - - if ((cmd == 'SCN' || cmd == 'scn') && - !args[args.length - 1].code) { - // compile tiling patterns - var patternName = args[args.length - 1]; - // SCN/scn applies patterns along with normal colors - var pattern; - if (isName(patternName) && - (pattern = patterns.get(patternName.name))) { - - var dict = isStream(pattern) ? pattern.dict : pattern; - var typeNum = dict.get('PatternType'); - - if (typeNum == TILING_PATTERN) { - var patternPromise = self.handleTilingType( - fn, args, resources, pattern, dict); - fn = 'promise'; - args = [patternPromise]; - } else if (typeNum == SHADING_PATTERN) { - var shading = dict.get('Shading'); - var matrix = dict.get('Matrix'); - var pattern = Pattern.parseShading(shading, matrix, xref, - resources); - args = pattern.getIR(); - } else { - error('Unkown PatternType ' + typeNum); - } - } - } else if (cmd == 'Do' && !args[0].code) { - // eagerly compile XForm objects - var name = args[0].name; - var xobj = xobjs.get(name); - if (xobj) { - assertWellFormed( - isStream(xobj), 'XObject should be a stream'); - - var type = xobj.dict.get('Subtype'); - assertWellFormed( - isName(type), - 'XObject should have a Name subtype' - ); - - if ('Form' == type.name) { - fn = 'promise'; - args = [self.buildFormXObject(resources, xobj)]; - } else if ('Image' == type.name) { - var data = self.buildPaintImageXObject( - resources, xobj, false); - Util.extendObj(dependencies, data.dependencies); - self.insertDependencies(queue, data.dependencies); - fn = data.fn; - args = data.args; - } else { - error('Unhandled XObject subtype ' + type.name); - } - } - } else if (cmd == 'Tf') { // eagerly collect all fonts + if (typeNum == TILING_PATTERN) { + var patternPromise = self.handleTilingType( + fn, args, resources, pattern, dict); fn = 'promise'; - args = [self.handleSetFont(resources, args)]; - } else if (cmd == 'EI') { + args = [patternPromise]; + } else if (typeNum == SHADING_PATTERN) { + var shading = dict.get('Shading'); + var matrix = dict.get('Matrix'); + var pattern = Pattern.parseShading(shading, matrix, xref, + resources); + args = pattern.getIR(); + } else { + error('Unkown PatternType ' + typeNum); + } + } + } else if (cmd == 'Do' && !args[0].code) { + // eagerly compile XForm objects + var name = args[0].name; + var xobj = xobjs.get(name); + if (xobj) { + assertWellFormed( + isStream(xobj), 'XObject should be a stream'); + + var type = xobj.dict.get('Subtype'); + assertWellFormed( + isName(type), + 'XObject should have a Name subtype' + ); + + if ('Form' == type.name) { + fn = 'promise'; + args = [self.buildFormXObject(resources, xobj)]; + } else if ('Image' == type.name) { var data = self.buildPaintImageXObject( - resources, args[0], true); + resources, xobj, false); Util.extendObj(dependencies, data.dependencies); self.insertDependencies(queue, data.dependencies); fn = data.fn; args = data.args; - } - - switch (fn) { - // Parse the ColorSpace data to a raw format. - case 'setFillColorSpace': - case 'setStrokeColorSpace': - args = [ColorSpace.parseToIR(args[0], xref, resources)]; - break; - case 'shadingFill': - var shadingRes = resources.get('Shading'); - if (!shadingRes) - error('No shading resource found'); - - var shading = shadingRes.get(args[0].name); - if (!shading) - error('No shading object found'); - - var shadingFill = Pattern.parseShading( - shading, null, xref, resources); - var patternIR = shadingFill.getIR(); - args = [patternIR]; - fn = 'shadingFill'; - break; - case 'setGState': - var dictName = args[0]; - var extGState = resources.get('ExtGState'); - - if (!isDict(extGState) || !extGState.has(dictName.name)) - break; - - var gState = extGState.get(dictName.name); - fn = 'promise'; - args = [self.setGState(resources, gState)]; - } // switch - - fnArray.push(fn); - argsArray.push(args); - args = []; - parser.saveState(); - } else if (obj !== null && obj !== undefined) { - args.push(obj instanceof Dict ? obj.getAll() : obj); - assertWellFormed(args.length <= 33, 'Too many arguments'); - } - } - - var subQueuePromises = []; - for (var i = 0; i < fnArray.length; ++i) { - if (fnArray[i] === 'promise') { - subQueuePromises.push(argsArray[i][0]); - } - } - Promise.all(subQueuePromises).then(function(datas) { - // TODO(mack): Optimize by using repositioning elements - // in original queue rather than creating new queue - - for (var i = 0, n = datas.length; i < n; ++i) { - var data = datas[i]; - var subQueue = data.queue; - queue.transparency = subQueue.transparency || queue.transparency; - Util.extendObj(dependencies, data.dependencies); - } - - var newFnArray = []; - var newArgsArray = []; - var currOffset = 0; - var subQueueIdx = 0; - for (var i = 0, n = fnArray.length; i < n; ++i) { - var offset = i + currOffset; - if (fnArray[i] === 'promise') { - var data = datas[subQueueIdx++]; - var subQueue = data.queue; - var subQueueFnArray = subQueue.fnArray; - var subQueueArgsArray = subQueue.argsArray; - for (var j = 0, nn = subQueueFnArray.length; j < nn; ++j) { - newFnArray[offset + j] = subQueueFnArray[j]; - newArgsArray[offset + j] = subQueueArgsArray[j]; - } - currOffset += subQueueFnArray.length - 1; } else { - newFnArray[offset] = fnArray[i]; - newArgsArray[offset] = argsArray[i]; + error('Unhandled XObject subtype ' + type.name); } } - - promise.resolve({ - queue: { - fnArray: newFnArray, - argsArray: newArgsArray, - transparency: queue.transparency - }, - dependencies: dependencies - }); - }); - } catch (e) { - if (!(e instanceof MissingDataException)) { - throw e; + } else if (cmd == 'Tf') { // eagerly collect all fonts + fn = 'promise'; + args = [self.handleSetFont(resources, args)]; + } else if (cmd == 'EI') { + var data = self.buildPaintImageXObject( + resources, args[0], true); + Util.extendObj(dependencies, data.dependencies); + self.insertDependencies(queue, data.dependencies); + fn = data.fn; + args = data.args; } - self.pdfManager.requestRange(e.begin, e.end).then(parseCommands); + switch (fn) { + // Parse the ColorSpace data to a raw format. + case 'setFillColorSpace': + case 'setStrokeColorSpace': + args = [ColorSpace.parseToIR(args[0], xref, resources)]; + break; + case 'shadingFill': + var shadingRes = resources.get('Shading'); + if (!shadingRes) + error('No shading resource found'); + + var shading = shadingRes.get(args[0].name); + if (!shading) + error('No shading object found'); + + var shadingFill = Pattern.parseShading( + shading, null, xref, resources); + var patternIR = shadingFill.getIR(); + args = [patternIR]; + fn = 'shadingFill'; + break; + case 'setGState': + var dictName = args[0]; + var extGState = resources.get('ExtGState'); + + if (!isDict(extGState) || !extGState.has(dictName.name)) + break; + + var gState = extGState.get(dictName.name); + fn = 'promise'; + args = [self.setGState(resources, gState)]; + } // switch + + fnArray.push(fn); + argsArray.push(args); + args = []; + parser.saveState(); + } else if (obj !== null && obj !== undefined) { + args.push(obj instanceof Dict ? obj.getAll() : obj); + assertWellFormed(args.length <= 33, 'Too many arguments'); } } - parser.saveState(); - parseCommands(); + + var subQueuePromises = []; + for (var i = 0; i < fnArray.length; ++i) { + if (fnArray[i] === 'promise') { + subQueuePromises.push(argsArray[i][0]); + } + } + Promise.all(subQueuePromises).then(function(datas) { + // TODO(mack): Optimize by using repositioning elements + // in original queue rather than creating new queue + + for (var i = 0, n = datas.length; i < n; ++i) { + var data = datas[i]; + var subQueue = data.queue; + queue.transparency = subQueue.transparency || queue.transparency; + Util.extendObj(dependencies, data.dependencies); + } + + var newFnArray = []; + var newArgsArray = []; + var currOffset = 0; + var subQueueIdx = 0; + for (var i = 0, n = fnArray.length; i < n; ++i) { + var offset = i + currOffset; + if (fnArray[i] === 'promise') { + var data = datas[subQueueIdx++]; + var subQueue = data.queue; + var subQueueFnArray = subQueue.fnArray; + var subQueueArgsArray = subQueue.argsArray; + for (var j = 0, nn = subQueueFnArray.length; j < nn; ++j) { + newFnArray[offset + j] = subQueueFnArray[j]; + newArgsArray[offset + j] = subQueueArgsArray[j]; + } + currOffset += subQueueFnArray.length - 1; + } else { + newFnArray[offset] = fnArray[i]; + newArgsArray[offset] = argsArray[i]; + } + } + + promise.resolve({ + queue: { + fnArray: newFnArray, + argsArray: newArgsArray, + transparency: queue.transparency + }, + dependencies: dependencies + }); + }); return promise; }, @@ -863,161 +847,148 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var chunkPromises = []; var fontPromise; - function parseCommands() { - try { - parser.restoreState(); - var args = []; + var args = []; - while (true) { - var obj = parser.getObj(); - if (isEOF(obj)) { - break; - } - - if (isCmd(obj)) { - var cmd = obj.cmd; - switch (cmd) { - // TODO: Add support for SAVE/RESTORE and XFORM here. - case 'Tf': - fontPromise = handleSetFont(args[0].name, null, resources); - //.translated; - break; - case 'TJ': - var chunkPromise = new Promise(); - chunkPromises.push(chunkPromise); - fontPromise.then(function(items, chunkPromise, font) { - var chunk = ''; - for (var j = 0, jj = items.length; j < jj; j++) { - if (typeof items[j] === 'string') { - chunk += fontCharsToUnicode(items[j], font); - } else if (items[j] < 0 && font.spaceWidth > 0) { - var fakeSpaces = -items[j] / font.spaceWidth; - if (fakeSpaces > MULTI_SPACE_FACTOR) { - fakeSpaces = Math.round(fakeSpaces); - while (fakeSpaces--) { - chunk += ' '; - } - } else if (fakeSpaces > SPACE_FACTOR) { - chunk += ' '; - } - } - } - chunkPromise.resolve( - getBidiText(chunk, -1, font.vertical)); - }.bind(null, args[0], chunkPromise)); - break; - case 'Tj': - var chunkPromise = new Promise(); - chunkPromises.push(chunkPromise); - fontPromise.then(function(charCodes, chunkPromise, font) { - var chunk = fontCharsToUnicode(charCodes, font); - chunkPromise.resolve( - getBidiText(chunk, -1, font.vertical)); - }.bind(null, args[0], chunkPromise)); - break; - case '\'': - // For search, adding a extra white space for line breaks - // would be better here, but that causes too much spaces in - // the text-selection divs. - var chunkPromise = new Promise(); - chunkPromises.push(chunkPromise); - fontPromise.then(function(charCodes, chunkPromise, font) { - var chunk = fontCharsToUnicode(charCodes, font); - chunkPromise.resolve( - getBidiText(chunk, -1, font.vertical)); - }.bind(null, args[0], chunkPromise)); - break; - case '"': - // Note comment in "'" - var chunkPromise = new Promise(); - chunkPromises.push(chunkPromise); - fontPromise.then(function(charCodes, chunkPromise, font) { - var chunk = fontCharsToUnicode(charCodes, font); - chunkPromise.resolve( - getBidiText(chunk, -1, font.vertical)); - }.bind(null, args[2], chunkPromise)); - break; - case 'Do': - if (args[0].code) { - break; - } - - if (!xobjs) { - xobjs = resources.get('XObject') || new Dict(); - } - - var name = args[0].name; - var xobj = xobjs.get(name); - if (!xobj) - break; - assertWellFormed(isStream(xobj), - 'XObject should be a stream'); - - var type = xobj.dict.get('Subtype'); - assertWellFormed( - isName(type), - 'XObject should have a Name subtype' - ); - - if ('Form' !== type.name) - break; - - var chunkPromise = self.getTextContent( - xobj, - xobj.dict.get('Resources') || resources - ); - chunkPromises.push(chunkPromise); - break; - case 'gs': - var dictName = args[0]; - var extGState = resources.get('ExtGState'); - - if (!isDict(extGState) || !extGState.has(dictName.name)) - break; - - var gsState = extGState.get(dictName.name); - - for (var i = 0; i < gsState.length; i++) { - if (gsState[i] === 'Font') { - fontPromise = handleSetFont( - args[0].name, null, resources); - } - } - break; - } // switch - - args = []; - parser.saveState(); - } else if (obj !== null && obj !== undefined) { - assertWellFormed(args.length <= 33, 'Too many arguments'); - args.push(obj); - } - } // while - - Promise.all(chunkPromises).then(function(datas) { - var bidiTexts = []; - for (var i = 0, n = datas.length; i < n; ++i) { - var bidiText = datas[i]; - if (!bidiText) { - continue; - } else if (isArray(bidiText)) { - Util.concatenateToArray(bidiTexts, bidiText); - } else { - bidiTexts.push(bidiText); - } - } - statePromise.resolve(bidiTexts); - }); - } catch (e) { - if (!(e instanceof MissingDataException)) { - throw e; - } - - self.pdfManager.requestRange(e.begin, e.end).then(parseCommands); + while (true) { + var obj = parser.getObj(); + if (isEOF(obj)) { + break; } - } - parser.saveState(); - parseCommands(); + + if (isCmd(obj)) { + var cmd = obj.cmd; + switch (cmd) { + // TODO: Add support for SAVE/RESTORE and XFORM here. + case 'Tf': + fontPromise = handleSetFont(args[0].name, null, resources); + //.translated; + break; + case 'TJ': + var chunkPromise = new Promise(); + chunkPromises.push(chunkPromise); + fontPromise.then(function(items, chunkPromise, font) { + var chunk = ''; + for (var j = 0, jj = items.length; j < jj; j++) { + if (typeof items[j] === 'string') { + chunk += fontCharsToUnicode(items[j], font); + } else if (items[j] < 0 && font.spaceWidth > 0) { + var fakeSpaces = -items[j] / font.spaceWidth; + if (fakeSpaces > MULTI_SPACE_FACTOR) { + fakeSpaces = Math.round(fakeSpaces); + while (fakeSpaces--) { + chunk += ' '; + } + } else if (fakeSpaces > SPACE_FACTOR) { + chunk += ' '; + } + } + } + chunkPromise.resolve( + getBidiText(chunk, -1, font.vertical)); + }.bind(null, args[0], chunkPromise)); + break; + case 'Tj': + var chunkPromise = new Promise(); + chunkPromises.push(chunkPromise); + fontPromise.then(function(charCodes, chunkPromise, font) { + var chunk = fontCharsToUnicode(charCodes, font); + chunkPromise.resolve( + getBidiText(chunk, -1, font.vertical)); + }.bind(null, args[0], chunkPromise)); + break; + case '\'': + // For search, adding a extra white space for line breaks + // would be better here, but that causes too much spaces in + // the text-selection divs. + var chunkPromise = new Promise(); + chunkPromises.push(chunkPromise); + fontPromise.then(function(charCodes, chunkPromise, font) { + var chunk = fontCharsToUnicode(charCodes, font); + chunkPromise.resolve( + getBidiText(chunk, -1, font.vertical)); + }.bind(null, args[0], chunkPromise)); + break; + case '"': + // Note comment in "'" + var chunkPromise = new Promise(); + chunkPromises.push(chunkPromise); + fontPromise.then(function(charCodes, chunkPromise, font) { + var chunk = fontCharsToUnicode(charCodes, font); + chunkPromise.resolve( + getBidiText(chunk, -1, font.vertical)); + }.bind(null, args[2], chunkPromise)); + break; + case 'Do': + if (args[0].code) { + break; + } + + if (!xobjs) { + xobjs = resources.get('XObject') || new Dict(); + } + + var name = args[0].name; + var xobj = xobjs.get(name); + if (!xobj) + break; + assertWellFormed(isStream(xobj), + 'XObject should be a stream'); + + var type = xobj.dict.get('Subtype'); + assertWellFormed( + isName(type), + 'XObject should have a Name subtype' + ); + + if ('Form' !== type.name) + break; + + var chunkPromise = self.getTextContent( + xobj, + xobj.dict.get('Resources') || resources + ); + chunkPromises.push(chunkPromise); + break; + case 'gs': + var dictName = args[0]; + var extGState = resources.get('ExtGState'); + + if (!isDict(extGState) || !extGState.has(dictName.name)) + break; + + var gsState = extGState.get(dictName.name); + + for (var i = 0; i < gsState.length; i++) { + if (gsState[i] === 'Font') { + fontPromise = handleSetFont( + args[0].name, null, resources); + } + } + break; + } // switch + + args = []; + parser.saveState(); + } else if (obj !== null && obj !== undefined) { + assertWellFormed(args.length <= 33, 'Too many arguments'); + args.push(obj); + } + } // while + + Promise.all(chunkPromises).then(function(datas) { + var bidiTexts = []; + for (var i = 0, n = datas.length; i < n; ++i) { + var bidiText = datas[i]; + if (!bidiText) { + continue; + } else if (isArray(bidiText)) { + Util.concatenateToArray(bidiTexts, bidiText); + } else { + bidiTexts.push(bidiText); + } + } + statePromise.resolve(bidiTexts); + }); return statePromise; }, diff --git a/src/obj.js b/src/obj.js index 09501a82a..9d787faf8 100644 --- a/src/obj.js +++ b/src/obj.js @@ -18,7 +18,8 @@ InvalidPDFException, isArray, isCmd, isDict, isInt, isName, isRef, isStream, JpegStream, Lexer, log, Page, Parser, Promise, shadow, stringToPDFString, stringToUTF8String, warn, isString, assert, - Promise, MissingDataException, XRefParseException, Stream */ + Promise, MissingDataException, XRefParseException, Stream, + ChunkedStream */ 'use strict'; @@ -86,6 +87,38 @@ var Dict = (function DictClosure() { return xref ? xref.fetchIfRef(value) : value; }, + // Same as get(), but returns a promise and uses fetchIfRefAsync(). + getAsync: function Dict_getAsync(key1, key2, key3) { + var value; + var promise; + var xref = this.xref; + if (typeof (value = this.map[key1]) !== undefined || key1 in this.map || + typeof key2 === undefined) { + if (xref) { + return xref.fetchIfRefAsync(value); + } + promise = new Promise(); + promise.resolve(value); + return promise; + } + if (typeof (value = this.map[key2]) !== undefined || key2 in this.map || + typeof key3 === undefined) { + if (xref) { + return xref.fetchIfRefAsync(value); + } + promise = new Promise(); + promise.resolve(value); + return promise; + } + value = this.map[key3] || null; + if (xref) { + return xref.fetchIfRefAsync(value); + } + promise = new Promise(); + promise.resolve(value); + return promise; + }, + // no dereferencing getRaw: function Dict_getRaw(key) { return this.map[key]; @@ -139,11 +172,15 @@ var RefSet = (function RefSetClosure() { RefSet.prototype = { has: function RefSet_has(ref) { - return !!this.dict['R' + ref.num + '.' + ref.gen]; + return ('R' + ref.num + '.' + ref.gen) in this.dict; }, put: function RefSet_put(ref) { - this.dict['R' + ref.num + '.' + ref.gen] = ref; + this.dict['R' + ref.num + '.' + ref.gen] = true; + }, + + remove: function RefSet_remove(ref) { + delete this.dict['R' + ref.num + '.' + ref.gen]; } }; @@ -811,7 +848,6 @@ var XRef = (function XRefClosure() { if (e instanceof MissingDataException) { throw e; } - log('(while reading XRef): ' + e); } @@ -938,6 +974,30 @@ var XRef = (function XRefClosure() { } return e; }, + fetchIfRefAsync: function XRef_fetchIfRefAsync(obj) { + if (!isRef(obj)) { + var promise = new Promise(); + promise.resolve(obj); + return promise; + } + return this.fetchAsync(obj); + }, + fetchAsync: function XRef_fetchAsync(ref, suppressEncryption) { + var promise = new Promise(); + var tryFetch = function (promise) { + try { + promise.resolve(this.fetch(ref, suppressEncryption)); + } catch (e) { + if (e instanceof MissingDataException) { + this.stream.manager.requestRange(e.begin, e.end, tryFetch); + return; + } + promise.reject(e); + } + }.bind(this, promise); + tryFetch(); + return promise; + }, getCatalogObj: function XRef_getCatalogObj() { return this.root; } @@ -1114,3 +1174,138 @@ var PDFObjects = (function PDFObjectsClosure() { return PDFObjects; })(); +/** + * A helper for loading missing data in object graphs. It traverses the graph + * depth first and queues up any objects that have missing data. Once it has + * has traversed as many objects that are available it attempts to bundle the + * missing data requests and then resume from the nodes that weren't ready. + * + * NOTE: It provides protection from circular references by keeping track of + * of loaded references. However, you must be careful not to load any graphs + * that have references to the catalog or other pages since that will cause the + * entire PDF document object graph to be traversed. + */ +var ObjectLoader = (function() { + + function mayHaveChildren(value) { + return isRef(value) || isDict(value) || isArray(value) || isStream(value); + } + + function addChildren(node, nodesToVisit) { + if (isDict(node) || isStream(node)) { + var map; + if (isDict(node)) { + map = node.map; + } else { + map = node.dict.map; + } + for (var key in map) { + var value = map[key]; + if (mayHaveChildren(value)) { + nodesToVisit.push(value); + } + } + } else if (isArray(node)) { + for (var i = 0, ii = node.length; i < ii; i++) { + var value = node[i]; + if (mayHaveChildren(value)) { + nodesToVisit.push(value); + } + } + } + } + + function ObjectLoader(obj, keys, xref) { + this.obj = obj; + this.keys = keys; + this.xref = xref; + this.refSet = null; + } + + ObjectLoader.prototype = { + + load: function ObjectLoader_load() { + var keys = this.keys; + this.promise = new Promise(); + // Don't walk the graph if all the data is already loaded. + if (!(this.xref.stream instanceof ChunkedStream) || + this.xref.stream.getMissingChunks().length === 0) { + this.promise.resolve(); + return this.promise; + } + + this.refSet = new RefSet(); + // Setup the initial nodes to visit. + var nodesToVisit = []; + for (var i = 0; i < keys.length; i++) { + nodesToVisit.push(this.obj[keys[i]]); + } + + this.walk(nodesToVisit); + return this.promise; + }, + + walk: function ObjectLoader_walk(nodesToVisit) { + var nodesToRevisit = []; + var pendingRequests = []; + // DFS walk of the object graph. + while (nodesToVisit.length) { + var currentNode = nodesToVisit.pop(); + + // Only references or chunked streams can cause missing data exceptions. + if (isRef(currentNode)) { + // Skip nodes that have already been visited. + if (this.refSet.has(currentNode)) { + continue; + } + try { + var ref = currentNode; + this.refSet.put(ref); + currentNode = this.xref.fetch(currentNode); + } catch (e) { + if (!(e instanceof MissingDataException)) { + throw e; + } + nodesToRevisit.push(currentNode); + pendingRequests.push({ begin: e.begin, end: e.end }); + } + } + if (currentNode instanceof ChunkedStream && + currentNode.getMissingChunks().length) { + nodesToRevisit.push(currentNode); + pendingRequests.push({ + begin: currentNode.start, + end: currentNode.end + }); + } + + addChildren(currentNode, nodesToVisit); + } + + if (pendingRequests.length) { + this.xref.stream.manager.requestRanges(pendingRequests, + function pendingRequestCallback() { + nodesToVisit = nodesToRevisit; + for (var i = 0; i < nodesToRevisit.length; i++) { + var node = nodesToRevisit[i]; + // Remove any reference nodes from the currrent refset so they + // aren't skipped when we revist them. + if (isRef(node)) { + this.refSet.remove(node); + } + } + this.walk(nodesToVisit); + }.bind(this)); + return; + } + // Everything is loaded. + this.refSet = null; + this.promise.resolve(); + } + + }; + + return ObjectLoader; +})(); + +