Load all resources before getOperatorList/getTextContent.

This commit is contained in:
Brendan Dahl 2013-06-04 17:57:52 -07:00
parent 8e1db0bb7c
commit 01ce3d056c
5 changed files with 684 additions and 448 deletions

View File

@ -16,7 +16,7 @@
*/ */
/* globals Util, isDict, isName, stringToPDFString, TODO, Dict, Stream, /* globals Util, isDict, isName, stringToPDFString, TODO, Dict, Stream,
stringToBytes, PDFJS, isWorker, assert, NotImplementedException, stringToBytes, PDFJS, isWorker, assert, NotImplementedException,
Promise, isArray */ Promise, isArray, ObjectLoader */
'use strict'; 'use strict';
@ -139,6 +139,20 @@ var Annotation = (function AnnotationClosure() {
); );
}, },
loadResources: function(keys) {
var promise = new Promise();
this.appearance.dict.getAsync('Resources').then(function(resources) {
var objectLoader = new ObjectLoader(resources.map,
keys,
resources.xref);
objectLoader.load().then(function() {
promise.resolve(resources);
});
}.bind(this));
return promise;
},
getOperatorList: function Annotation_getToOperatorList(evaluator) { getOperatorList: function Annotation_getToOperatorList(evaluator) {
var promise = new Promise(); var promise = new Promise();
@ -157,26 +171,37 @@ var Annotation = (function AnnotationClosure() {
var data = this.data; var data = this.data;
var appearanceDict = this.appearance.dict; var appearanceDict = this.appearance.dict;
var resources = appearanceDict.get('Resources'); var resourcesPromise = this.loadResources([
'ExtGState',
'ColorSpace',
'Pattern',
'Shading',
'XObject',
'Font'
// ProcSet
// Properties
]);
var bbox = appearanceDict.get('BBox') || [0, 0, 1, 1]; var bbox = appearanceDict.get('BBox') || [0, 0, 1, 1];
var matrix = appearanceDict.get('Matrix') || [1, 0, 0, 1, 0 ,0]; var matrix = appearanceDict.get('Matrix') || [1, 0, 0, 1, 0 ,0];
var transform = getTransformMatrix(data.rect, bbox, matrix); var transform = getTransformMatrix(data.rect, bbox, matrix);
var border = data.border; var border = data.border;
var listPromise = evaluator.getOperatorList(this.appearance, resources); resourcesPromise.then(function(resources) {
listPromise.then(function(appearanceStreamData) { var listPromise = evaluator.getOperatorList(this.appearance, resources);
var fnArray = appearanceStreamData.queue.fnArray; listPromise.then(function(appearanceStreamData) {
var argsArray = appearanceStreamData.queue.argsArray; var fnArray = appearanceStreamData.queue.fnArray;
var argsArray = appearanceStreamData.queue.argsArray;
fnArray.unshift('beginAnnotation'); fnArray.unshift('beginAnnotation');
argsArray.unshift([data.rect, transform, matrix]); argsArray.unshift([data.rect, transform, matrix]);
fnArray.push('endAnnotation'); fnArray.push('endAnnotation');
argsArray.push([]); argsArray.push([]);
promise.resolve(appearanceStreamData); promise.resolve(appearanceStreamData);
}); });
}.bind(this));
return promise; return promise;
} }
@ -263,32 +288,27 @@ var Annotation = (function AnnotationClosure() {
var annotationsReadyPromise = new Promise(); var annotationsReadyPromise = new Promise();
var ensurePromises = []; var annotationPromises = [];
for (var i = 0, n = annotations.length; i < n; ++i) { for (var i = 0, n = annotations.length; i < n; ++i) {
var ensurePromise = pdfManager.ensure(annotations[i], annotationPromises.push(annotations[i].getOperatorList(partialEvaluator));
'getOperatorList',
[partialEvaluator]);
ensurePromises.push(ensurePromise);
} }
Promise.all(ensurePromises).then(function(listPromises) { Promise.all(annotationPromises).then(function(datas) {
Promise.all(listPromises).then(function(datas) { var fnArray = pageQueue.fnArray;
var fnArray = pageQueue.fnArray; var argsArray = pageQueue.argsArray;
var argsArray = pageQueue.argsArray; fnArray.push('beginAnnotations');
fnArray.push('beginAnnotations'); argsArray.push([]);
argsArray.push([]); for (var i = 0, n = datas.length; i < n; ++i) {
for (var i = 0, n = datas.length; i < n; ++i) { var annotationData = datas[i];
var annotationData = datas[i]; var annotationQueue = annotationData.queue;
var annotationQueue = annotationData.queue; Util.concatenateToArray(fnArray, annotationQueue.fnArray);
Util.concatenateToArray(fnArray, annotationQueue.fnArray); Util.concatenateToArray(argsArray, annotationQueue.argsArray);
Util.concatenateToArray(argsArray, annotationQueue.argsArray); Util.extendObj(dependencies, annotationData.dependencies);
Util.extendObj(dependencies, annotationData.dependencies); }
} fnArray.push('endAnnotations');
fnArray.push('endAnnotations'); argsArray.push([]);
argsArray.push([]);
annotationsReadyPromise.resolve(); annotationsReadyPromise.resolve();
}, reject);
}, reject); }, reject);
return annotationsReadyPromise; return annotationsReadyPromise;

View File

@ -20,7 +20,7 @@
'use strict'; 'use strict';
var ChunkedStream = (function ChunkedStreamClosure() { var ChunkedStream = (function ChunkedStreamClosure() {
function ChunkedStream(length, chunkSize) { function ChunkedStream(length, chunkSize, manager) {
this.bytes = new Uint8Array(length); this.bytes = new Uint8Array(length);
this.start = 0; this.start = 0;
this.pos = 0; this.pos = 0;
@ -29,6 +29,7 @@ var ChunkedStream = (function ChunkedStreamClosure() {
this.loadedChunks = []; this.loadedChunks = [];
this.numChunksLoaded = 0; this.numChunksLoaded = 0;
this.numChunks = Math.ceil(length / chunkSize); this.numChunks = Math.ceil(length / chunkSize);
this.manager = manager;
} }
// required methods for a stream. if a particular stream does not // required methods for a stream. if a particular stream does not
@ -178,6 +179,18 @@ var ChunkedStream = (function ChunkedStreamClosure() {
makeSubStream: function ChunkedStream_makeSubStream(start, length, dict) { makeSubStream: function ChunkedStream_makeSubStream(start, length, dict) {
function ChunkedStreamSubstream() {} function ChunkedStreamSubstream() {}
ChunkedStreamSubstream.prototype = Object.create(this); ChunkedStreamSubstream.prototype = Object.create(this);
// Lists, in ascending order, the chunk numbers overlapped by this
// sub-stream's [start, end) byte span that have no entry in loadedChunks.
ChunkedStreamSubstream.prototype.getMissingChunks = function() {
  var size = this.chunkSize;
  var firstChunk = Math.floor(this.start / size);
  // One past the chunk that holds the last byte (end - 1) of the span.
  var chunkLimit = Math.floor((this.end - 1) / size) + 1;
  var missing = [];
  var index = firstChunk;
  while (index < chunkLimit) {
    var isLoaded = index in this.loadedChunks;
    if (!isLoaded) {
      missing.push(index);
    }
    ++index;
  }
  return missing;
};
var subStream = new ChunkedStreamSubstream(); var subStream = new ChunkedStreamSubstream();
subStream.pos = subStream.start = start; subStream.pos = subStream.start = start;
subStream.end = start + length || this.end; subStream.end = start + length || this.end;
@ -195,7 +208,7 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {
function ChunkedStreamManager(length, chunkSize, url, args) { function ChunkedStreamManager(length, chunkSize, url, args) {
var self = this; var self = this;
this.stream = new ChunkedStream(length, chunkSize); this.stream = new ChunkedStream(length, chunkSize, this);
this.length = length; this.length = length;
this.chunkSize = chunkSize; this.chunkSize = chunkSize;
this.url = url; this.url = url;
@ -248,50 +261,26 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {
// contiguous ranges to load in as few requests as possible // contiguous ranges to load in as few requests as possible
requestAllChunks: function ChunkedStreamManager_requestAllChunks() { requestAllChunks: function ChunkedStreamManager_requestAllChunks() {
var missingChunks = this.stream.getMissingChunks(); var missingChunks = this.stream.getMissingChunks();
var chunksToRequest = []; this.requestChunks(missingChunks);
for (var i = 0, n = missingChunks.length; i < n; ++i) {
var chunk = missingChunks[i];
if (!(chunk in this.requestsByChunk)) {
this.requestsByChunk[chunk] = [];
chunksToRequest.push(chunk);
}
}
var groupedChunks = this.groupChunks(chunksToRequest);
for (var i = 0, n = groupedChunks.length; i < n; ++i) {
var groupedChunk = groupedChunks[i];
var begin = groupedChunk.beginChunk * this.chunkSize;
var end = Math.min(groupedChunk.endChunk * this.chunkSize, this.length);
this.sendRequest(begin, end);
}
return this.loadedStream; return this.loadedStream;
}, },
getStream: function ChunkedStreamManager_getStream() { requestChunks: function ChunkedStreamManager_requestChunks(chunks,
return this.stream; callback) {
},
// Loads any chunks in the requested range that are not yet loaded
requestRange: function ChunkedStreamManager_requestRange(
begin, end, callback) {
end = Math.min(end, this.length);
var beginChunk = this.getBeginChunk(begin);
var endChunk = this.getEndChunk(end);
var requestId = this.currRequestId++; var requestId = this.currRequestId++;
var chunksNeeded; var chunksNeeded;
this.chunksNeededByRequest[requestId] = chunksNeeded = {}; this.chunksNeededByRequest[requestId] = chunksNeeded = {};
for (var chunk = beginChunk; chunk < endChunk; ++chunk) { for (var i = 0, ii = chunks.length; i < ii; i++) {
if (!this.stream.hasChunk(chunk)) { if (!this.stream.hasChunk(chunks[i])) {
chunksNeeded[chunk] = true; chunksNeeded[chunks[i]] = true;
} }
} }
if (isEmptyObj(chunksNeeded)) { if (isEmptyObj(chunksNeeded)) {
callback(); if (callback) {
callback();
}
return; return;
} }
@ -321,6 +310,46 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {
} }
}, },
// Returns the ChunkedStream instance stored on this manager (this.stream).
getStream: function ChunkedStreamManager_getStream() {
return this.stream;
},
// Loads any chunks in the requested range that are not yet loaded
requestRange: function ChunkedStreamManager_requestRange(
begin, end, callback) {
end = Math.min(end, this.length);
var beginChunk = this.getBeginChunk(begin);
var endChunk = this.getEndChunk(end);
var chunks = [];
for (var chunk = beginChunk; chunk < endChunk; ++chunk) {
chunks.push(chunk);
}
this.requestChunks(chunks, callback);
},
requestRanges: function ChunkedStreamManager_requestRanges(ranges,
callback) {
ranges = ranges || [];
var chunksToRequest = [];
for (var i = 0; i < ranges.length; i++) {
var beginChunk = this.getBeginChunk(ranges[i].begin);
var endChunk = this.getEndChunk(ranges[i].end);
for (var chunk = beginChunk; chunk < endChunk; ++chunk) {
if (chunksToRequest.indexOf(chunk) < 0) {
chunksToRequest.push(chunk);
}
}
}
chunksToRequest.sort(function(a, b) { return a - b; });
this.requestChunks(chunksToRequest, callback);
},
// Groups a sorted array of chunks into as few contiguous larger // Groups a sorted array of chunks into as few contiguous larger
// chunks as possible // chunks as possible
groupChunks: function ChunkedStreamManager_groupChunks(chunks) { groupChunks: function ChunkedStreamManager_groupChunks(chunks) {
@ -409,9 +438,7 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {
nextEmptyChunk = this.stream.nextEmptyChunk(endChunk); nextEmptyChunk = this.stream.nextEmptyChunk(endChunk);
} }
if (isInt(nextEmptyChunk)) { if (isInt(nextEmptyChunk)) {
var nextEmptyByte = nextEmptyChunk * this.chunkSize; this.requestChunks([nextEmptyChunk]);
this.requestRange(nextEmptyByte, nextEmptyByte + this.chunkSize,
function() {});
} }
} }
@ -419,7 +446,9 @@ var ChunkedStreamManager = (function ChunkedStreamManagerClosure() {
var requestId = loadedRequests[i]; var requestId = loadedRequests[i];
var callback = this.callbacksByRequest[requestId]; var callback = this.callbacksByRequest[requestId];
delete this.callbacksByRequest[requestId]; delete this.callbacksByRequest[requestId];
callback(); if (callback) {
callback();
}
} }
this.msgHandler.send('DocProgress', { this.msgHandler.send('DocProgress', {

View File

@ -18,7 +18,7 @@
isArrayBuffer, isDict, isName, isStream, isString, Lexer, isArrayBuffer, isDict, isName, isStream, isString, Lexer,
Linearization, NullStream, PartialEvaluator, shadow, Stream, Linearization, NullStream, PartialEvaluator, shadow, Stream,
StreamsSequenceStream, stringToPDFString, TODO, Util, warn, XRef, StreamsSequenceStream, stringToPDFString, TODO, Util, warn, XRef,
MissingDataException, Promise, Annotation */ MissingDataException, Promise, Annotation, ObjectLoader */
'use strict'; 'use strict';
@ -51,6 +51,7 @@ var Page = (function PageClosure() {
font: 0, font: 0,
obj: 0 obj: 0
}; };
this.resourcesPromise = null;
} }
Page.prototype = { Page.prototype = {
@ -133,6 +134,22 @@ var Page = (function PageClosure() {
} }
return stream; return stream;
}, },
loadResources: function(keys) {
if (!this.resourcesPromise) {
// TODO: add async inheritPageProp and remove this.
this.resourcesPromise = this.pdfManager.ensure(this, 'resources');
}
var promise = new Promise();
this.resourcesPromise.then(function resourceSuccess() {
var objectLoader = new ObjectLoader(this.resources.map,
keys,
this.xref);
objectLoader.load().then(function objectLoaderSuccess() {
promise.resolve();
});
}.bind(this));
return promise;
},
getOperatorList: function Page_getOperatorList(handler) { getOperatorList: function Page_getOperatorList(handler) {
var self = this; var self = this;
var promise = new Promise(); var promise = new Promise();
@ -146,7 +163,16 @@ var Page = (function PageClosure() {
var pdfManager = this.pdfManager; var pdfManager = this.pdfManager;
var contentStreamPromise = pdfManager.ensure(this, 'getContentStream', var contentStreamPromise = pdfManager.ensure(this, 'getContentStream',
[]); []);
var resourcesPromise = pdfManager.ensure(this, 'resources'); var resourcesPromise = this.loadResources([
'ExtGState',
'ColorSpace',
'Pattern',
'Shading',
'XObject',
'Font',
// ProcSet
// Properties
]);
var partialEvaluator = new PartialEvaluator( var partialEvaluator = new PartialEvaluator(
pdfManager, this.xref, handler, pdfManager, this.xref, handler,
@ -157,14 +183,10 @@ var Page = (function PageClosure() {
[contentStreamPromise, resourcesPromise], reject); [contentStreamPromise, resourcesPromise], reject);
dataPromises.then(function(data) { dataPromises.then(function(data) {
var contentStream = data[0]; var contentStream = data[0];
var resources = data[1];
pdfManager.ensure(partialEvaluator, 'getOperatorList', partialEvaluator.getOperatorList(contentStream, self.resources).then(
[contentStream, resources]).then( function(data) {
function(opListPromise) { pageListPromise.resolve(data);
opListPromise.then(function(data) {
pageListPromise.resolve(data);
});
}, },
reject reject
); );
@ -175,6 +197,7 @@ var Page = (function PageClosure() {
var pageData = datas[0]; var pageData = datas[0];
var pageQueue = pageData.queue; var pageQueue = pageData.queue;
var annotations = datas[1]; var annotations = datas[1];
if (annotations.length === 0) { if (annotations.length === 0) {
PartialEvaluator.optimizeQueue(pageQueue); PartialEvaluator.optimizeQueue(pageQueue);
promise.resolve(pageData); promise.resolve(pageData);
@ -186,6 +209,7 @@ var Page = (function PageClosure() {
annotations, pageQueue, pdfManager, dependencies, partialEvaluator); annotations, pageQueue, pdfManager, dependencies, partialEvaluator);
annotationsReadyPromise.then(function () { annotationsReadyPromise.then(function () {
PartialEvaluator.optimizeQueue(pageQueue); PartialEvaluator.optimizeQueue(pageQueue);
promise.resolve(pageData); promise.resolve(pageData);
}, reject); }, reject);
}, reject); }, reject);
@ -205,27 +229,24 @@ var Page = (function PageClosure() {
var pdfManager = this.pdfManager; var pdfManager = this.pdfManager;
var contentStreamPromise = pdfManager.ensure(this, 'getContentStream', var contentStreamPromise = pdfManager.ensure(this, 'getContentStream',
[]); []);
var resourcesPromise = new Promise();
pdfManager.ensure(this, 'resources').then(function(resources) { var resourcesPromise = this.loadResources([
pdfManager.ensure(self.xref, 'fetchIfRef', [resources]).then( 'ExtGState',
function(resources) { 'XObject',
resourcesPromise.resolve(resources); 'Font'
} ]);
);
});
var dataPromises = Promise.all([contentStreamPromise, var dataPromises = Promise.all([contentStreamPromise,
resourcesPromise]); resourcesPromise]);
dataPromises.then(function(data) { dataPromises.then(function(data) {
var contentStream = data[0]; var contentStream = data[0];
var resources = data[1];
var partialEvaluator = new PartialEvaluator( var partialEvaluator = new PartialEvaluator(
pdfManager, self.xref, handler, pdfManager, self.xref, handler,
self.pageIndex, 'p' + self.pageIndex + '_', self.pageIndex, 'p' + self.pageIndex + '_',
self.idCounters); self.idCounters);
partialEvaluator.getTextContent( partialEvaluator.getTextContent(
contentStream, resources).then(function(bidiTexts) { contentStream, self.resources).then(function(bidiTexts) {
textContentPromise.resolve({ textContentPromise.resolve({
bidiTexts: bidiTexts bidiTexts: bidiTexts
}); });
@ -282,7 +303,7 @@ var PDFDocument = (function PDFDocumentClosure() {
assertWellFormed(stream.length > 0, 'stream must have data'); assertWellFormed(stream.length > 0, 'stream must have data');
this.pdfManager = pdfManager; this.pdfManager = pdfManager;
this.stream = stream; this.stream = stream;
var xref = new XRef(this.stream, password); var xref = new XRef(this.stream, password, pdfManager);
this.xref = xref; this.xref = xref;
} }

View File

@ -540,9 +540,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
try { try {
translated = this.translateFont(font, xref); translated = this.translateFont(font, xref);
} catch (e) { } catch (e) {
if (e instanceof MissingDataException) {
throw e;
}
translated = new ErrorFont(e instanceof Error ? e.message : e); translated = new ErrorFont(e instanceof Error ? e.message : e);
} }
font.translated = translated; font.translated = translated;
@ -611,221 +608,208 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var parser = new Parser(new Lexer(stream, OP_MAP), false, xref); var parser = new Parser(new Lexer(stream, OP_MAP), false, xref);
var promise = new Promise(); var promise = new Promise();
function parseCommands() { var args = [];
try { while (true) {
parser.restoreState();
var args = [];
while (true) {
var obj = parser.getObj(); var obj = parser.getObj();
if (isEOF(obj)) { if (isEOF(obj)) {
break; break;
}
if (isCmd(obj)) {
var cmd = obj.cmd;
// Check that the command is valid
var opSpec = OP_MAP[cmd];
if (!opSpec) {
warn('Unknown command "' + cmd + '"');
continue;
}
var fn = opSpec.fnName;
// Validate the number of arguments for the command
if (opSpec.variableArgs) {
if (args.length > opSpec.numArgs) {
info('Command ' + fn + ': expected [0,' + opSpec.numArgs +
'] args, but received ' + args.length + ' args');
} }
} else {
if (args.length < opSpec.numArgs) {
// If we receive too few args, it's not possible to possible
// to execute the command, so skip the command
info('Command ' + fn + ': because expected ' +
opSpec.numArgs + ' args, but received ' + args.length +
' args; skipping');
args = [];
continue;
} else if (args.length > opSpec.numArgs) {
info('Command ' + fn + ': expected ' + opSpec.numArgs +
' args, but received ' + args.length + ' args');
}
}
if (isCmd(obj)) { // TODO figure out how to type-check vararg functions
var cmd = obj.cmd;
// Check that the command is valid if ((cmd == 'SCN' || cmd == 'scn') &&
var opSpec = OP_MAP[cmd]; !args[args.length - 1].code) {
if (!opSpec) { // compile tiling patterns
warn('Unknown command "' + cmd + '"'); var patternName = args[args.length - 1];
continue; // SCN/scn applies patterns along with normal colors
} var pattern;
if (isName(patternName) &&
(pattern = patterns.get(patternName.name))) {
var fn = opSpec.fnName; var dict = isStream(pattern) ? pattern.dict : pattern;
var typeNum = dict.get('PatternType');
// Validate the number of arguments for the command if (typeNum == TILING_PATTERN) {
if (opSpec.variableArgs) { var patternPromise = self.handleTilingType(
if (args.length > opSpec.numArgs) { fn, args, resources, pattern, dict);
info('Command ' + fn + ': expected [0,' + opSpec.numArgs +
'] args, but received ' + args.length + ' args');
}
} else {
if (args.length < opSpec.numArgs) {
// If we receive too few args, it's not possible to possible
// to execute the command, so skip the command
info('Command ' + fn + ': because expected ' +
opSpec.numArgs + ' args, but received ' + args.length +
' args; skipping');
args = [];
continue;
} else if (args.length > opSpec.numArgs) {
info('Command ' + fn + ': expected ' + opSpec.numArgs +
' args, but received ' + args.length + ' args');
}
}
// TODO figure out how to type-check vararg functions
if ((cmd == 'SCN' || cmd == 'scn') &&
!args[args.length - 1].code) {
// compile tiling patterns
var patternName = args[args.length - 1];
// SCN/scn applies patterns along with normal colors
var pattern;
if (isName(patternName) &&
(pattern = patterns.get(patternName.name))) {
var dict = isStream(pattern) ? pattern.dict : pattern;
var typeNum = dict.get('PatternType');
if (typeNum == TILING_PATTERN) {
var patternPromise = self.handleTilingType(
fn, args, resources, pattern, dict);
fn = 'promise';
args = [patternPromise];
} else if (typeNum == SHADING_PATTERN) {
var shading = dict.get('Shading');
var matrix = dict.get('Matrix');
var pattern = Pattern.parseShading(shading, matrix, xref,
resources);
args = pattern.getIR();
} else {
error('Unkown PatternType ' + typeNum);
}
}
} else if (cmd == 'Do' && !args[0].code) {
// eagerly compile XForm objects
var name = args[0].name;
var xobj = xobjs.get(name);
if (xobj) {
assertWellFormed(
isStream(xobj), 'XObject should be a stream');
var type = xobj.dict.get('Subtype');
assertWellFormed(
isName(type),
'XObject should have a Name subtype'
);
if ('Form' == type.name) {
fn = 'promise';
args = [self.buildFormXObject(resources, xobj)];
} else if ('Image' == type.name) {
var data = self.buildPaintImageXObject(
resources, xobj, false);
Util.extendObj(dependencies, data.dependencies);
self.insertDependencies(queue, data.dependencies);
fn = data.fn;
args = data.args;
} else {
error('Unhandled XObject subtype ' + type.name);
}
}
} else if (cmd == 'Tf') { // eagerly collect all fonts
fn = 'promise'; fn = 'promise';
args = [self.handleSetFont(resources, args)]; args = [patternPromise];
} else if (cmd == 'EI') { } else if (typeNum == SHADING_PATTERN) {
var shading = dict.get('Shading');
var matrix = dict.get('Matrix');
var pattern = Pattern.parseShading(shading, matrix, xref,
resources);
args = pattern.getIR();
} else {
error('Unkown PatternType ' + typeNum);
}
}
} else if (cmd == 'Do' && !args[0].code) {
// eagerly compile XForm objects
var name = args[0].name;
var xobj = xobjs.get(name);
if (xobj) {
assertWellFormed(
isStream(xobj), 'XObject should be a stream');
var type = xobj.dict.get('Subtype');
assertWellFormed(
isName(type),
'XObject should have a Name subtype'
);
if ('Form' == type.name) {
fn = 'promise';
args = [self.buildFormXObject(resources, xobj)];
} else if ('Image' == type.name) {
var data = self.buildPaintImageXObject( var data = self.buildPaintImageXObject(
resources, args[0], true); resources, xobj, false);
Util.extendObj(dependencies, data.dependencies); Util.extendObj(dependencies, data.dependencies);
self.insertDependencies(queue, data.dependencies); self.insertDependencies(queue, data.dependencies);
fn = data.fn; fn = data.fn;
args = data.args; args = data.args;
}
switch (fn) {
// Parse the ColorSpace data to a raw format.
case 'setFillColorSpace':
case 'setStrokeColorSpace':
args = [ColorSpace.parseToIR(args[0], xref, resources)];
break;
case 'shadingFill':
var shadingRes = resources.get('Shading');
if (!shadingRes)
error('No shading resource found');
var shading = shadingRes.get(args[0].name);
if (!shading)
error('No shading object found');
var shadingFill = Pattern.parseShading(
shading, null, xref, resources);
var patternIR = shadingFill.getIR();
args = [patternIR];
fn = 'shadingFill';
break;
case 'setGState':
var dictName = args[0];
var extGState = resources.get('ExtGState');
if (!isDict(extGState) || !extGState.has(dictName.name))
break;
var gState = extGState.get(dictName.name);
fn = 'promise';
args = [self.setGState(resources, gState)];
} // switch
fnArray.push(fn);
argsArray.push(args);
args = [];
parser.saveState();
} else if (obj !== null && obj !== undefined) {
args.push(obj instanceof Dict ? obj.getAll() : obj);
assertWellFormed(args.length <= 33, 'Too many arguments');
}
}
var subQueuePromises = [];
for (var i = 0; i < fnArray.length; ++i) {
if (fnArray[i] === 'promise') {
subQueuePromises.push(argsArray[i][0]);
}
}
Promise.all(subQueuePromises).then(function(datas) {
// TODO(mack): Optimize by using repositioning elements
// in original queue rather than creating new queue
for (var i = 0, n = datas.length; i < n; ++i) {
var data = datas[i];
var subQueue = data.queue;
queue.transparency = subQueue.transparency || queue.transparency;
Util.extendObj(dependencies, data.dependencies);
}
var newFnArray = [];
var newArgsArray = [];
var currOffset = 0;
var subQueueIdx = 0;
for (var i = 0, n = fnArray.length; i < n; ++i) {
var offset = i + currOffset;
if (fnArray[i] === 'promise') {
var data = datas[subQueueIdx++];
var subQueue = data.queue;
var subQueueFnArray = subQueue.fnArray;
var subQueueArgsArray = subQueue.argsArray;
for (var j = 0, nn = subQueueFnArray.length; j < nn; ++j) {
newFnArray[offset + j] = subQueueFnArray[j];
newArgsArray[offset + j] = subQueueArgsArray[j];
}
currOffset += subQueueFnArray.length - 1;
} else { } else {
newFnArray[offset] = fnArray[i]; error('Unhandled XObject subtype ' + type.name);
newArgsArray[offset] = argsArray[i];
} }
} }
} else if (cmd == 'Tf') { // eagerly collect all fonts
promise.resolve({ fn = 'promise';
queue: { args = [self.handleSetFont(resources, args)];
fnArray: newFnArray, } else if (cmd == 'EI') {
argsArray: newArgsArray, var data = self.buildPaintImageXObject(
transparency: queue.transparency resources, args[0], true);
}, Util.extendObj(dependencies, data.dependencies);
dependencies: dependencies self.insertDependencies(queue, data.dependencies);
}); fn = data.fn;
}); args = data.args;
} catch (e) {
if (!(e instanceof MissingDataException)) {
throw e;
} }
self.pdfManager.requestRange(e.begin, e.end).then(parseCommands); switch (fn) {
// Parse the ColorSpace data to a raw format.
case 'setFillColorSpace':
case 'setStrokeColorSpace':
args = [ColorSpace.parseToIR(args[0], xref, resources)];
break;
case 'shadingFill':
var shadingRes = resources.get('Shading');
if (!shadingRes)
error('No shading resource found');
var shading = shadingRes.get(args[0].name);
if (!shading)
error('No shading object found');
var shadingFill = Pattern.parseShading(
shading, null, xref, resources);
var patternIR = shadingFill.getIR();
args = [patternIR];
fn = 'shadingFill';
break;
case 'setGState':
var dictName = args[0];
var extGState = resources.get('ExtGState');
if (!isDict(extGState) || !extGState.has(dictName.name))
break;
var gState = extGState.get(dictName.name);
fn = 'promise';
args = [self.setGState(resources, gState)];
} // switch
fnArray.push(fn);
argsArray.push(args);
args = [];
parser.saveState();
} else if (obj !== null && obj !== undefined) {
args.push(obj instanceof Dict ? obj.getAll() : obj);
assertWellFormed(args.length <= 33, 'Too many arguments');
} }
} }
parser.saveState();
parseCommands(); var subQueuePromises = [];
for (var i = 0; i < fnArray.length; ++i) {
if (fnArray[i] === 'promise') {
subQueuePromises.push(argsArray[i][0]);
}
}
Promise.all(subQueuePromises).then(function(datas) {
// TODO(mack): Optimize by using repositioning elements
// in original queue rather than creating new queue
for (var i = 0, n = datas.length; i < n; ++i) {
var data = datas[i];
var subQueue = data.queue;
queue.transparency = subQueue.transparency || queue.transparency;
Util.extendObj(dependencies, data.dependencies);
}
var newFnArray = [];
var newArgsArray = [];
var currOffset = 0;
var subQueueIdx = 0;
for (var i = 0, n = fnArray.length; i < n; ++i) {
var offset = i + currOffset;
if (fnArray[i] === 'promise') {
var data = datas[subQueueIdx++];
var subQueue = data.queue;
var subQueueFnArray = subQueue.fnArray;
var subQueueArgsArray = subQueue.argsArray;
for (var j = 0, nn = subQueueFnArray.length; j < nn; ++j) {
newFnArray[offset + j] = subQueueFnArray[j];
newArgsArray[offset + j] = subQueueArgsArray[j];
}
currOffset += subQueueFnArray.length - 1;
} else {
newFnArray[offset] = fnArray[i];
newArgsArray[offset] = argsArray[i];
}
}
promise.resolve({
queue: {
fnArray: newFnArray,
argsArray: newArgsArray,
transparency: queue.transparency
},
dependencies: dependencies
});
});
return promise; return promise;
}, },
@ -863,161 +847,148 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var chunkPromises = []; var chunkPromises = [];
var fontPromise; var fontPromise;
function parseCommands() { var args = [];
try {
parser.restoreState();
var args = [];
while (true) { while (true) {
var obj = parser.getObj(); var obj = parser.getObj();
if (isEOF(obj)) { if (isEOF(obj)) {
break; break;
}
if (isCmd(obj)) {
var cmd = obj.cmd;
switch (cmd) {
// TODO: Add support for SAVE/RESTORE and XFORM here.
case 'Tf':
fontPromise = handleSetFont(args[0].name, null, resources);
//.translated;
break;
case 'TJ':
var chunkPromise = new Promise();
chunkPromises.push(chunkPromise);
fontPromise.then(function(items, chunkPromise, font) {
var chunk = '';
for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === 'string') {
chunk += fontCharsToUnicode(items[j], font);
} else if (items[j] < 0 && font.spaceWidth > 0) {
var fakeSpaces = -items[j] / font.spaceWidth;
if (fakeSpaces > MULTI_SPACE_FACTOR) {
fakeSpaces = Math.round(fakeSpaces);
while (fakeSpaces--) {
chunk += ' ';
}
} else if (fakeSpaces > SPACE_FACTOR) {
chunk += ' ';
}
}
}
chunkPromise.resolve(
getBidiText(chunk, -1, font.vertical));
}.bind(null, args[0], chunkPromise));
break;
case 'Tj':
var chunkPromise = new Promise();
chunkPromises.push(chunkPromise);
fontPromise.then(function(charCodes, chunkPromise, font) {
var chunk = fontCharsToUnicode(charCodes, font);
chunkPromise.resolve(
getBidiText(chunk, -1, font.vertical));
}.bind(null, args[0], chunkPromise));
break;
case '\'':
// For search, adding a extra white space for line breaks
// would be better here, but that causes too much spaces in
// the text-selection divs.
var chunkPromise = new Promise();
chunkPromises.push(chunkPromise);
fontPromise.then(function(charCodes, chunkPromise, font) {
var chunk = fontCharsToUnicode(charCodes, font);
chunkPromise.resolve(
getBidiText(chunk, -1, font.vertical));
}.bind(null, args[0], chunkPromise));
break;
case '"':
// Note comment in "'"
var chunkPromise = new Promise();
chunkPromises.push(chunkPromise);
fontPromise.then(function(charCodes, chunkPromise, font) {
var chunk = fontCharsToUnicode(charCodes, font);
chunkPromise.resolve(
getBidiText(chunk, -1, font.vertical));
}.bind(null, args[2], chunkPromise));
break;
case 'Do':
if (args[0].code) {
break;
}
if (!xobjs) {
xobjs = resources.get('XObject') || new Dict();
}
var name = args[0].name;
var xobj = xobjs.get(name);
if (!xobj)
break;
assertWellFormed(isStream(xobj),
'XObject should be a stream');
var type = xobj.dict.get('Subtype');
assertWellFormed(
isName(type),
'XObject should have a Name subtype'
);
if ('Form' !== type.name)
break;
var chunkPromise = self.getTextContent(
xobj,
xobj.dict.get('Resources') || resources
);
chunkPromises.push(chunkPromise);
break;
case 'gs':
var dictName = args[0];
var extGState = resources.get('ExtGState');
if (!isDict(extGState) || !extGState.has(dictName.name))
break;
var gsState = extGState.get(dictName.name);
for (var i = 0; i < gsState.length; i++) {
if (gsState[i] === 'Font') {
fontPromise = handleSetFont(
args[0].name, null, resources);
}
}
break;
} // switch
args = [];
parser.saveState();
} else if (obj !== null && obj !== undefined) {
assertWellFormed(args.length <= 33, 'Too many arguments');
args.push(obj);
}
} // while
Promise.all(chunkPromises).then(function(datas) {
var bidiTexts = [];
for (var i = 0, n = datas.length; i < n; ++i) {
var bidiText = datas[i];
if (!bidiText) {
continue;
} else if (isArray(bidiText)) {
Util.concatenateToArray(bidiTexts, bidiText);
} else {
bidiTexts.push(bidiText);
}
}
statePromise.resolve(bidiTexts);
});
} catch (e) {
if (!(e instanceof MissingDataException)) {
throw e;
}
self.pdfManager.requestRange(e.begin, e.end).then(parseCommands);
} }
}
parser.saveState(); if (isCmd(obj)) {
parseCommands(); var cmd = obj.cmd;
switch (cmd) {
// TODO: Add support for SAVE/RESTORE and XFORM here.
case 'Tf':
fontPromise = handleSetFont(args[0].name, null, resources);
//.translated;
break;
case 'TJ':
var chunkPromise = new Promise();
chunkPromises.push(chunkPromise);
fontPromise.then(function(items, chunkPromise, font) {
var chunk = '';
for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === 'string') {
chunk += fontCharsToUnicode(items[j], font);
} else if (items[j] < 0 && font.spaceWidth > 0) {
var fakeSpaces = -items[j] / font.spaceWidth;
if (fakeSpaces > MULTI_SPACE_FACTOR) {
fakeSpaces = Math.round(fakeSpaces);
while (fakeSpaces--) {
chunk += ' ';
}
} else if (fakeSpaces > SPACE_FACTOR) {
chunk += ' ';
}
}
}
chunkPromise.resolve(
getBidiText(chunk, -1, font.vertical));
}.bind(null, args[0], chunkPromise));
break;
case 'Tj':
var chunkPromise = new Promise();
chunkPromises.push(chunkPromise);
fontPromise.then(function(charCodes, chunkPromise, font) {
var chunk = fontCharsToUnicode(charCodes, font);
chunkPromise.resolve(
getBidiText(chunk, -1, font.vertical));
}.bind(null, args[0], chunkPromise));
break;
case '\'':
// For search, adding a extra white space for line breaks
// would be better here, but that causes too much spaces in
// the text-selection divs.
var chunkPromise = new Promise();
chunkPromises.push(chunkPromise);
fontPromise.then(function(charCodes, chunkPromise, font) {
var chunk = fontCharsToUnicode(charCodes, font);
chunkPromise.resolve(
getBidiText(chunk, -1, font.vertical));
}.bind(null, args[0], chunkPromise));
break;
case '"':
// Note comment in "'"
var chunkPromise = new Promise();
chunkPromises.push(chunkPromise);
fontPromise.then(function(charCodes, chunkPromise, font) {
var chunk = fontCharsToUnicode(charCodes, font);
chunkPromise.resolve(
getBidiText(chunk, -1, font.vertical));
}.bind(null, args[2], chunkPromise));
break;
case 'Do':
if (args[0].code) {
break;
}
if (!xobjs) {
xobjs = resources.get('XObject') || new Dict();
}
var name = args[0].name;
var xobj = xobjs.get(name);
if (!xobj)
break;
assertWellFormed(isStream(xobj),
'XObject should be a stream');
var type = xobj.dict.get('Subtype');
assertWellFormed(
isName(type),
'XObject should have a Name subtype'
);
if ('Form' !== type.name)
break;
var chunkPromise = self.getTextContent(
xobj,
xobj.dict.get('Resources') || resources
);
chunkPromises.push(chunkPromise);
break;
case 'gs':
var dictName = args[0];
var extGState = resources.get('ExtGState');
if (!isDict(extGState) || !extGState.has(dictName.name))
break;
var gsState = extGState.get(dictName.name);
for (var i = 0; i < gsState.length; i++) {
if (gsState[i] === 'Font') {
fontPromise = handleSetFont(
args[0].name, null, resources);
}
}
break;
} // switch
args = [];
parser.saveState();
} else if (obj !== null && obj !== undefined) {
assertWellFormed(args.length <= 33, 'Too many arguments');
args.push(obj);
}
} // while
Promise.all(chunkPromises).then(function(datas) {
var bidiTexts = [];
for (var i = 0, n = datas.length; i < n; ++i) {
var bidiText = datas[i];
if (!bidiText) {
continue;
} else if (isArray(bidiText)) {
Util.concatenateToArray(bidiTexts, bidiText);
} else {
bidiTexts.push(bidiText);
}
}
statePromise.resolve(bidiTexts);
});
return statePromise; return statePromise;
}, },

View File

@ -18,7 +18,8 @@
InvalidPDFException, isArray, isCmd, isDict, isInt, isName, isRef, InvalidPDFException, isArray, isCmd, isDict, isInt, isName, isRef,
isStream, JpegStream, Lexer, log, Page, Parser, Promise, shadow, isStream, JpegStream, Lexer, log, Page, Parser, Promise, shadow,
stringToPDFString, stringToUTF8String, warn, isString, assert, stringToPDFString, stringToUTF8String, warn, isString, assert,
Promise, MissingDataException, XRefParseException, Stream */ Promise, MissingDataException, XRefParseException, Stream,
ChunkedStream */
'use strict'; 'use strict';
@ -86,6 +87,38 @@ var Dict = (function DictClosure() {
return xref ? xref.fetchIfRef(value) : value; return xref ? xref.fetchIfRef(value) : value;
}, },
// Same as get(), but returns a promise and uses fetchIfRefAsync().
getAsync: function Dict_getAsync(key1, key2, key3) {
var value;
var promise;
var xref = this.xref;
if (typeof (value = this.map[key1]) !== undefined || key1 in this.map ||
typeof key2 === undefined) {
if (xref) {
return xref.fetchIfRefAsync(value);
}
promise = new Promise();
promise.resolve(value);
return promise;
}
if (typeof (value = this.map[key2]) !== undefined || key2 in this.map ||
typeof key3 === undefined) {
if (xref) {
return xref.fetchIfRefAsync(value);
}
promise = new Promise();
promise.resolve(value);
return promise;
}
value = this.map[key3] || null;
if (xref) {
return xref.fetchIfRefAsync(value);
}
promise = new Promise();
promise.resolve(value);
return promise;
},
// no dereferencing // no dereferencing
getRaw: function Dict_getRaw(key) { getRaw: function Dict_getRaw(key) {
return this.map[key]; return this.map[key];
@ -139,11 +172,15 @@ var RefSet = (function RefSetClosure() {
RefSet.prototype = { RefSet.prototype = {
has: function RefSet_has(ref) { has: function RefSet_has(ref) {
return !!this.dict['R' + ref.num + '.' + ref.gen]; return ('R' + ref.num + '.' + ref.gen) in this.dict;
}, },
put: function RefSet_put(ref) { put: function RefSet_put(ref) {
this.dict['R' + ref.num + '.' + ref.gen] = ref; this.dict['R' + ref.num + '.' + ref.gen] = true;
},
remove: function RefSet_remove(ref) {
delete this.dict['R' + ref.num + '.' + ref.gen];
} }
}; };
@ -811,7 +848,6 @@ var XRef = (function XRefClosure() {
if (e instanceof MissingDataException) { if (e instanceof MissingDataException) {
throw e; throw e;
} }
log('(while reading XRef): ' + e); log('(while reading XRef): ' + e);
} }
@ -938,6 +974,30 @@ var XRef = (function XRefClosure() {
} }
return e; return e;
}, },
fetchIfRefAsync: function XRef_fetchIfRefAsync(obj) {
if (!isRef(obj)) {
var promise = new Promise();
promise.resolve(obj);
return promise;
}
return this.fetchAsync(obj);
},
fetchAsync: function XRef_fetchAsync(ref, suppressEncryption) {
var promise = new Promise();
var tryFetch = function (promise) {
try {
promise.resolve(this.fetch(ref, suppressEncryption));
} catch (e) {
if (e instanceof MissingDataException) {
this.stream.manager.requestRange(e.begin, e.end, tryFetch);
return;
}
promise.reject(e);
}
}.bind(this, promise);
tryFetch();
return promise;
},
getCatalogObj: function XRef_getCatalogObj() { getCatalogObj: function XRef_getCatalogObj() {
return this.root; return this.root;
} }
@ -1114,3 +1174,138 @@ var PDFObjects = (function PDFObjectsClosure() {
return PDFObjects; return PDFObjects;
})(); })();
/**
 * Loads the missing data of an object graph. Starting from the values stored
 * under the given keys, the graph is walked depth first; every reference that
 * cannot be fetched yet, and every chunked stream that still has missing
 * chunks, is put aside. When the walk runs out of nodes the byte ranges of
 * the nodes put aside are requested in one batch, and the walk resumes from
 * those nodes once the request callback fires.
 *
 * NOTE: Circular references are handled by remembering which references were
 * already visited. Be careful not to load a graph that references the catalog
 * or other pages, since that would end up traversing the object graph of the
 * entire PDF document.
 */
var ObjectLoader = (function ObjectLoaderClosure() {
  // True for values that are, or may contain, further nodes of the graph.
  function mayHaveChildren(value) {
    return isRef(value) || isDict(value) || isArray(value) || isStream(value);
  }

  // Queues |value| for a visit when it can have children of its own.
  function queueIfContainer(value, nodesToVisit) {
    if (mayHaveChildren(value)) {
      nodesToVisit.push(value);
    }
  }

  // Queues the interesting children of a dictionary, a stream (through its
  // dictionary) or an array. Other kinds of nodes have no children.
  function addChildren(node, nodesToVisit) {
    var nodeIsDict = isDict(node);
    if (nodeIsDict || isStream(node)) {
      var map = nodeIsDict ? node.map : node.dict.map;
      for (var key in map) {
        queueIfContainer(map[key], nodesToVisit);
      }
      return;
    }
    if (!isArray(node)) {
      return;
    }
    for (var i = 0, ii = node.length; i < ii; i++) {
      queueIfContainer(node[i], nodesToVisit);
    }
  }

  // obj:  plain map whose entries are the roots of the graph.
  // keys: names of the entries of |obj| to start from.
  // xref: used to fetch references and to reach the stream manager.
  function ObjectLoader(obj, keys, xref) {
    this.obj = obj;
    this.keys = keys;
    this.xref = xref;
    this.refSet = null;
  }

  ObjectLoader.prototype = {
    // Starts loading and returns a promise that is resolved (without a value)
    // as soon as everything reachable from the requested keys is available.
    load: function ObjectLoader_load() {
      var keys = this.keys;
      this.promise = new Promise();

      // Walking the graph is pointless when no data can be missing.
      var stream = this.xref.stream;
      var nothingMissing = !(stream instanceof ChunkedStream) ||
                           stream.getMissingChunks().length === 0;
      if (nothingMissing) {
        this.promise.resolve();
        return this.promise;
      }

      this.refSet = new RefSet();
      // The values stored under the requested keys are the roots of the walk.
      var roots = [];
      for (var i = 0; i < keys.length; i++) {
        roots.push(this.obj[keys[i]]);
      }
      this.walk(roots);
      return this.promise;
    },

    // Depth first walk over |nodesToVisit| (used as a stack). Resolves the
    // promise when nothing was missing, otherwise requests the missing ranges
    // and walks again from the nodes that were not ready.
    walk: function ObjectLoader_walk(nodesToVisit) {
      var self = this;
      var deferredNodes = [];
      var missingRanges = [];

      while (nodesToVisit.length > 0) {
        var node = nodesToVisit.pop();

        // Only references and chunked streams can run into missing data.
        if (isRef(node)) {
          if (self.refSet.has(node)) {
            // Already visited (possibly through a circular reference).
            continue;
          }
          self.refSet.put(node);
          try {
            node = self.xref.fetch(node);
          } catch (ex) {
            if (!(ex instanceof MissingDataException)) {
              throw ex;
            }
            // |node| is still the reference here; revisit it later.
            deferredNodes.push(node);
            missingRanges.push({ begin: ex.begin, end: ex.end });
          }
        }

        if (node instanceof ChunkedStream &&
            node.getMissingChunks().length) {
          deferredNodes.push(node);
          missingRanges.push({ begin: node.start, end: node.end });
        }

        addChildren(node, nodesToVisit);
      }

      if (missingRanges.length === 0) {
        // Everything is loaded.
        self.refSet = null;
        self.promise.resolve();
        return;
      }

      self.xref.stream.manager.requestRanges(missingRanges,
          function pendingRequestCallback() {
        // Deferred references were marked as visited before their fetch
        // failed; unmark them so that the next walk does not skip them.
        for (var i = 0; i < deferredNodes.length; i++) {
          if (isRef(deferredNodes[i])) {
            self.refSet.remove(deferredNodes[i]);
          }
        }
        self.walk(deferredNodes);
      });
    }
  };

  return ObjectLoader;
})();