Don't traverse all pages to get a single page.

This commit is contained in:
Brendan Dahl 2013-11-13 15:27:46 -08:00
parent 516d2e79be
commit c2d65fc4ab
8 changed files with 183 additions and 84 deletions

View File

@ -494,10 +494,6 @@ var PDFDocument = (function PDFDocumentClosure() {
return shadow(this, 'fingerprint', fileID); return shadow(this, 'fingerprint', fileID);
}, },
traversePages: function PDFDocument_traversePages() {
this.catalog.traversePages();
},
getPage: function PDFDocument_getPage(pageIndex) { getPage: function PDFDocument_getPage(pageIndex) {
return this.catalog.getPage(pageIndex); return this.catalog.getPage(pageIndex);
} }

View File

@ -217,14 +217,7 @@ var Catalog = (function CatalogClosure() {
assertWellFormed(isDict(this.catDict), assertWellFormed(isDict(this.catDict),
'catalog object is not a dictionary'); 'catalog object is not a dictionary');
// Stores state as we traverse the pages catalog so that we can resume
// parsing if an exception is thrown
this.traversePagesQueue = [{
pagesDict: this.toplevelPagesDict,
posInKids: 0
}];
this.pagePromises = []; this.pagePromises = [];
this.currPageIndex = 0;
} }
Catalog.prototype = { Catalog.prototype = {
@ -408,58 +401,146 @@ var Catalog = (function CatalogClosure() {
}, },
getPage: function Catalog_getPage(pageIndex) { getPage: function Catalog_getPage(pageIndex) {
if (pageIndex < 0 || pageIndex >= this.numPages ||
(pageIndex|0) !== pageIndex) {
var pagePromise = new Promise();
pagePromise.reject(new Error('Invalid page index'));
return pagePromise;
}
if (!(pageIndex in this.pagePromises)) { if (!(pageIndex in this.pagePromises)) {
this.pagePromises[pageIndex] = new Promise(); this.pagePromises[pageIndex] = this.getPageDict(pageIndex).then(
function (a) {
var dict = a[0];
var ref = a[1];
return new Page(this.pdfManager, this.xref, pageIndex, dict, ref);
}.bind(this)
);
} }
return this.pagePromises[pageIndex]; return this.pagePromises[pageIndex];
}, },
// Traverses pages in DFS order so that pages are processed in increasing getPageDict: function Catalog_getPageDict(pageIndex) {
// order var promise = new Promise();
traversePages: function Catalog_traversePages() { var nodesToVisit = [this.catDict.getRaw('Pages')];
var queue = this.traversePagesQueue; var currentPageIndex = 0;
while (queue.length) { var xref = this.xref;
var queueItem = queue[queue.length - 1];
var pagesDict = queueItem.pagesDict;
var kids = pagesDict.get('Kids'); function next() {
assert(isArray(kids), 'page dictionary kids object is not an array'); while (nodesToVisit.length) {
if (queueItem.posInKids >= kids.length) { var currentNode = nodesToVisit.pop();
queue.pop();
continue;
}
var kidRef = kids[queueItem.posInKids];
assert(isRef(kidRef), 'page dictionary kid is not a reference');
var kid = this.xref.fetch(kidRef); if (isRef(currentNode)) {
if (isDict(kid, 'Page') || (isDict(kid) && !kid.has('Kids'))) { xref.fetchAsync(currentNode).then(function (obj) {
var pageIndex = this.currPageIndex++; if ((isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids')))) {
var page = new Page(this.pdfManager, this.xref, pageIndex, kid, if (pageIndex === currentPageIndex) {
kidRef); promise.resolve([obj, currentNode]);
if (!(pageIndex in this.pagePromises)) { } else {
this.pagePromises[pageIndex] = new Promise(); currentPageIndex++;
next();
}
return;
}
nodesToVisit.push(obj);
next();
}.bind(this), promise.reject.bind(promise));
return;
} }
this.pagePromises[pageIndex].resolve(page);
} else { // must be a child page dictionary // must be a child page dictionary
assert( assert(
isDict(kid), isDict(currentNode),
'page dictionary kid reference points to wrong type of object' 'page dictionary kid reference points to wrong type of object'
); );
var count = currentNode.get('Count');
// Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
continue;
}
queue.push({ var kids = currentNode.get('Kids');
pagesDict: kid, assert(isArray(kids), 'page dictionary kids object is not an array');
posInKids: 0 if (count === kids.length) {
}); // Nodes that don't have the page have been skipped and this is the
// bottom of the tree which means the page requested must be a
// descendant of this pages node. Ideally we would just resolve the
// promise with the page ref here, but there is the case where more
// pages nodes could link to a single page (see issue 3666 pdf). To
// handle this push it back on the queue so if it is a pages node it
// will be descended into.
nodesToVisit = [kids[pageIndex - currentPageIndex]];
currentPageIndex = pageIndex;
continue;
} else {
for (var last = kids.length - 1; last >= 0; last--) {
nodesToVisit.push(kids[last]);
}
}
} }
++queueItem.posInKids; promise.reject('Page index ' + pageIndex + ' not found.');
} }
next();
return promise;
},
getPageIndex: function Catalog_getPageIndex(ref) {
// The page tree nodes have the count of all the leaves below them. To get
// how many pages are before we just have to walk up the tree and keep
// adding the count of siblings to the left of the node.
var xref = this.xref;
/**
 * Counts the pages contributed by the siblings listed before `kidRef` in
 * its parent's 'Kids' array. A sibling that has a 'Count' entry contributes
 * that count; a sibling without one is counted as a single page.
 * @param {Ref} kidRef Reference to the node whose left siblings are counted.
 * @return {Promise} Resolved with [pagesBefore, parentRef], or with null
 *   when the node, its 'Parent' or the parent's 'Kids' is missing.
 */
function pagesBeforeRef(kidRef) {
  var total = 0;
  var parentRef;

  // Adds the number of pages found below (or at) one left sibling.
  function countSibling(sibling) {
    if (sibling.has('Count')) {
      total += sibling.get('Count');
    } else { // page leaf node
      total++;
    }
  }

  return xref.fetchAsync(kidRef).then(function (node) {
    if (!node) {
      return null;
    }
    // Keep the raw reference so the caller can continue from the parent.
    parentRef = node.getRaw('Parent');
    return node.getAsync('Parent');
  }).then(function (parent) {
    if (!parent) {
      return null;
    }
    return parent.getAsync('Kids');
  }).then(function (kids) {
    if (!kids) {
      return null;
    }
    var kidPromises = [];
    var found = false;
    for (var i = 0; i < kids.length; i++) {
      var kid = kids[i];
      assert(isRef(kid), 'kids must be an ref');
      // A reference is identified by its object number AND its generation;
      // matching on the number alone could stop at the wrong sibling.
      if (kid.num === kidRef.num && kid.gen === kidRef.gen) {
        found = true;
        break;
      }
      kidPromises.push(xref.fetchAsync(kid).then(countSibling));
    }
    if (!found) {
      error('kid ref not found in parents kids');
    }
    // Wait for every sibling to be counted before reporting the total.
    return Promise.all(kidPromises).then(function () {
      return [total, parentRef];
    });
  });
}
var total = 0;
// Walks from `ref` towards the top of the page tree, adding to the shared
// `total` the page count pagesBeforeRef reports at each level. The returned
// promise resolves with `total` once pagesBeforeRef yields a falsy result.
function next(ref) {
  function climb(step) {
    if (step) {
      // step is [pagesBefore, parentRef]
      total += step[0];
      return next(step[1]);
    }
    return total;
  }
  return pagesBeforeRef(ref).then(climb);
}
return next(ref);
} }
}; };

View File

@ -18,7 +18,7 @@
MissingPDFException, PasswordException, PDFJS, Promise, MissingPDFException, PasswordException, PDFJS, Promise,
UnknownErrorException, NetworkManager, LocalPdfManager, UnknownErrorException, NetworkManager, LocalPdfManager,
NetworkPdfManager, XRefParseException, NetworkPdfManager, XRefParseException,
isInt, PasswordResponses, MessageHandler */ isInt, PasswordResponses, MessageHandler, Ref */
'use strict'; 'use strict';
@ -193,7 +193,6 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
var onSuccess = function(doc) { var onSuccess = function(doc) {
handler.send('GetDoc', { pdfInfo: doc }); handler.send('GetDoc', { pdfInfo: doc });
pdfManager.ensureModel('traversePages', []).then(null, onFailure);
}; };
var onFailure = function(e) { var onFailure = function(e) {
@ -270,6 +269,13 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
}); });
}); });
// Answers a 'GetPageIndex' request: settles `promise` with the outcome of
// the catalog's getPageIndex lookup for the requested reference.
handler.on('GetPageIndex', function wphSetupGetPageIndex(data, promise) {
  // data.ref carries 'num' and 'gen'; build a Ref from them for the catalog.
  var pageRef = new Ref(data.ref.num, data.ref.gen);
  var catalog = pdfManager.pdfModel.catalog;
  catalog.getPageIndex(pageRef).then(
    function (index) {
      promise.resolve(index);
    },
    function (reason) {
      promise.reject(reason);
    }
  );
});
handler.on('GetDestinations', handler.on('GetDestinations',
function wphSetupGetDestinations(data, promise) { function wphSetupGetDestinations(data, promise) {
pdfManager.ensureCatalog('destinations').then(function(destinations) { pdfManager.ensureCatalog('destinations').then(function(destinations) {

View File

@ -187,6 +187,14 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
getPage: function PDFDocumentProxy_getPage(number) { getPage: function PDFDocumentProxy_getPage(number) {
return this.transport.getPage(number); return this.transport.getPage(number);
}, },
/**
* Looks up the page index for a reference by delegating to the transport's
* getPageIndex.
* @param {Object} ref Must have 'num' and 'gen' properties.
* @return {Promise} A promise that is resolved with the page index that is
* associated with the reference.
*/
getPageIndex: function PDFDocumentProxy_getPageIndex(ref) {
return this.transport.getPageIndex(ref);
},
/** /**
* @return {Promise} A promise that is resolved with a lookup table for * @return {Promise} A promise that is resolved with a lookup table for
* mapping named destinations to reference numbers. * mapping named destinations to reference numbers.
@ -861,6 +869,16 @@ var WorkerTransport = (function WorkerTransportClosure() {
return promise; return promise;
}, },
getPageIndex: function WorkerTransport_getPageIndexByRef(ref) {
var promise = new PDFJS.Promise();
this.messageHandler.send('GetPageIndex', { ref: ref },
function (pageIndex) {
promise.resolve(pageIndex);
}
);
return promise;
},
getAnnotations: function WorkerTransport_getAnnotations(pageIndex) { getAnnotations: function WorkerTransport_getAnnotations(pageIndex) {
this.messageHandler.send('GetAnnotationsRequest', this.messageHandler.send('GetAnnotationsRequest',
{ pageIndex: pageIndex }); { pageIndex: pageIndex });

View File

@ -355,7 +355,7 @@ var MissingDataException = (function MissingDataExceptionClosure() {
function MissingDataException(begin, end) { function MissingDataException(begin, end) {
this.begin = begin; this.begin = begin;
this.end = end; this.end = end;
this.message = 'Missing data [begin, end)'; this.message = 'Missing data [' + begin + ', ' + end + ')';
} }
MissingDataException.prototype = new Error(); MissingDataException.prototype = new Error();
@ -928,7 +928,7 @@ var Promise = PDFJS.Promise = (function PromiseClosure() {
/** /**
* Builds a promise that is resolved when all the passed in promises are * Builds a promise that is resolved when all the passed in promises are
* resolved. * resolved.
* @param {Promise[]} promises Array of promises to wait for. * @param {array} array of data and/or promises to wait for.
* @return {Promise} New dependant promise. * @return {Promise} New dependant promise.
*/ */
Promise.all = function Promise_all(promises) { Promise.all = function Promise_all(promises) {
@ -948,7 +948,7 @@ var Promise = PDFJS.Promise = (function PromiseClosure() {
} }
for (var i = 0, ii = promises.length; i < ii; ++i) { for (var i = 0, ii = promises.length; i < ii; ++i) {
var promise = promises[i]; var promise = promises[i];
promise.then((function(i) { var resolve = (function(i) {
return function(value) { return function(value) {
if (deferred._status === STATUS_REJECTED) { if (deferred._status === STATUS_REJECTED) {
return; return;
@ -958,11 +958,24 @@ var Promise = PDFJS.Promise = (function PromiseClosure() {
if (unresolved === 0) if (unresolved === 0)
deferred.resolve(results); deferred.resolve(results);
}; };
})(i), reject); })(i);
if (Promise.isPromise(promise)) {
promise.then(resolve, reject);
} else {
resolve(promise);
}
} }
return deferred; return deferred;
}; };
/**
 * Checks if the value is likely a promise (has a 'then' function).
 * @param {*} value The value to inspect.
 * @return {boolean} true if value is thenable, false otherwise (including
 *   for null, undefined and other falsy values).
 */
Promise.isPromise = function Promise_isPromise(value) {
  // Coerce to a real boolean: a bare `value && ...` would hand back the
  // falsy input itself (null, 0, '') instead of false.
  return !!value && typeof value.then === 'function';
};
Promise.prototype = { Promise.prototype = {
_status: null, _status: null,
_value: null, _value: null,
@ -976,7 +989,7 @@ var Promise = PDFJS.Promise = (function PromiseClosure() {
} }
if (status == STATUS_RESOLVED && if (status == STATUS_RESOLVED &&
value && typeof(value.then) === 'function') { Promise.isPromise(value)) {
value.then(this._updateStatus.bind(this, STATUS_RESOLVED), value.then(this._updateStatus.bind(this, STATUS_RESOLVED),
this._updateStatus.bind(this, STATUS_REJECTED)); this._updateStatus.bind(this, STATUS_REJECTED));
return; return;

View File

@ -0,0 +1,2 @@
http://www.unicode.org/L2/L2006/06334-reph-telugu-gurmukhi.pdf

View File

@ -611,6 +611,15 @@
"link": true, "link": true,
"type": "eq" "type": "eq"
}, },
{ "id": "issue3848",
"file": "pdfs/issue3848.pdf",
"md5": "2498cf0650cc97ceca3e24dfa0425a73",
"rounds": 1,
"lastPage": 1,
"link": true,
"type": "load",
"about": "Document tree with pages and page nodes on the same level."
},
{ "id": "issue1015", { "id": "issue1015",
"file": "pdfs/issue1015.pdf", "file": "pdfs/issue1015.pdf",
"md5": "b61503d1b445742b665212866afb60e2", "md5": "b61503d1b445742b665212866afb60e2",

View File

@ -623,9 +623,9 @@ var PDFView = {
// Update the browsing history. // Update the browsing history.
PDFHistory.push({ dest: dest, hash: destString, page: pageNumber }); PDFHistory.push({ dest: dest, hash: destString, page: pageNumber });
} else { } else {
self.pendingRefStrLoaded = new PDFJS.Promise(); self.pdfDocument.getPageIndex(destRef).then(function (pageIndex) {
self.pendingRefStr = destRef.num + ' ' + destRef.gen + ' R'; var pageNum = pageIndex + 1;
self.pendingRefStrLoaded.then(function() { self.pagesRefMap[destRef.num + ' ' + destRef.gen + ' R'] = pageNum;
goToDestination(destRef); goToDestination(destRef);
}); });
} }
@ -849,32 +849,6 @@ var PDFView = {
PDFView.loadingBar.setWidth(container); PDFView.loadingBar.setWidth(container);
for (var pageNum = 1; pageNum <= pagesCount; ++pageNum) {
var pagePromise = pdfDocument.getPage(pageNum);
pagePromise.then(function(pdfPage) {
var pageNum = pdfPage.pageNumber;
var pageView = pages[pageNum - 1];
if (!pageView.pdfPage) {
// The pdfPage might already be set if we've already entered
// pageView.draw()
pageView.setPdfPage(pdfPage);
}
var thumbnailView = thumbnails[pageNum - 1];
if (!thumbnailView.pdfPage) {
thumbnailView.setPdfPage(pdfPage);
}
var pageRef = pdfPage.ref;
var refStr = pageRef.num + ' ' + pageRef.gen + ' R';
pagesRefMap[refStr] = pdfPage.pageNumber;
if (self.pendingRefStr && self.pendingRefStr === refStr) {
self.pendingRefStrLoaded.resolve();
}
});
pagePromises.push(pagePromise);
}
PDFFindController.firstPagePromise.resolve(); PDFFindController.firstPagePromise.resolve();
PDFJS.Promise.all(pagePromises).then(function(pages) { PDFJS.Promise.all(pagePromises).then(function(pages) {