Don't traverse all pages to get a single page.

This commit is contained in:
Brendan Dahl 2013-11-13 15:27:46 -08:00
parent 516d2e79be
commit c2d65fc4ab
8 changed files with 183 additions and 84 deletions

View File

@ -494,10 +494,6 @@ var PDFDocument = (function PDFDocumentClosure() {
return shadow(this, 'fingerprint', fileID);
},
// Delegates to the catalog's traversePages(); takes no arguments and returns
// nothing of its own.
traversePages: function PDFDocument_traversePages() {
this.catalog.traversePages();
},
// Looks up a page by index by delegating to the catalog; the return value is
// whatever this.catalog.getPage(pageIndex) returns.
getPage: function PDFDocument_getPage(pageIndex) {
return this.catalog.getPage(pageIndex);
}

View File

@ -217,14 +217,7 @@ var Catalog = (function CatalogClosure() {
assertWellFormed(isDict(this.catDict),
'catalog object is not a dictionary');
// Stores state as we traverse the pages catalog so that we can resume
// parsing if an exception is thrown
this.traversePagesQueue = [{
pagesDict: this.toplevelPagesDict,
posInKids: 0
}];
this.pagePromises = [];
this.currPageIndex = 0;
}
Catalog.prototype = {
@ -408,58 +401,146 @@ var Catalog = (function CatalogClosure() {
},
/**
 * Returns the promise for the page at `pageIndex`, creating it on the first
 * request and caching it in `this.pagePromises` for later calls.
 * @param {number} pageIndex Integer in the range [0, this.numPages).
 * @return {Promise} For a valid index, the cached promise chained from
 *   getPageDict(pageIndex), whose handler builds a Page from the dictionary
 *   and reference it receives. For an invalid index, a promise rejected with
 *   Error('Invalid page index').
 */
getPage: function Catalog_getPage(pageIndex) {
// Out-of-range values are rejected, and so is anything that is not a 32-bit
// integer: (pageIndex|0) truncates, so a fractional value compares unequal.
if (pageIndex < 0 || pageIndex >= this.numPages ||
(pageIndex|0) !== pageIndex) {
var pagePromise = new Promise();
pagePromise.reject(new Error('Invalid page index'));
return pagePromise;
}
// Only start a lookup when no promise has been stored for this index yet.
if (!(pageIndex in this.pagePromises)) {
// NOTE(review): this placeholder is overwritten by the assignment directly
// below — confirm whether it is still needed.
this.pagePromises[pageIndex] = new Promise();
this.pagePromises[pageIndex] = this.getPageDict(pageIndex).then(
function (a) {
// `a` is a two-element array: the page dictionary and its reference.
var dict = a[0];
var ref = a[1];
return new Page(this.pdfManager, this.xref, pageIndex, dict, ref);
}.bind(this)
);
}
return this.pagePromises[pageIndex];
},
// Traverses pages in DFS order so that pages are processed in increasing
// order
traversePages: function Catalog_traversePages() {
var queue = this.traversePagesQueue;
while (queue.length) {
var queueItem = queue[queue.length - 1];
var pagesDict = queueItem.pagesDict;
getPageDict: function Catalog_getPageDict(pageIndex) {
var promise = new Promise();
var nodesToVisit = [this.catDict.getRaw('Pages')];
var currentPageIndex = 0;
var xref = this.xref;
var kids = pagesDict.get('Kids');
assert(isArray(kids), 'page dictionary kids object is not an array');
if (queueItem.posInKids >= kids.length) {
queue.pop();
continue;
}
var kidRef = kids[queueItem.posInKids];
assert(isRef(kidRef), 'page dictionary kid is not a reference');
function next() {
while (nodesToVisit.length) {
var currentNode = nodesToVisit.pop();
var kid = this.xref.fetch(kidRef);
if (isDict(kid, 'Page') || (isDict(kid) && !kid.has('Kids'))) {
var pageIndex = this.currPageIndex++;
var page = new Page(this.pdfManager, this.xref, pageIndex, kid,
kidRef);
if (!(pageIndex in this.pagePromises)) {
this.pagePromises[pageIndex] = new Promise();
if (isRef(currentNode)) {
xref.fetchAsync(currentNode).then(function (obj) {
if ((isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids')))) {
if (pageIndex === currentPageIndex) {
promise.resolve([obj, currentNode]);
} else {
currentPageIndex++;
next();
}
return;
}
nodesToVisit.push(obj);
next();
}.bind(this), promise.reject.bind(promise));
return;
}
this.pagePromises[pageIndex].resolve(page);
} else { // must be a child page dictionary
// must be a child page dictionary
assert(
isDict(kid),
isDict(currentNode),
'page dictionary kid reference points to wrong type of object'
);
var count = currentNode.get('Count');
// Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
continue;
}
queue.push({
pagesDict: kid,
posInKids: 0
});
var kids = currentNode.get('Kids');
assert(isArray(kids), 'page dictionary kids object is not an array');
if (count === kids.length) {
// Nodes that don't have the page have been skipped and this is the
// bottom of the tree which means the page requested must be a
// descendant of this pages node. Ideally we would just resolve the
// promise with the page ref here, but there is the case where more
// pages nodes could link to a single page (see issue 3666 pdf). To
// handle this push it back on the queue so if it is a pages node it
// will be descended into.
nodesToVisit = [kids[pageIndex - currentPageIndex]];
currentPageIndex = pageIndex;
continue;
} else {
for (var last = kids.length - 1; last >= 0; last--) {
nodesToVisit.push(kids[last]);
}
}
}
++queueItem.posInKids;
promise.reject('Page index ' + pageIndex + ' not found.');
}
next();
return promise;
},
getPageIndex: function Catalog_getPageIndex(ref) {
// The page tree nodes have the count of all the leaves below them. To get
// how many pages are before we just have to walk up the tree and keep
// adding the count of siblings to the left of the node.
var xref = this.xref;
function pagesBeforeRef(kidRef) {
var total = 0;
var parentRef;
return xref.fetchAsync(kidRef).then(function (node) {
if (!node) {
return null;
}
parentRef = node.getRaw('Parent');
return node.getAsync('Parent');
}).then(function (parent) {
if (!parent) {
return null;
}
return parent.getAsync('Kids');
}).then(function (kids) {
if (!kids) {
return null;
}
var kidPromises = [];
var found = false;
for (var i = 0; i < kids.length; i++) {
var kid = kids[i];
assert(isRef(kid), 'kids must be an ref');
if (kid.num == kidRef.num) {
found = true;
break;
}
kidPromises.push(xref.fetchAsync(kid).then(function (kid) {
if (kid.has('Count')) {
var count = kid.get('Count');
total += count;
} else { // page leaf node
total++;
}
}));
}
if (!found) {
error('kid ref not found in parents kids');
}
return Promise.all(kidPromises).then(function () {
return [total, parentRef];
});
});
}
var total = 0;
function next(ref) {
return pagesBeforeRef(ref).then(function (args) {
if (!args) {
return total;
}
var count = args[0];
var parentRef = args[1];
total += count;
return next(parentRef);
});
}
return next(ref);
}
};

View File

@ -18,7 +18,7 @@
MissingPDFException, PasswordException, PDFJS, Promise,
UnknownErrorException, NetworkManager, LocalPdfManager,
NetworkPdfManager, XRefParseException,
isInt, PasswordResponses, MessageHandler */
isInt, PasswordResponses, MessageHandler, Ref */
'use strict';
@ -193,7 +193,6 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
var onSuccess = function(doc) {
handler.send('GetDoc', { pdfInfo: doc });
pdfManager.ensureModel('traversePages', []).then(null, onFailure);
};
var onFailure = function(e) {
@ -270,6 +269,13 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
});
});
// Answers 'GetPageIndex' requests: rebuilds a Ref from the plain
// { num, gen } data in the message and settles the reply promise with the
// outcome of the catalog lookup.
handler.on('GetPageIndex', function wphSetupGetPageIndex(data, promise) {
  var pageRef = new Ref(data.ref.num, data.ref.gen);
  var indexPromise = pdfManager.pdfModel.catalog.getPageIndex(pageRef);
  indexPromise.then(function onPageIndex(index) {
    promise.resolve(index);
  }, promise.reject.bind(promise));
});
handler.on('GetDestinations',
function wphSetupGetDestinations(data, promise) {
pdfManager.ensureCatalog('destinations').then(function(destinations) {

View File

@ -187,6 +187,14 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
getPage: function PDFDocumentProxy_getPage(number) {
return this.transport.getPage(number);
},
/**
* @param {object} ref Reference to look up. Must have 'num' and 'gen'
* properties.
* @return {Promise} A promise that is resolved with the page index that is
* associated with the reference. The call is forwarded unchanged to the
* transport's getPageIndex.
*/
getPageIndex: function PDFDocumentProxy_getPageIndex(ref) {
return this.transport.getPageIndex(ref);
},
/**
* @return {Promise} A promise that is resolved with a lookup table for
* mapping named destinations to reference numbers.
@ -861,6 +869,16 @@ var WorkerTransport = (function WorkerTransportClosure() {
return promise;
},
getPageIndex: function WorkerTransport_getPageIndexByRef(ref) {
var promise = new PDFJS.Promise();
this.messageHandler.send('GetPageIndex', { ref: ref },
function (pageIndex) {
promise.resolve(pageIndex);
}
);
return promise;
},
getAnnotations: function WorkerTransport_getAnnotations(pageIndex) {
this.messageHandler.send('GetAnnotationsRequest',
{ pageIndex: pageIndex });

View File

@ -355,7 +355,7 @@ var MissingDataException = (function MissingDataExceptionClosure() {
/**
 * Exception describing a range of data that is not available.
 * @param begin Start of the missing range; stored on the instance and shown
 *   as the inclusive bound in the message.
 * @param end End of the missing range; stored on the instance and shown as
 *   the exclusive bound in the message.
 */
function MissingDataException(begin, end) {
  this.begin = begin;
  this.end = end;
  // Report the actual bounds in half-open interval notation. A placeholder
  // message used to be assigned first and was overwritten immediately.
  this.message = 'Missing data [' + begin + ', ' + end + ')';
}
MissingDataException.prototype = new Error();
@ -928,7 +928,7 @@ var Promise = PDFJS.Promise = (function PromiseClosure() {
/**
* Builds a promise that is resolved when all the passed in promises are
* resolved.
* @param {Promise[]} promises Array of promises to wait for.
* @param {array} array of data and/or promises to wait for.
* @return {Promise} New dependant promise.
*/
Promise.all = function Promise_all(promises) {
@ -948,7 +948,7 @@ var Promise = PDFJS.Promise = (function PromiseClosure() {
}
for (var i = 0, ii = promises.length; i < ii; ++i) {
var promise = promises[i];
promise.then((function(i) {
var resolve = (function(i) {
return function(value) {
if (deferred._status === STATUS_REJECTED) {
return;
@ -958,11 +958,24 @@ var Promise = PDFJS.Promise = (function PromiseClosure() {
if (unresolved === 0)
deferred.resolve(results);
};
})(i), reject);
})(i);
if (Promise.isPromise(promise)) {
promise.then(resolve, reject);
} else {
resolve(promise);
}
}
return deferred;
};
/**
 * Checks if the value is likely a promise (has a 'then' function).
 * @param {*} value Value to test; may be null, undefined or a primitive.
 * @return {boolean} true if value is thenable, false otherwise
 */
Promise.isPromise = function Promise_isPromise(value) {
  // Coerce to a real boolean: a bare `value && ...` would hand falsy inputs
  // (null, undefined, 0, '') straight back to the caller.
  return !!value && typeof value.then === 'function';
};
Promise.prototype = {
_status: null,
_value: null,
@ -976,7 +989,7 @@ var Promise = PDFJS.Promise = (function PromiseClosure() {
}
if (status == STATUS_RESOLVED &&
value && typeof(value.then) === 'function') {
Promise.isPromise(value)) {
value.then(this._updateStatus.bind(this, STATUS_RESOLVED),
this._updateStatus.bind(this, STATUS_REJECTED));
return;

View File

@ -0,0 +1,2 @@
http://www.unicode.org/L2/L2006/06334-reph-telugu-gurmukhi.pdf

View File

@ -611,6 +611,15 @@
"link": true,
"type": "eq"
},
{ "id": "issue3848",
"file": "pdfs/issue3848.pdf",
"md5": "2498cf0650cc97ceca3e24dfa0425a73",
"rounds": 1,
"lastPage": 1,
"link": true,
"type": "load",
"about": "Document tree with pages and page nodes on the same level."
},
{ "id": "issue1015",
"file": "pdfs/issue1015.pdf",
"md5": "b61503d1b445742b665212866afb60e2",

View File

@ -623,9 +623,9 @@ var PDFView = {
// Update the browsing history.
PDFHistory.push({ dest: dest, hash: destString, page: pageNumber });
} else {
self.pendingRefStrLoaded = new PDFJS.Promise();
self.pendingRefStr = destRef.num + ' ' + destRef.gen + ' R';
self.pendingRefStrLoaded.then(function() {
self.pdfDocument.getPageIndex(destRef).then(function (pageIndex) {
var pageNum = pageIndex + 1;
self.pagesRefMap[destRef.num + ' ' + destRef.gen + ' R'] = pageNum;
goToDestination(destRef);
});
}
@ -849,32 +849,6 @@ var PDFView = {
PDFView.loadingBar.setWidth(container);
for (var pageNum = 1; pageNum <= pagesCount; ++pageNum) {
var pagePromise = pdfDocument.getPage(pageNum);
pagePromise.then(function(pdfPage) {
var pageNum = pdfPage.pageNumber;
var pageView = pages[pageNum - 1];
if (!pageView.pdfPage) {
// The pdfPage might already be set if we've already entered
// pageView.draw()
pageView.setPdfPage(pdfPage);
}
var thumbnailView = thumbnails[pageNum - 1];
if (!thumbnailView.pdfPage) {
thumbnailView.setPdfPage(pdfPage);
}
var pageRef = pdfPage.ref;
var refStr = pageRef.num + ' ' + pageRef.gen + ' R';
pagesRefMap[refStr] = pdfPage.pageNumber;
if (self.pendingRefStr && self.pendingRefStr === refStr) {
self.pendingRefStrLoaded.resolve();
}
});
pagePromises.push(pagePromise);
}
PDFFindController.firstPagePromise.resolve();
PDFJS.Promise.all(pagePromises).then(function(pages) {