diff --git a/src/core/core.js b/src/core/core.js index 69a76f6d8..72aa2f4b4 100644 --- a/src/core/core.js +++ b/src/core/core.js @@ -494,10 +494,6 @@ var PDFDocument = (function PDFDocumentClosure() { return shadow(this, 'fingerprint', fileID); }, - traversePages: function PDFDocument_traversePages() { - this.catalog.traversePages(); - }, - getPage: function PDFDocument_getPage(pageIndex) { return this.catalog.getPage(pageIndex); } diff --git a/src/core/obj.js b/src/core/obj.js index eead3141d..837ced393 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -217,14 +217,7 @@ var Catalog = (function CatalogClosure() { assertWellFormed(isDict(this.catDict), 'catalog object is not a dictionary'); - // Stores state as we traverse the pages catalog so that we can resume - // parsing if an exception is thrown - this.traversePagesQueue = [{ - pagesDict: this.toplevelPagesDict, - posInKids: 0 - }]; this.pagePromises = []; - this.currPageIndex = 0; } Catalog.prototype = { @@ -408,58 +401,146 @@ var Catalog = (function CatalogClosure() { }, getPage: function Catalog_getPage(pageIndex) { - if (pageIndex < 0 || pageIndex >= this.numPages || - (pageIndex|0) !== pageIndex) { - var pagePromise = new Promise(); - pagePromise.reject(new Error('Invalid page index')); - return pagePromise; - } if (!(pageIndex in this.pagePromises)) { - this.pagePromises[pageIndex] = new Promise(); + this.pagePromises[pageIndex] = this.getPageDict(pageIndex).then( + function (a) { + var dict = a[0]; + var ref = a[1]; + return new Page(this.pdfManager, this.xref, pageIndex, dict, ref); + }.bind(this) + ); } return this.pagePromises[pageIndex]; }, - // Traverses pages in DFS order so that pages are processed in increasing - // order - traversePages: function Catalog_traversePages() { - var queue = this.traversePagesQueue; - while (queue.length) { - var queueItem = queue[queue.length - 1]; - var pagesDict = queueItem.pagesDict; + getPageDict: function Catalog_getPageDict(pageIndex) { + var promise = new 
Promise(); + var nodesToVisit = [this.catDict.getRaw('Pages')]; + var currentPageIndex = 0; + var xref = this.xref; - var kids = pagesDict.get('Kids'); - assert(isArray(kids), 'page dictionary kids object is not an array'); - if (queueItem.posInKids >= kids.length) { - queue.pop(); - continue; - } - var kidRef = kids[queueItem.posInKids]; - assert(isRef(kidRef), 'page dictionary kid is not a reference'); + function next() { + while (nodesToVisit.length) { + var currentNode = nodesToVisit.pop(); - var kid = this.xref.fetch(kidRef); - if (isDict(kid, 'Page') || (isDict(kid) && !kid.has('Kids'))) { - var pageIndex = this.currPageIndex++; - var page = new Page(this.pdfManager, this.xref, pageIndex, kid, - kidRef); - if (!(pageIndex in this.pagePromises)) { - this.pagePromises[pageIndex] = new Promise(); + if (isRef(currentNode)) { + xref.fetchAsync(currentNode).then(function (obj) { + if ((isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids')))) { + if (pageIndex === currentPageIndex) { + promise.resolve([obj, currentNode]); + } else { + currentPageIndex++; + next(); + } + return; + } + nodesToVisit.push(obj); + next(); + }.bind(this), promise.reject.bind(promise)); + return; } - this.pagePromises[pageIndex].resolve(page); - } else { // must be a child page dictionary + // must be a child page dictionary assert( - isDict(kid), + isDict(currentNode), 'page dictionary kid reference points to wrong type of object' ); + var count = currentNode.get('Count'); + // Skip nodes where the page can't be. + if (currentPageIndex + count <= pageIndex) { + currentPageIndex += count; + continue; + } - queue.push({ - pagesDict: kid, - posInKids: 0 - }); + var kids = currentNode.get('Kids'); + assert(isArray(kids), 'page dictionary kids object is not an array'); + if (count === kids.length) { + // Nodes that don't have the page have been skipped and this is the + // bottom of the tree which means the page requested must be a + // descendant of this pages node. 
Ideally we would just resolve the + // promise with the page ref here, but there is the case where more than + // one pages node could link to a single page (see issue 3666 pdf). To + // handle this, push it back on the queue so if it is a pages node it + // will be descended into. + nodesToVisit = [kids[pageIndex - currentPageIndex]]; + currentPageIndex = pageIndex; + continue; + } else { + for (var last = kids.length - 1; last >= 0; last--) { + nodesToVisit.push(kids[last]); + } + } } - ++queueItem.posInKids; + promise.reject('Page index ' + pageIndex + ' not found.'); } + next(); + return promise; + }, + + getPageIndex: function Catalog_getPageIndex(ref) { + // The page tree nodes have the count of all the leaves below them. To get + // how many pages come before a node, we just have to walk up the tree and + // keep adding the counts of the siblings to the left of the node. + var xref = this.xref; + function pagesBeforeRef(kidRef) { + var total = 0; + var parentRef; + return xref.fetchAsync(kidRef).then(function (node) { + if (!node) { + return null; + } + parentRef = node.getRaw('Parent'); + return node.getAsync('Parent'); + }).then(function (parent) { + if (!parent) { + return null; + } + return parent.getAsync('Kids'); + }).then(function (kids) { + if (!kids) { + return null; + } + var kidPromises = []; + var found = false; + for (var i = 0; i < kids.length; i++) { + var kid = kids[i]; + assert(isRef(kid), 'kids must be an ref'); + if (kid.num == kidRef.num) { + found = true; + break; + } + kidPromises.push(xref.fetchAsync(kid).then(function (kid) { + if (kid.has('Count')) { + var count = kid.get('Count'); + total += count; + } else { // page leaf node + total++; + } + })); + } + if (!found) { + error('kid ref not found in parents kids'); + } + return Promise.all(kidPromises).then(function () { + return [total, parentRef]; + }); + }); + } + + var total = 0; + function next(ref) { + return pagesBeforeRef(ref).then(function (args) { + if (!args) { + return total; + } + var count = 
args[0]; + var parentRef = args[1]; + total += count; + return next(parentRef); + }); + } + + return next(ref); } }; diff --git a/src/core/worker.js b/src/core/worker.js index d43ef327d..9d93fc7d7 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -18,7 +18,7 @@ MissingPDFException, PasswordException, PDFJS, Promise, UnknownErrorException, NetworkManager, LocalPdfManager, NetworkPdfManager, XRefParseException, - isInt, PasswordResponses, MessageHandler */ + isInt, PasswordResponses, MessageHandler, Ref */ 'use strict'; @@ -193,7 +193,6 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = { var onSuccess = function(doc) { handler.send('GetDoc', { pdfInfo: doc }); - pdfManager.ensureModel('traversePages', []).then(null, onFailure); }; var onFailure = function(e) { @@ -270,6 +269,13 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = { }); }); + handler.on('GetPageIndex', function wphSetupGetPageIndex(data, promise) { + var ref = new Ref(data.ref.num, data.ref.gen); + pdfManager.pdfModel.catalog.getPageIndex(ref).then(function (pageIndex) { + promise.resolve(pageIndex); + }, promise.reject.bind(promise)); + }); + handler.on('GetDestinations', function wphSetupGetDestinations(data, promise) { pdfManager.ensureCatalog('destinations').then(function(destinations) { diff --git a/src/display/api.js b/src/display/api.js index 2ad762dd1..28e0a8480 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -187,6 +187,14 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() { getPage: function PDFDocumentProxy_getPage(number) { return this.transport.getPage(number); }, + /** + * @param {object} Must have 'num' and 'gen' properties. + * @return {Promise} A promise that is resolved with the page index that is + * associated with the reference. 
+ */ + getPageIndex: function PDFDocumentProxy_getPageIndex(ref) { + return this.transport.getPageIndex(ref); + }, /** * @return {Promise} A promise that is resolved with a lookup table for * mapping named destinations to reference numbers. @@ -861,6 +869,16 @@ var WorkerTransport = (function WorkerTransportClosure() { return promise; }, + getPageIndex: function WorkerTransport_getPageIndexByRef(ref) { + var promise = new PDFJS.Promise(); + this.messageHandler.send('GetPageIndex', { ref: ref }, + function (pageIndex) { + promise.resolve(pageIndex); + } + ); + return promise; + }, + getAnnotations: function WorkerTransport_getAnnotations(pageIndex) { this.messageHandler.send('GetAnnotationsRequest', { pageIndex: pageIndex }); diff --git a/src/shared/util.js b/src/shared/util.js index cccd284d2..6400c5d7a 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -355,7 +355,7 @@ var MissingDataException = (function MissingDataExceptionClosure() { function MissingDataException(begin, end) { this.begin = begin; this.end = end; - this.message = 'Missing data [begin, end)'; + this.message = 'Missing data [' + begin + ', ' + end + ')'; } MissingDataException.prototype = new Error(); @@ -928,7 +928,7 @@ var Promise = PDFJS.Promise = (function PromiseClosure() { /** * Builds a promise that is resolved when all the passed in promises are * resolved. - * @param {Promise[]} promises Array of promises to wait for. + * @param {array} promises Array of data and/or promises to wait for. * @return {Promise} New dependant promise. 
*/ Promise.all = function Promise_all(promises) { @@ -948,7 +948,7 @@ var Promise = PDFJS.Promise = (function PromiseClosure() { } for (var i = 0, ii = promises.length; i < ii; ++i) { var promise = promises[i]; - promise.then((function(i) { + var resolve = (function(i) { return function(value) { if (deferred._status === STATUS_REJECTED) { return; @@ -958,11 +958,24 @@ var Promise = PDFJS.Promise = (function PromiseClosure() { if (unresolved === 0) deferred.resolve(results); }; - })(i), reject); + })(i); + if (Promise.isPromise(promise)) { + promise.then(resolve, reject); + } else { + resolve(promise); + } } return deferred; }; + /** + * Checks if the value is likely a promise (has a 'then' function). + * @return {boolean} true if x is thenable + */ + Promise.isPromise = function Promise_isPromise(value) { + return value && typeof value.then === 'function'; + }; + Promise.prototype = { _status: null, _value: null, @@ -976,7 +989,7 @@ var Promise = PDFJS.Promise = (function PromiseClosure() { } if (status == STATUS_RESOLVED && - value && typeof(value.then) === 'function') { + Promise.isPromise(value)) { value.then(this._updateStatus.bind(this, STATUS_RESOLVED), this._updateStatus.bind(this, STATUS_REJECTED)); return; diff --git a/test/pdfs/issue3848.pdf.link b/test/pdfs/issue3848.pdf.link new file mode 100644 index 000000000..a56b2418b --- /dev/null +++ b/test/pdfs/issue3848.pdf.link @@ -0,0 +1,2 @@ +http://www.unicode.org/L2/L2006/06334-reph-telugu-gurmukhi.pdf + diff --git a/test/test_manifest.json b/test/test_manifest.json index 82ada5c10..7003737da 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -611,6 +611,15 @@ "link": true, "type": "eq" }, + { "id": "issue3848", + "file": "pdfs/issue3848.pdf", + "md5": "2498cf0650cc97ceca3e24dfa0425a73", + "rounds": 1, + "lastPage": 1, + "link": true, + "type": "load", + "about": "Document tree with pages and page nodes on the same level." 
+ }, { "id": "issue1015", "file": "pdfs/issue1015.pdf", "md5": "b61503d1b445742b665212866afb60e2", diff --git a/web/viewer.js b/web/viewer.js index 701bf67d2..267da1b89 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -623,9 +623,9 @@ var PDFView = { // Update the browsing history. PDFHistory.push({ dest: dest, hash: destString, page: pageNumber }); } else { - self.pendingRefStrLoaded = new PDFJS.Promise(); - self.pendingRefStr = destRef.num + ' ' + destRef.gen + ' R'; - self.pendingRefStrLoaded.then(function() { + self.pdfDocument.getPageIndex(destRef).then(function (pageIndex) { + var pageNum = pageIndex + 1; + self.pagesRefMap[destRef.num + ' ' + destRef.gen + ' R'] = pageNum; goToDestination(destRef); }); } @@ -849,32 +849,6 @@ var PDFView = { PDFView.loadingBar.setWidth(container); - for (var pageNum = 1; pageNum <= pagesCount; ++pageNum) { - var pagePromise = pdfDocument.getPage(pageNum); - pagePromise.then(function(pdfPage) { - var pageNum = pdfPage.pageNumber; - var pageView = pages[pageNum - 1]; - if (!pageView.pdfPage) { - // The pdfPage might already be set if we've already entered - // pageView.draw() - pageView.setPdfPage(pdfPage); - } - var thumbnailView = thumbnails[pageNum - 1]; - if (!thumbnailView.pdfPage) { - thumbnailView.setPdfPage(pdfPage); - } - - var pageRef = pdfPage.ref; - var refStr = pageRef.num + ' ' + pageRef.gen + ' R'; - pagesRefMap[refStr] = pdfPage.pageNumber; - - if (self.pendingRefStr && self.pendingRefStr === refStr) { - self.pendingRefStrLoaded.resolve(); - } - }); - pagePromises.push(pagePromise); - } - PDFFindController.firstPagePromise.resolve(); PDFJS.Promise.all(pagePromises).then(function(pages) {