Merge pull request #8207 from Snuffleupagus/cache-getPageDict

Use a simple `RefSetCache` to significantly improve the performance of `Catalog.getPageDict` for certain long documents (PR 8105 follow-up)
This commit is contained in:
Brendan Dahl 2017-03-30 09:28:11 -07:00 committed by GitHub
commit 72eeb1ccb3

View File

@ -50,6 +50,7 @@ var stringToUTF8String = sharedUtil.stringToUTF8String;
var warn = sharedUtil.warn; var warn = sharedUtil.warn;
var createValidAbsoluteUrl = sharedUtil.createValidAbsoluteUrl; var createValidAbsoluteUrl = sharedUtil.createValidAbsoluteUrl;
var Util = sharedUtil.Util; var Util = sharedUtil.Util;
var Dict = corePrimitives.Dict;
var Ref = corePrimitives.Ref; var Ref = corePrimitives.Ref;
var RefSet = corePrimitives.RefSet; var RefSet = corePrimitives.RefSet;
var RefSetCache = corePrimitives.RefSetCache; var RefSetCache = corePrimitives.RefSetCache;
@ -70,10 +71,11 @@ var Catalog = (function CatalogClosure() {
this.pdfManager = pdfManager; this.pdfManager = pdfManager;
this.xref = xref; this.xref = xref;
this.catDict = xref.getCatalogObj(); this.catDict = xref.getCatalogObj();
this.fontCache = new RefSetCache();
this.builtInCMapCache = Object.create(null);
assert(isDict(this.catDict), 'catalog object is not a dictionary'); assert(isDict(this.catDict), 'catalog object is not a dictionary');
this.fontCache = new RefSetCache();
this.builtInCMapCache = Object.create(null);
this.pageKidsCountCache = new RefSetCache();
// TODO refactor to move getPage() to the PDFDocument. // TODO refactor to move getPage() to the PDFDocument.
this.pageFactory = pageFactory; this.pageFactory = pageFactory;
this.pagePromises = []; this.pagePromises = [];
@ -421,6 +423,8 @@ var Catalog = (function CatalogClosure() {
}, },
cleanup: function Catalog_cleanup() { cleanup: function Catalog_cleanup() {
this.pageKidsCountCache.clear();
var promises = []; var promises = [];
this.fontCache.forEach(function (promise) { this.fontCache.forEach(function (promise) {
promises.push(promise); promises.push(promise);
@ -453,17 +457,30 @@ var Catalog = (function CatalogClosure() {
getPageDict: function Catalog_getPageDict(pageIndex) { getPageDict: function Catalog_getPageDict(pageIndex) {
var capability = createPromiseCapability(); var capability = createPromiseCapability();
var nodesToVisit = [this.catDict.getRaw('Pages')]; var nodesToVisit = [this.catDict.getRaw('Pages')];
var currentPageIndex = 0; var count, currentPageIndex = 0;
var xref = this.xref; var xref = this.xref, pageKidsCountCache = this.pageKidsCountCache;
function next() { function next() {
while (nodesToVisit.length) { while (nodesToVisit.length) {
var currentNode = nodesToVisit.pop(); var currentNode = nodesToVisit.pop();
if (isRef(currentNode)) { if (isRef(currentNode)) {
count = pageKidsCountCache.get(currentNode);
// Skip nodes where the page can't be.
if (count > 0 && currentPageIndex + count < pageIndex) {
currentPageIndex += count;
continue;
}
xref.fetchAsync(currentNode).then(function (obj) { xref.fetchAsync(currentNode).then(function (obj) {
if (isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids'))) { if (isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids'))) {
if (pageIndex === currentPageIndex) { if (pageIndex === currentPageIndex) {
// Cache the Page reference, since it can *greatly* improve
// performance by reducing redundant lookups in long documents
// where all nodes are found at *one* level of the tree.
if (currentNode && !pageKidsCountCache.has(currentNode)) {
pageKidsCountCache.put(currentNode, 1);
}
capability.resolve([obj, currentNode]); capability.resolve([obj, currentNode]);
} else { } else {
currentPageIndex++; currentPageIndex++;
@ -481,7 +498,13 @@ var Catalog = (function CatalogClosure() {
assert(isDict(currentNode), assert(isDict(currentNode),
'page dictionary kid reference points to wrong type of object'); 'page dictionary kid reference points to wrong type of object');
var count = currentNode.get('Count'); count = currentNode.get('Count');
// Cache the Kids count, since it can reduce redundant lookups in long
// documents where all nodes are found at *one* level of the tree.
var objId = currentNode.objId;
if (objId && !pageKidsCountCache.has(objId)) {
pageKidsCountCache.put(objId, count);
}
// Skip nodes where the page can't be. // Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) { if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count; currentPageIndex += count;
@ -1251,7 +1274,7 @@ var XRef = (function XRefClosure() {
var cacheEntry = this.cache[num]; var cacheEntry = this.cache[num];
// In documents with Object Streams, it's possible that cached `Dict`s // In documents with Object Streams, it's possible that cached `Dict`s
// have not been assigned an `objId` yet (see e.g. issue3115r.pdf). // have not been assigned an `objId` yet (see e.g. issue3115r.pdf).
if (isDict(cacheEntry) && !cacheEntry.objId) { if (cacheEntry instanceof Dict && !cacheEntry.objId) {
cacheEntry.objId = ref.toString(); cacheEntry.objId = ref.toString();
} }
return cacheEntry; return cacheEntry;