Merge pull request #14400 from Snuffleupagus/getPageDict-async

[api-minor] Convert `Catalog.getPageDict` to an asynchronous method
This commit is contained in:
Tim van der Meij 2021-12-28 19:40:34 +01:00 committed by GitHub
commit e42d54e1b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 102 additions and 120 deletions

View File

@ -34,7 +34,6 @@ import {
XRefEntryException, XRefEntryException,
} from "./core_utils.js"; } from "./core_utils.js";
import { import {
createPromiseCapability,
createValidAbsoluteUrl, createValidAbsoluteUrl,
DocumentActionEventType, DocumentActionEventType,
FormatError, FormatError,
@ -1091,8 +1090,7 @@ class Catalog {
}); });
} }
getPageDict(pageIndex) { async getPageDict(pageIndex) {
const capability = createPromiseCapability();
const nodesToVisit = [this.toplevelPagesDict]; const nodesToVisit = [this.toplevelPagesDict];
const visitedNodes = new RefSet(); const visitedNodes = new RefSet();
@ -1104,130 +1102,105 @@ class Catalog {
pageKidsCountCache = this.pageKidsCountCache; pageKidsCountCache = this.pageKidsCountCache;
let currentPageIndex = 0; let currentPageIndex = 0;
function next() { while (nodesToVisit.length) {
while (nodesToVisit.length) { const currentNode = nodesToVisit.pop();
const currentNode = nodesToVisit.pop();
if (currentNode instanceof Ref) { if (currentNode instanceof Ref) {
const count = pageKidsCountCache.get(currentNode); const count = pageKidsCountCache.get(currentNode);
// Skip nodes where the page can't be. // Skip nodes where the page can't be.
if (count >= 0 && currentPageIndex + count <= pageIndex) { if (count >= 0 && currentPageIndex + count <= pageIndex) {
currentPageIndex += count; currentPageIndex += count;
continue; continue;
}
// Prevent circular references in the /Pages tree.
if (visitedNodes.has(currentNode)) {
throw new FormatError("Pages tree contains circular reference.");
}
visitedNodes.put(currentNode);
const obj = await xref.fetchAsync(currentNode);
if (obj instanceof Dict) {
let type = obj.getRaw("Type");
if (type instanceof Ref) {
type = await xref.fetchAsync(type);
} }
// Prevent circular references in the /Pages tree. if (isName(type, "Page") || !obj.has("Kids")) {
if (visitedNodes.has(currentNode)) { // Cache the Page reference, since it can *greatly* improve
capability.reject( // performance by reducing redundant lookups in long documents
new FormatError("Pages tree contains circular reference.") // where all nodes are found at *one* level of the tree.
); if (currentNode && !pageKidsCountCache.has(currentNode)) {
return; pageKidsCountCache.put(currentNode, 1);
}
visitedNodes.put(currentNode);
xref.fetchAsync(currentNode).then(function (obj) {
if (isDict(obj, "Page") || (isDict(obj) && !obj.has("Kids"))) {
// Cache the Page reference, since it can *greatly* improve
// performance by reducing redundant lookups in long documents
// where all nodes are found at *one* level of the tree.
if (currentNode && !pageKidsCountCache.has(currentNode)) {
pageKidsCountCache.put(currentNode, 1);
}
if (pageIndex === currentPageIndex) {
capability.resolve([obj, currentNode]);
} else {
currentPageIndex++;
next();
}
return;
} }
nodesToVisit.push(obj);
next();
}, capability.reject);
return;
}
// Must be a child page dictionary.
if (!(currentNode instanceof Dict)) {
capability.reject(
new FormatError(
"Page dictionary kid reference points to wrong type of object."
)
);
return;
}
let count;
try {
count = currentNode.get("Count");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
}
if (Number.isInteger(count) && count >= 0) {
// Cache the Kids count, since it can reduce redundant lookups in
// documents where all nodes are found at *one* level of the tree.
const objId = currentNode.objId;
if (objId && !pageKidsCountCache.has(objId)) {
pageKidsCountCache.put(objId, count);
}
// Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
continue;
}
}
let kids;
try {
kids = currentNode.get("Kids");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
}
if (!Array.isArray(kids)) {
// Prevent errors in corrupt PDF documents that violate the
// specification by *inlining* Page dicts directly in the Kids
// array, rather than using indirect objects (fixes issue9540.pdf).
let type;
try {
type = currentNode.get("Type");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
}
if (
isName(type, "Page") ||
(!currentNode.has("Type") && currentNode.has("Contents"))
) {
if (currentPageIndex === pageIndex) { if (currentPageIndex === pageIndex) {
capability.resolve([currentNode, null]); return [obj, currentNode];
return;
} }
currentPageIndex++; currentPageIndex++;
continue; continue;
} }
}
nodesToVisit.push(obj);
continue;
}
capability.reject( // Must be a child page dictionary.
new FormatError("Page dictionary kids object is not an array.") if (!(currentNode instanceof Dict)) {
); throw new FormatError(
return; "Page dictionary kid reference points to wrong type of object."
);
}
const { objId } = currentNode;
let count = currentNode.getRaw("Count");
if (count instanceof Ref) {
count = await xref.fetchAsync(count);
}
if (Number.isInteger(count) && count >= 0) {
// Cache the Kids count, since it can reduce redundant lookups in
// documents where all nodes are found at *one* level of the tree.
if (objId && !pageKidsCountCache.has(objId)) {
pageKidsCountCache.put(objId, count);
} }
// Always check all `Kids` nodes, to avoid getting stuck in an empty // Skip nodes where the page can't be.
// node further down in the tree (see issue5644.pdf, issue8088.pdf), if (currentPageIndex + count <= pageIndex) {
// and to ensure that we actually find the correct `Page` dict. currentPageIndex += count;
for (let last = kids.length - 1; last >= 0; last--) { continue;
nodesToVisit.push(kids[last]);
} }
} }
capability.reject(new Error(`Page index ${pageIndex} not found.`));
let kids = currentNode.getRaw("Kids");
if (kids instanceof Ref) {
kids = await xref.fetchAsync(kids);
}
if (!Array.isArray(kids)) {
// Prevent errors in corrupt PDF documents that violate the
// specification by *inlining* Page dicts directly in the Kids
// array, rather than using indirect objects (fixes issue9540.pdf).
let type = currentNode.getRaw("Type");
if (type instanceof Ref) {
type = await xref.fetchAsync(type);
}
if (isName(type, "Page") || !currentNode.has("Kids")) {
if (currentPageIndex === pageIndex) {
return [currentNode, null];
}
currentPageIndex++;
continue;
}
throw new FormatError("Page dictionary kids object is not an array.");
}
// Always check all `Kids` nodes, to avoid getting stuck in an empty
// node further down in the tree (see issue5644.pdf, issue8088.pdf),
// and to ensure that we actually find the correct `Page` dict.
for (let last = kids.length - 1; last >= 0; last--) {
nodesToVisit.push(kids[last]);
}
} }
next();
return capability.promise; throw new Error(`Page index ${pageIndex} not found.`);
} }
/** /**
@ -1319,7 +1292,20 @@ class Catalog {
break; break;
} }
if (isDict(obj, "Page") || !obj.has("Kids")) { let type;
try {
type = obj.get("Type");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
if (ex instanceof XRefEntryException && !recoveryMode) {
throw ex;
}
addPageError(ex);
break;
}
if (isName(type, "Page") || !obj.has("Kids")) {
addPageDict(obj, kidObj instanceof Ref ? kidObj : null); addPageDict(obj, kidObj instanceof Ref ? kidObj : null);
} else { } else {
queue.push({ currentNode: obj, posInKids: 0 }); queue.push({ currentNode: obj, posInKids: 0 });

View File

@ -622,9 +622,7 @@ describe("api", function () {
expect(false).toEqual(true); expect(false).toEqual(true);
} catch (reason) { } catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true); expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual( expect(reason.message).toEqual("Illegal character: 41");
"Page dictionary kids object is not an array."
);
} }
try { try {
await pdfDocument2.getPage(1); await pdfDocument2.getPage(1);
@ -633,9 +631,7 @@ describe("api", function () {
expect(false).toEqual(true); expect(false).toEqual(true);
} catch (reason) { } catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true); expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual( expect(reason.message).toEqual("End of file inside array.");
"Page dictionary kids object is not an array."
);
} }
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);

View File

@ -76,7 +76,7 @@ const ENABLE_PERMISSIONS_CLASS = "enablePermissions";
const PagesCountLimit = { const PagesCountLimit = {
FORCE_SCROLL_MODE_PAGE: 15000, FORCE_SCROLL_MODE_PAGE: 15000,
FORCE_LAZY_PAGE_INIT: 7500, FORCE_LAZY_PAGE_INIT: 7500,
PAUSE_EAGER_PAGE_INIT: 500, PAUSE_EAGER_PAGE_INIT: 250,
}; };
/** /**