[api-minor] Convert Catalog.getPageDict to an asynchronous method

Besides converting `Catalog.getPageDict` to an `async` method, thus simplifying the code, this patch also allows us to pro-actively fix a existing issue.
Note how we're looking up References in such a way that `MissingDataException`s won't cause trouble, however it's *technically possible* that the entries (i.e. /Count, /Kids, and /Type) in a /Pages Dictionary could actually be indirect objects as well. In the existing code this could lead to *some*, or even all, pages failing to load/render as intended.
In practice that doesn't *appear* to happen in real-world PDF documents, but given all the weird things that PDF software do I'd prefer to fix this pro-actively (rather than waiting for a bug report).
With `Catalog.getPageDict` being `async` this is now really simple to address, however I didn't want to introduce a bunch more *unconditional* asynchronicity in this method if it could be avoided (since that could slow things down). Hence we'll *synchronously* lookup the *raw* data in a /Pages Dictionary, and only fallback to asynchronous data lookup when a Reference was encountered.

In addition to the above, this patch also makes the following notable changes:
 - Let `Catalog.getPageDict` *consistently* reject with the actual error, regardless of what data we're fetching. Previously we'd "swallow" the actual errors except when looking up Dictionary entries, which is inconsistent and thus seem unfortunate. As can be seen from the updated unit-tests this change is API-observable, hence why the patch is tagged `[api-minor]`.

 - Improve the consistency of the Dictionary /Type-checks in both the `Catalog.getPageDict` and `Catalog.getAllPageDicts` methods.
   In `Catalog.getPageDict` there's a fallback code-path where we're *incorrectly* checking the /Page Dictionary for a /Contents-entry, which is wrong since a /Page Dictionary doesn't need to have a /Contents-entry in order to be valid.
   For consistency the `Catalog.getAllPageDicts` method is also updated to handle errors in the /Type-lookup correctly.

 - Reduce the `PagesCountLimit.PAUSE_EAGER_PAGE_INIT` viewer constant, to further improve loading/rendering performance of the *second* page during initialization of very long documents; PR 14359 follow-up.
This commit is contained in:
Jonas Jenwald 2021-12-24 13:46:35 +01:00
parent 41dab8e7b6
commit b513c64d9d
3 changed files with 102 additions and 120 deletions

View File

@ -34,7 +34,6 @@ import {
XRefEntryException, XRefEntryException,
} from "./core_utils.js"; } from "./core_utils.js";
import { import {
createPromiseCapability,
createValidAbsoluteUrl, createValidAbsoluteUrl,
DocumentActionEventType, DocumentActionEventType,
FormatError, FormatError,
@ -1091,8 +1090,7 @@ class Catalog {
}); });
} }
getPageDict(pageIndex) { async getPageDict(pageIndex) {
const capability = createPromiseCapability();
const nodesToVisit = [this.toplevelPagesDict]; const nodesToVisit = [this.toplevelPagesDict];
const visitedNodes = new RefSet(); const visitedNodes = new RefSet();
@ -1104,7 +1102,6 @@ class Catalog {
pageKidsCountCache = this.pageKidsCountCache; pageKidsCountCache = this.pageKidsCountCache;
let currentPageIndex = 0; let currentPageIndex = 0;
function next() {
while (nodesToVisit.length) { while (nodesToVisit.length) {
const currentNode = nodesToVisit.pop(); const currentNode = nodesToVisit.pop();
@ -1117,15 +1114,17 @@ class Catalog {
} }
// Prevent circular references in the /Pages tree. // Prevent circular references in the /Pages tree.
if (visitedNodes.has(currentNode)) { if (visitedNodes.has(currentNode)) {
capability.reject( throw new FormatError("Pages tree contains circular reference.");
new FormatError("Pages tree contains circular reference.")
);
return;
} }
visitedNodes.put(currentNode); visitedNodes.put(currentNode);
xref.fetchAsync(currentNode).then(function (obj) { const obj = await xref.fetchAsync(currentNode);
if (isDict(obj, "Page") || (isDict(obj) && !obj.has("Kids"))) { if (obj instanceof Dict) {
let type = obj.getRaw("Type");
if (type instanceof Ref) {
type = await xref.fetchAsync(type);
}
if (isName(type, "Page") || !obj.has("Kids")) {
// Cache the Page reference, since it can *greatly* improve // Cache the Page reference, since it can *greatly* improve
// performance by reducing redundant lookups in long documents // performance by reducing redundant lookups in long documents
// where all nodes are found at *one* level of the tree. // where all nodes are found at *one* level of the tree.
@ -1133,45 +1132,36 @@ class Catalog {
pageKidsCountCache.put(currentNode, 1); pageKidsCountCache.put(currentNode, 1);
} }
if (pageIndex === currentPageIndex) { if (currentPageIndex === pageIndex) {
capability.resolve([obj, currentNode]); return [obj, currentNode];
} else { }
currentPageIndex++; currentPageIndex++;
next(); continue;
} }
return;
} }
nodesToVisit.push(obj); nodesToVisit.push(obj);
next(); continue;
}, capability.reject);
return;
} }
// Must be a child page dictionary. // Must be a child page dictionary.
if (!(currentNode instanceof Dict)) { if (!(currentNode instanceof Dict)) {
capability.reject( throw new FormatError(
new FormatError(
"Page dictionary kid reference points to wrong type of object." "Page dictionary kid reference points to wrong type of object."
)
); );
return;
} }
const { objId } = currentNode;
let count; let count = currentNode.getRaw("Count");
try { if (count instanceof Ref) {
count = currentNode.get("Count"); count = await xref.fetchAsync(count);
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
} }
if (Number.isInteger(count) && count >= 0) { if (Number.isInteger(count) && count >= 0) {
// Cache the Kids count, since it can reduce redundant lookups in // Cache the Kids count, since it can reduce redundant lookups in
// documents where all nodes are found at *one* level of the tree. // documents where all nodes are found at *one* level of the tree.
const objId = currentNode.objId;
if (objId && !pageKidsCountCache.has(objId)) { if (objId && !pageKidsCountCache.has(objId)) {
pageKidsCountCache.put(objId, count); pageKidsCountCache.put(objId, count);
} }
// Skip nodes where the page can't be. // Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) { if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count; currentPageIndex += count;
@ -1179,42 +1169,27 @@ class Catalog {
} }
} }
let kids; let kids = currentNode.getRaw("Kids");
try { if (kids instanceof Ref) {
kids = currentNode.get("Kids"); kids = await xref.fetchAsync(kids);
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
} }
if (!Array.isArray(kids)) { if (!Array.isArray(kids)) {
// Prevent errors in corrupt PDF documents that violate the // Prevent errors in corrupt PDF documents that violate the
// specification by *inlining* Page dicts directly in the Kids // specification by *inlining* Page dicts directly in the Kids
// array, rather than using indirect objects (fixes issue9540.pdf). // array, rather than using indirect objects (fixes issue9540.pdf).
let type; let type = currentNode.getRaw("Type");
try { if (type instanceof Ref) {
type = currentNode.get("Type"); type = await xref.fetchAsync(type);
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
} }
} if (isName(type, "Page") || !currentNode.has("Kids")) {
if (
isName(type, "Page") ||
(!currentNode.has("Type") && currentNode.has("Contents"))
) {
if (currentPageIndex === pageIndex) { if (currentPageIndex === pageIndex) {
capability.resolve([currentNode, null]); return [currentNode, null];
return;
} }
currentPageIndex++; currentPageIndex++;
continue; continue;
} }
capability.reject( throw new FormatError("Page dictionary kids object is not an array.");
new FormatError("Page dictionary kids object is not an array.")
);
return;
} }
// Always check all `Kids` nodes, to avoid getting stuck in an empty // Always check all `Kids` nodes, to avoid getting stuck in an empty
@ -1224,10 +1199,8 @@ class Catalog {
nodesToVisit.push(kids[last]); nodesToVisit.push(kids[last]);
} }
} }
capability.reject(new Error(`Page index ${pageIndex} not found.`));
} throw new Error(`Page index ${pageIndex} not found.`);
next();
return capability.promise;
} }
/** /**
@ -1319,7 +1292,20 @@ class Catalog {
break; break;
} }
if (isDict(obj, "Page") || !obj.has("Kids")) { let type;
try {
type = obj.get("Type");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
if (ex instanceof XRefEntryException && !recoveryMode) {
throw ex;
}
addPageError(ex);
break;
}
if (isName(type, "Page") || !obj.has("Kids")) {
addPageDict(obj, kidObj instanceof Ref ? kidObj : null); addPageDict(obj, kidObj instanceof Ref ? kidObj : null);
} else { } else {
queue.push({ currentNode: obj, posInKids: 0 }); queue.push({ currentNode: obj, posInKids: 0 });

View File

@ -622,9 +622,7 @@ describe("api", function () {
expect(false).toEqual(true); expect(false).toEqual(true);
} catch (reason) { } catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true); expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual( expect(reason.message).toEqual("Illegal character: 41");
"Page dictionary kids object is not an array."
);
} }
try { try {
await pdfDocument2.getPage(1); await pdfDocument2.getPage(1);
@ -633,9 +631,7 @@ describe("api", function () {
expect(false).toEqual(true); expect(false).toEqual(true);
} catch (reason) { } catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true); expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual( expect(reason.message).toEqual("End of file inside array.");
"Page dictionary kids object is not an array."
);
} }
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);

View File

@ -76,7 +76,7 @@ const ENABLE_PERMISSIONS_CLASS = "enablePermissions";
const PagesCountLimit = { const PagesCountLimit = {
FORCE_SCROLL_MODE_PAGE: 15000, FORCE_SCROLL_MODE_PAGE: 15000,
FORCE_LAZY_PAGE_INIT: 7500, FORCE_LAZY_PAGE_INIT: 7500,
PAUSE_EAGER_PAGE_INIT: 500, PAUSE_EAGER_PAGE_INIT: 250,
}; };
/** /**