Merge pull request #14400 from Snuffleupagus/getPageDict-async
[api-minor] Convert `Catalog.getPageDict` to an asynchronous method
This commit is contained in:
commit
e42d54e1b5
@@ -34,7 +34,6 @@ import {
|
|||||||
XRefEntryException,
|
XRefEntryException,
|
||||||
} from "./core_utils.js";
|
} from "./core_utils.js";
|
||||||
import {
|
import {
|
||||||
createPromiseCapability,
|
|
||||||
createValidAbsoluteUrl,
|
createValidAbsoluteUrl,
|
||||||
DocumentActionEventType,
|
DocumentActionEventType,
|
||||||
FormatError,
|
FormatError,
|
||||||
@@ -1091,8 +1090,7 @@ class Catalog {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
getPageDict(pageIndex) {
|
async getPageDict(pageIndex) {
|
||||||
const capability = createPromiseCapability();
|
|
||||||
const nodesToVisit = [this.toplevelPagesDict];
|
const nodesToVisit = [this.toplevelPagesDict];
|
||||||
const visitedNodes = new RefSet();
|
const visitedNodes = new RefSet();
|
||||||
|
|
||||||
@@ -1104,130 +1102,105 @@ class Catalog {
|
|||||||
pageKidsCountCache = this.pageKidsCountCache;
|
pageKidsCountCache = this.pageKidsCountCache;
|
||||||
let currentPageIndex = 0;
|
let currentPageIndex = 0;
|
||||||
|
|
||||||
function next() {
|
while (nodesToVisit.length) {
|
||||||
while (nodesToVisit.length) {
|
const currentNode = nodesToVisit.pop();
|
||||||
const currentNode = nodesToVisit.pop();
|
|
||||||
|
|
||||||
if (currentNode instanceof Ref) {
|
if (currentNode instanceof Ref) {
|
||||||
const count = pageKidsCountCache.get(currentNode);
|
const count = pageKidsCountCache.get(currentNode);
|
||||||
// Skip nodes where the page can't be.
|
// Skip nodes where the page can't be.
|
||||||
if (count >= 0 && currentPageIndex + count <= pageIndex) {
|
if (count >= 0 && currentPageIndex + count <= pageIndex) {
|
||||||
currentPageIndex += count;
|
currentPageIndex += count;
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
// Prevent circular references in the /Pages tree.
|
||||||
|
if (visitedNodes.has(currentNode)) {
|
||||||
|
throw new FormatError("Pages tree contains circular reference.");
|
||||||
|
}
|
||||||
|
visitedNodes.put(currentNode);
|
||||||
|
|
||||||
|
const obj = await xref.fetchAsync(currentNode);
|
||||||
|
if (obj instanceof Dict) {
|
||||||
|
let type = obj.getRaw("Type");
|
||||||
|
if (type instanceof Ref) {
|
||||||
|
type = await xref.fetchAsync(type);
|
||||||
}
|
}
|
||||||
// Prevent circular references in the /Pages tree.
|
if (isName(type, "Page") || !obj.has("Kids")) {
|
||||||
if (visitedNodes.has(currentNode)) {
|
// Cache the Page reference, since it can *greatly* improve
|
||||||
capability.reject(
|
// performance by reducing redundant lookups in long documents
|
||||||
new FormatError("Pages tree contains circular reference.")
|
// where all nodes are found at *one* level of the tree.
|
||||||
);
|
if (currentNode && !pageKidsCountCache.has(currentNode)) {
|
||||||
return;
|
pageKidsCountCache.put(currentNode, 1);
|
||||||
}
|
|
||||||
visitedNodes.put(currentNode);
|
|
||||||
|
|
||||||
xref.fetchAsync(currentNode).then(function (obj) {
|
|
||||||
if (isDict(obj, "Page") || (isDict(obj) && !obj.has("Kids"))) {
|
|
||||||
// Cache the Page reference, since it can *greatly* improve
|
|
||||||
// performance by reducing redundant lookups in long documents
|
|
||||||
// where all nodes are found at *one* level of the tree.
|
|
||||||
if (currentNode && !pageKidsCountCache.has(currentNode)) {
|
|
||||||
pageKidsCountCache.put(currentNode, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pageIndex === currentPageIndex) {
|
|
||||||
capability.resolve([obj, currentNode]);
|
|
||||||
} else {
|
|
||||||
currentPageIndex++;
|
|
||||||
next();
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
nodesToVisit.push(obj);
|
|
||||||
next();
|
|
||||||
}, capability.reject);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Must be a child page dictionary.
|
|
||||||
if (!(currentNode instanceof Dict)) {
|
|
||||||
capability.reject(
|
|
||||||
new FormatError(
|
|
||||||
"Page dictionary kid reference points to wrong type of object."
|
|
||||||
)
|
|
||||||
);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let count;
|
|
||||||
try {
|
|
||||||
count = currentNode.get("Count");
|
|
||||||
} catch (ex) {
|
|
||||||
if (ex instanceof MissingDataException) {
|
|
||||||
throw ex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (Number.isInteger(count) && count >= 0) {
|
|
||||||
// Cache the Kids count, since it can reduce redundant lookups in
|
|
||||||
// documents where all nodes are found at *one* level of the tree.
|
|
||||||
const objId = currentNode.objId;
|
|
||||||
if (objId && !pageKidsCountCache.has(objId)) {
|
|
||||||
pageKidsCountCache.put(objId, count);
|
|
||||||
}
|
|
||||||
// Skip nodes where the page can't be.
|
|
||||||
if (currentPageIndex + count <= pageIndex) {
|
|
||||||
currentPageIndex += count;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let kids;
|
|
||||||
try {
|
|
||||||
kids = currentNode.get("Kids");
|
|
||||||
} catch (ex) {
|
|
||||||
if (ex instanceof MissingDataException) {
|
|
||||||
throw ex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!Array.isArray(kids)) {
|
|
||||||
// Prevent errors in corrupt PDF documents that violate the
|
|
||||||
// specification by *inlining* Page dicts directly in the Kids
|
|
||||||
// array, rather than using indirect objects (fixes issue9540.pdf).
|
|
||||||
let type;
|
|
||||||
try {
|
|
||||||
type = currentNode.get("Type");
|
|
||||||
} catch (ex) {
|
|
||||||
if (ex instanceof MissingDataException) {
|
|
||||||
throw ex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (
|
|
||||||
isName(type, "Page") ||
|
|
||||||
(!currentNode.has("Type") && currentNode.has("Contents"))
|
|
||||||
) {
|
|
||||||
if (currentPageIndex === pageIndex) {
|
if (currentPageIndex === pageIndex) {
|
||||||
capability.resolve([currentNode, null]);
|
return [obj, currentNode];
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
currentPageIndex++;
|
currentPageIndex++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
nodesToVisit.push(obj);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
capability.reject(
|
// Must be a child page dictionary.
|
||||||
new FormatError("Page dictionary kids object is not an array.")
|
if (!(currentNode instanceof Dict)) {
|
||||||
);
|
throw new FormatError(
|
||||||
return;
|
"Page dictionary kid reference points to wrong type of object."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const { objId } = currentNode;
|
||||||
|
|
||||||
|
let count = currentNode.getRaw("Count");
|
||||||
|
if (count instanceof Ref) {
|
||||||
|
count = await xref.fetchAsync(count);
|
||||||
|
}
|
||||||
|
if (Number.isInteger(count) && count >= 0) {
|
||||||
|
// Cache the Kids count, since it can reduce redundant lookups in
|
||||||
|
// documents where all nodes are found at *one* level of the tree.
|
||||||
|
if (objId && !pageKidsCountCache.has(objId)) {
|
||||||
|
pageKidsCountCache.put(objId, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Always check all `Kids` nodes, to avoid getting stuck in an empty
|
// Skip nodes where the page can't be.
|
||||||
// node further down in the tree (see issue5644.pdf, issue8088.pdf),
|
if (currentPageIndex + count <= pageIndex) {
|
||||||
// and to ensure that we actually find the correct `Page` dict.
|
currentPageIndex += count;
|
||||||
for (let last = kids.length - 1; last >= 0; last--) {
|
continue;
|
||||||
nodesToVisit.push(kids[last]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
capability.reject(new Error(`Page index ${pageIndex} not found.`));
|
|
||||||
|
let kids = currentNode.getRaw("Kids");
|
||||||
|
if (kids instanceof Ref) {
|
||||||
|
kids = await xref.fetchAsync(kids);
|
||||||
|
}
|
||||||
|
if (!Array.isArray(kids)) {
|
||||||
|
// Prevent errors in corrupt PDF documents that violate the
|
||||||
|
// specification by *inlining* Page dicts directly in the Kids
|
||||||
|
// array, rather than using indirect objects (fixes issue9540.pdf).
|
||||||
|
let type = currentNode.getRaw("Type");
|
||||||
|
if (type instanceof Ref) {
|
||||||
|
type = await xref.fetchAsync(type);
|
||||||
|
}
|
||||||
|
if (isName(type, "Page") || !currentNode.has("Kids")) {
|
||||||
|
if (currentPageIndex === pageIndex) {
|
||||||
|
return [currentNode, null];
|
||||||
|
}
|
||||||
|
currentPageIndex++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new FormatError("Page dictionary kids object is not an array.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always check all `Kids` nodes, to avoid getting stuck in an empty
|
||||||
|
// node further down in the tree (see issue5644.pdf, issue8088.pdf),
|
||||||
|
// and to ensure that we actually find the correct `Page` dict.
|
||||||
|
for (let last = kids.length - 1; last >= 0; last--) {
|
||||||
|
nodesToVisit.push(kids[last]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
next();
|
|
||||||
return capability.promise;
|
throw new Error(`Page index ${pageIndex} not found.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -1319,7 +1292,20 @@ class Catalog {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isDict(obj, "Page") || !obj.has("Kids")) {
|
let type;
|
||||||
|
try {
|
||||||
|
type = obj.get("Type");
|
||||||
|
} catch (ex) {
|
||||||
|
if (ex instanceof MissingDataException) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
if (ex instanceof XRefEntryException && !recoveryMode) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
addPageError(ex);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (isName(type, "Page") || !obj.has("Kids")) {
|
||||||
addPageDict(obj, kidObj instanceof Ref ? kidObj : null);
|
addPageDict(obj, kidObj instanceof Ref ? kidObj : null);
|
||||||
} else {
|
} else {
|
||||||
queue.push({ currentNode: obj, posInKids: 0 });
|
queue.push({ currentNode: obj, posInKids: 0 });
|
||||||
|
@@ -622,9 +622,7 @@ describe("api", function () {
|
|||||||
expect(false).toEqual(true);
|
expect(false).toEqual(true);
|
||||||
} catch (reason) {
|
} catch (reason) {
|
||||||
expect(reason instanceof UnknownErrorException).toEqual(true);
|
expect(reason instanceof UnknownErrorException).toEqual(true);
|
||||||
expect(reason.message).toEqual(
|
expect(reason.message).toEqual("Illegal character: 41");
|
||||||
"Page dictionary kids object is not an array."
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
await pdfDocument2.getPage(1);
|
await pdfDocument2.getPage(1);
|
||||||
@@ -633,9 +631,7 @@ describe("api", function () {
|
|||||||
expect(false).toEqual(true);
|
expect(false).toEqual(true);
|
||||||
} catch (reason) {
|
} catch (reason) {
|
||||||
expect(reason instanceof UnknownErrorException).toEqual(true);
|
expect(reason instanceof UnknownErrorException).toEqual(true);
|
||||||
expect(reason.message).toEqual(
|
expect(reason.message).toEqual("End of file inside array.");
|
||||||
"Page dictionary kids object is not an array."
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
|
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
|
||||||
|
@@ -76,7 +76,7 @@ const ENABLE_PERMISSIONS_CLASS = "enablePermissions";
|
|||||||
const PagesCountLimit = {
|
const PagesCountLimit = {
|
||||||
FORCE_SCROLL_MODE_PAGE: 15000,
|
FORCE_SCROLL_MODE_PAGE: 15000,
|
||||||
FORCE_LAZY_PAGE_INIT: 7500,
|
FORCE_LAZY_PAGE_INIT: 7500,
|
||||||
PAUSE_EAGER_PAGE_INIT: 500,
|
PAUSE_EAGER_PAGE_INIT: 250,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
x
Reference in New Issue
Block a user