Merge pull request #14409 from Snuffleupagus/getPageIndex-better-caching

Improve caching for the `Catalog.getPageIndex` method (PR 13319 follow-up)
This commit is contained in:
Tim van der Meij 2021-12-31 19:19:14 +01:00 committed by GitHub
commit 3d7bb6c38d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 131 additions and 13 deletions

View File

@ -1099,7 +1099,8 @@ class Catalog {
visitedNodes.put(pagesRef);
}
const xref = this.xref,
pageKidsCountCache = this.pageKidsCountCache;
pageKidsCountCache = this.pageKidsCountCache,
pageIndexCache = this.pageIndexCache;
let currentPageIndex = 0;
while (nodesToVisit.length) {
@ -1128,9 +1129,13 @@ class Catalog {
// Cache the Page reference, since it can *greatly* improve
// performance by reducing redundant lookups in long documents
// where all nodes are found at *one* level of the tree.
if (currentNode && !pageKidsCountCache.has(currentNode)) {
if (!pageKidsCountCache.has(currentNode)) {
pageKidsCountCache.put(currentNode, 1);
}
// Help improve performance of the `getPageIndex` method.
if (!pageIndexCache.has(currentNode)) {
pageIndexCache.put(currentNode, currentPageIndex);
}
if (currentPageIndex === pageIndex) {
return [obj, currentNode];
@ -1215,10 +1220,16 @@ class Catalog {
if (pagesRef instanceof Ref) {
visitedNodes.put(pagesRef);
}
const map = new Map();
const map = new Map(),
pageIndexCache = this.pageIndexCache;
let pageIndex = 0;
function addPageDict(pageDict, pageRef) {
// Help improve performance of the `getPageIndex` method.
if (pageRef && !pageIndexCache.has(pageRef)) {
pageIndexCache.put(pageRef, pageIndex);
}
map.set(pageIndex++, [pageDict, pageRef]);
}
function addPageError(error) {

View File

@ -1264,7 +1264,7 @@ class PDFDocument {
}
async _getLinearizationPage(pageIndex) {
const { catalog, linearization } = this;
const { catalog, linearization, xref } = this;
if (
typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING")
@ -1277,22 +1277,30 @@ class PDFDocument {
const ref = Ref.get(linearization.objectNumberFirst, 0);
try {
const obj = await this.xref.fetchAsync(ref);
const obj = await xref.fetchAsync(ref);
// Ensure that the object that was found is actually a Page dictionary.
if (
isDict(obj, "Page") ||
(isDict(obj) && !obj.has("Type") && obj.has("Contents"))
) {
if (ref && !catalog.pageKidsCountCache.has(ref)) {
catalog.pageKidsCountCache.put(ref, 1); // Cache the Page reference.
if (obj instanceof Dict) {
let type = obj.getRaw("Type");
if (type instanceof Ref) {
type = await xref.fetchAsync(type);
}
if (isName(type, "Page") || (!obj.has("Type") && !obj.has("Kids"))) {
if (!catalog.pageKidsCountCache.has(ref)) {
catalog.pageKidsCountCache.put(ref, 1); // Cache the Page reference.
}
// Help improve performance of the `Catalog.getPageIndex` method.
if (!catalog.pageIndexCache.has(ref)) {
catalog.pageIndexCache.put(ref, 0);
}
return [obj, ref];
}
return [obj, ref];
}
throw new FormatError(
"The Linearization dictionary doesn't point to a valid Page dictionary."
);
} catch (reason) {
info(reason);
warn(`_getLinearizationPage: "${reason.message}".`);
return catalog.getPageDict(pageIndex);
}
}

View File

@ -1256,6 +1256,105 @@ describe("api", function () {
await loadingTask.destroy();
});
it("gets non-existent fieldObjects", async function () {
const fieldObjects = await pdfDocument.getFieldObjects();
expect(fieldObjects).toEqual(null);
});
it("gets fieldObjects", async function () {
if (isNodeJS) {
pending(
"Node.js appears to ignore Object properties that are explicitly " +
"set to `undefined`, thus breaking the expectations used below."
);
}
const loadingTask = getDocument(buildGetDocumentParams("js-authors.pdf"));
const pdfDoc = await loadingTask.promise;
const fieldObjects = await pdfDoc.getFieldObjects();
expect(fieldObjects).toEqual({
Text1: [
{
id: "25R",
value: "",
defaultValue: null,
multiline: false,
password: false,
charLimit: null,
comb: false,
editable: true,
hidden: false,
name: "Text1",
rect: [24.1789, 719.66, 432.22, 741.66],
actions: null,
page: 0,
strokeColor: null,
fillColor: null,
type: "text",
},
],
Button1: [
{
id: "26R",
value: "Off",
defaultValue: null,
exportValues: undefined,
editable: true,
name: "Button1",
rect: [455.436, 719.678, 527.436, 739.678],
hidden: false,
actions: {
Action: [
`this.getField("Text1").value = this.info.authors.join("::");`,
],
},
page: 0,
strokeColor: null,
fillColor: new Uint8ClampedArray([192, 192, 192]),
type: "button",
},
],
});
await loadingTask.destroy();
});
it("gets non-existent calculationOrder", async function () {
const calculationOrder = await pdfDocument.getCalculationOrderIds();
expect(calculationOrder).toEqual(null);
});
it("gets calculationOrder", async function () {
if (isNodeJS) {
pending("Linked test-cases are not supported in Node.js.");
}
const loadingTask = getDocument(buildGetDocumentParams("issue13132.pdf"));
const pdfDoc = await loadingTask.promise;
const calculationOrder = await pdfDoc.getCalculationOrderIds();
expect(calculationOrder).toEqual([
"319R",
"320R",
"321R",
"322R",
"323R",
"324R",
"325R",
"326R",
"327R",
"328R",
"329R",
"330R",
"331R",
"332R",
"333R",
"334R",
"335R",
]);
await loadingTask.destroy();
});
it("gets non-existent outline", async function () {
const loadingTask = getDocument(
buildGetDocumentParams("tracemonkey.pdf")