From b99927e1eeabdb65a4c60d6545929f735c0e9b3d Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 29 Dec 2021 12:22:20 +0100 Subject: [PATCH 1/3] Improve the API unit-tests for scripting-related functionality I happened to notice that we didn't have *any* unit-tests for either `getFieldObjects` or `getCalculationOrderIds`, on the `PDFDocumentProxy` class, which seems unfortunate since it's API functionality that we depend on in e.g. the viewer. --- test/unit/api_spec.js | 99 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 24cc581bf..ad3a80d27 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1256,6 +1256,105 @@ describe("api", function () { await loadingTask.destroy(); }); + it("gets non-existent fieldObjects", async function () { + const fieldObjects = await pdfDocument.getFieldObjects(); + expect(fieldObjects).toEqual(null); + }); + + it("gets fieldObjects", async function () { + if (isNodeJS) { + pending( + "Node.js appears to ignore Object properties that are explicitly " + + "set to `undefined`, thus breaking the expectations used below." + ); + } + const loadingTask = getDocument(buildGetDocumentParams("js-authors.pdf")); + const pdfDoc = await loadingTask.promise; + const fieldObjects = await pdfDoc.getFieldObjects(); + + expect(fieldObjects).toEqual({ + Text1: [ + { + id: "25R", + value: "", + defaultValue: null, + multiline: false, + password: false, + charLimit: null, + comb: false, + editable: true, + hidden: false, + name: "Text1", + rect: [24.1789, 719.66, 432.22, 741.66], + actions: null, + page: 0, + strokeColor: null, + fillColor: null, + type: "text", + }, + ], + Button1: [ + { + id: "26R", + value: "Off", + defaultValue: null, + exportValues: undefined, + editable: true, + name: "Button1", + rect: [455.436, 719.678, 527.436, 739.678], + hidden: false, + actions: { + Action: [ + `this.getField("Text1").value = this.info.authors.join("::");`, + ], + }, + page: 0, + strokeColor: null, + fillColor: new Uint8ClampedArray([192, 192, 192]), + type: "button", + }, + ], + }); + + await loadingTask.destroy(); + }); + + it("gets non-existent calculationOrder", async function () { + const calculationOrder = await pdfDocument.getCalculationOrderIds(); + expect(calculationOrder).toEqual(null); + }); + + it("gets calculationOrder", async function () { + if (isNodeJS) { + pending("Linked test-cases are not supported in Node.js."); + } + const loadingTask = getDocument(buildGetDocumentParams("issue13132.pdf")); + const pdfDoc = await loadingTask.promise; + const calculationOrder = await pdfDoc.getCalculationOrderIds(); + + expect(calculationOrder).toEqual([ + "319R", + "320R", + "321R", + "322R", + "323R", + "324R", + "325R", + "326R", + "327R", + "328R", + "329R", + "330R", + "331R", + "332R", + "333R", + "334R", + "335R", + ]); + + await loadingTask.destroy(); + }); + it("gets non-existent outline", async function () { const loadingTask = getDocument( buildGetDocumentParams("tracemonkey.pdf") From a20393e6e447d14eb5f29fd6608457b8355d12c1 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 29 Dec 2021 13:13:42 +0100 Subject: [PATCH 2/3] Update `PDFDocument._getLinearizationPage` to do the /Type-check correctly (PR 14400 follow-up) I forgot about this in PR 14400, since we should obviously be consistent *and* given that the existing check is actually wrong; sorry about this! --- src/core/document.js | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index d2238cae9..5daeeb186 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -1264,7 +1264,7 @@ class PDFDocument { } async _getLinearizationPage(pageIndex) { - const { catalog, linearization } = this; + const { catalog, linearization, xref } = this; if ( typeof PDFJSDev === "undefined" || PDFJSDev.test("!PRODUCTION || TESTING") @@ -1277,22 +1277,25 @@ class PDFDocument { const ref = Ref.get(linearization.objectNumberFirst, 0); try { - const obj = await this.xref.fetchAsync(ref); + const obj = await xref.fetchAsync(ref); // Ensure that the object that was found is actually a Page dictionary. - if ( - isDict(obj, "Page") || - (isDict(obj) && !obj.has("Type") && obj.has("Contents")) - ) { - if (ref && !catalog.pageKidsCountCache.has(ref)) { - catalog.pageKidsCountCache.put(ref, 1); // Cache the Page reference. + if (obj instanceof Dict) { + let type = obj.getRaw("Type"); + if (type instanceof Ref) { + type = await xref.fetchAsync(type); + } + if (isName(type, "Page") || (!obj.has("Type") && !obj.has("Kids"))) { + if (ref && !catalog.pageKidsCountCache.has(ref)) { + catalog.pageKidsCountCache.put(ref, 1); // Cache the Page reference. + } + return [obj, ref]; } - return [obj, ref]; } throw new FormatError( "The Linearization dictionary doesn't point to a valid Page dictionary." ); } catch (reason) { - info(reason); + warn(`_getLinearizationPage: "${reason.message}".`); return catalog.getPageDict(pageIndex); } } From 1491459deaa145ea875be6bdc9480921403de1c4 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 29 Dec 2021 15:57:34 +0100 Subject: [PATCH 3/3] Improve caching for the `Catalog.getPageIndex` method (PR 13319 follow-up) This method is now being used a lot more, compared to when it's added, since it's now used together with scripting as part of the `PDFDocument.fieldObjects` parsing (called during viewer initialization). For /Page Dictionaries that we've already parsed, the `pageIndex` corresponding to a particular Reference is already known and we're thus able to skip *all* parsing in the `Catalog.getPageIndex` method for those cases. --- src/core/catalog.js | 17 ++++++++++++++--- src/core/document.js | 7 ++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/core/catalog.js b/src/core/catalog.js index 54c6849ef..30ccddf16 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -1099,7 +1099,8 @@ class Catalog { visitedNodes.put(pagesRef); } const xref = this.xref, - pageKidsCountCache = this.pageKidsCountCache; + pageKidsCountCache = this.pageKidsCountCache, + pageIndexCache = this.pageIndexCache; let currentPageIndex = 0; while (nodesToVisit.length) { @@ -1128,9 +1129,13 @@ class Catalog { // Cache the Page reference, since it can *greatly* improve // performance by reducing redundant lookups in long documents // where all nodes are found at *one* level of the tree. - if (currentNode && !pageKidsCountCache.has(currentNode)) { + if (!pageKidsCountCache.has(currentNode)) { pageKidsCountCache.put(currentNode, 1); } + // Help improve performance of the `getPageIndex` method. + if (!pageIndexCache.has(currentNode)) { + pageIndexCache.put(currentNode, currentPageIndex); + } if (currentPageIndex === pageIndex) { return [obj, currentNode]; @@ -1215,10 +1220,16 @@ class Catalog { if (pagesRef instanceof Ref) { visitedNodes.put(pagesRef); } - const map = new Map(); + const map = new Map(), + pageIndexCache = this.pageIndexCache; let pageIndex = 0; function addPageDict(pageDict, pageRef) { + // Help improve performance of the `getPageIndex` method. + if (pageRef && !pageIndexCache.has(pageRef)) { + pageIndexCache.put(pageRef, pageIndex); + } + map.set(pageIndex++, [pageDict, pageRef]); } function addPageError(error) { diff --git a/src/core/document.js b/src/core/document.js index 5daeeb186..06b3f3895 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -1285,9 +1285,14 @@ class PDFDocument { type = await xref.fetchAsync(type); } if (isName(type, "Page") || (!obj.has("Type") && !obj.has("Kids"))) { - if (ref && !catalog.pageKidsCountCache.has(ref)) { + if (!catalog.pageKidsCountCache.has(ref)) { catalog.pageKidsCountCache.put(ref, 1); // Cache the Page reference. } + // Help improve performance of the `Catalog.getPageIndex` method. + if (!catalog.pageIndexCache.has(ref)) { + catalog.pageIndexCache.put(ref, 0); + } + return [obj, ref]; } }