From ff4dae05b0f8349b6277d45e2d282ad893a4b3af Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Sun, 11 Apr 2021 12:00:14 +0200
Subject: [PATCH 1/2] Ensure that `getStructTree` won't break with
 `disableAutoFetch = true` set (PR 13171 follow-up)

Open http://localhost:8888/web/viewer.html?file=/test/pdfs/pdf.pdf#disableStream=true&disableAutoFetch=true and observe the following message in the console (repeated for each page of the document):
```
Uncaught (in promise)
Object { message: "Missing data [19787293, 19787294)", name: "UnknownErrorException", details: "MissingDataException: Missing data [19787293, 19787294)", stack: "BaseExceptionClosure@http://localhost:8888/src/shared/util.js:458:29\n@http://localhost:8888/src/shared/util.js:462:3\n" }
```
---
 src/core/document.js | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/core/document.js b/src/core/document.js
index 11830f0b0..27cbede28 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -454,6 +454,13 @@ class Page {
     const structTreeRoot = await this.pdfManager.ensureCatalog(
       "structTreeRoot"
     );
+    return this.pdfManager.ensure(this, "_parseStructTree", [structTreeRoot]);
+  }
+
+  /**
+   * @private
+   */
+  _parseStructTree(structTreeRoot) {
     const tree = new StructTreePage(structTreeRoot, this.pageDict);
     tree.parse();
     return tree;

From 5adee0cdd14017ca79a56ef00c4ec07659f17084 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Sun, 11 Apr 2021 12:04:29 +0200
Subject: [PATCH 2/2] [api-minor] Let `PDFPageProxy.getStructTree` return
 `null`, rather than an empty structTree, for documents without any
 accessibility data (PR 13171 follow-up)

First of all, this is consistent with existing API methods, which return `null` when the data in question doesn't exist. Secondly, it should also be (slightly) more efficient, since there's less dummy data that needs to be transferred between threads.
Finally, this prevents us from adding an empty/unnecessary span to *every* single page, even in documents without any structure tree data.
---
 src/core/struct_tree.js |  4 +++
 src/display/api.js      |  3 +-
 test/unit/api_spec.js   | 63 +++++++++++++++++++++++++++++++++++++++++
 web/pdf_page_view.js    |  3 ++
 4 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js
index 41587d45c..a07d99b96 100644
--- a/src/core/struct_tree.js
+++ b/src/core/struct_tree.js
@@ -328,6 +328,10 @@ class StructTreePage {
       }
       nodeToSerializable(child, root);
     }
+
+    if (root.children.length === 0) {
+      return null;
+    }
     return root;
   }
 }
diff --git a/src/display/api.js b/src/display/api.js
index 9b5f70536..b05fd4641 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -1522,7 +1522,8 @@ class PDFPageProxy {
 
   /**
    * @returns {Promise<StructTreeNode>} A promise that is resolved with a
-   *   {@link StructTreeNode} object that represents the page's structure tree.
+   *   {@link StructTreeNode} object that represents the page's structure tree,
+   *   or `null` when no structure tree is present for the current page.
    */
   getStructTree() {
     return (this._structTreePromise ||= this._transport.getStructTree(
diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index cc0548335..af9a494bf 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -1702,6 +1702,69 @@ describe("api", function () {
         .catch(done.fail);
     });
 
+    it("gets empty structure tree", async function () {
+      const tree = await page.getStructTree();
+
+      expect(tree).toEqual(null);
+    });
+    it("gets simple structure tree", async function () {
+      const loadingTask = getDocument(
+        buildGetDocumentParams("structure_simple.pdf")
+      );
+      const pdfDoc = await loadingTask.promise;
+      const pdfPage = await pdfDoc.getPage(1);
+      const tree = await pdfPage.getStructTree();
+
+      expect(tree).toEqual({
+        role: "Root",
+        children: [
+          {
+            role: "Document",
+            children: [
+              {
+                role: "H1",
+                children: [
+                  {
+                    role: "NonStruct",
+                    children: [{ type: "content", id: "page2R_mcid0" }],
+                  },
+                ],
+              },
+              {
+                role: "P",
+                children: [
+                  {
+                    role: "NonStruct",
+                    children: [{ type: "content", id: "page2R_mcid1" }],
+                  },
+                ],
+              },
+              {
+                role: "H2",
+                children: [
+                  {
+                    role: "NonStruct",
+                    children: [{ type: "content", id: "page2R_mcid2" }],
+                  },
+                ],
+              },
+              {
+                role: "P",
+                children: [
+                  {
+                    role: "NonStruct",
+                    children: [{ type: "content", id: "page2R_mcid3" }],
+                  },
+                ],
+              },
+            ],
+          },
+        ],
+      });
+
+      await loadingTask.destroy();
+    });
+
     it("gets operator list", function (done) {
       const promise = page.getOperatorList();
       promise
diff --git a/web/pdf_page_view.js b/web/pdf_page_view.js
index f83b0568d..8d6771129 100644
--- a/web/pdf_page_view.js
+++ b/web/pdf_page_view.js
@@ -619,6 +619,9 @@ class PDFPageView {
         this.eventBus._off("textlayerrendered", this._onTextLayerRendered);
         this._onTextLayerRendered = null;
         this.pdfPage.getStructTree().then(tree => {
+          if (!tree) {
+            return;
+          }
           const treeDom = this.structTreeLayer.render(tree);
           treeDom.classList.add("structTree");
           this.canvas.appendChild(treeDom);