From 389a26c115de8e6de8df800ddaddaac9d59e3355 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Thu, 10 Aug 2023 11:59:40 +0200
Subject: [PATCH] Fallback to check all pages when getting the pageIndex of
 FieldObjects

Given that the FieldObjects are parsed in parallel, in combination with the existing caching in the `getPage`-method and `annotations`-getter, adding additional caches for this fallback code-path doesn't seem entirely necessary.
---
 src/core/annotation.js | 38 ++++++++++++++++++++++++++++++--------
 test/unit/api_spec.js  | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/src/core/annotation.js b/src/core/annotation.js
index 347fa3ed6..5674b1b75 100644
--- a/src/core/annotation.js
+++ b/src/core/annotation.js
@@ -26,6 +26,7 @@ import {
   FeatureTest,
   getModificationDate,
   IDENTITY_MATRIX,
+  info,
   LINE_DESCENT_FACTOR,
   LINE_FACTOR,
   OPS,
@@ -52,7 +53,7 @@ import {
   parseAppearanceStream,
   parseDefaultAppearance,
 } from "./default_appearance.js";
-import { Dict, isName, Name, Ref, RefSet } from "./primitives.js";
+import { Dict, isName, isRefsEqual, Name, Ref, RefSet } from "./primitives.js";
 import { Stream, StringStream } from "./stream.js";
 import { writeDict, writeObject } from "./writer.js";
 import { BaseStream } from "./base_stream.js";
@@ -245,17 +246,38 @@ class AnnotationFactory {
         return -1;
       }
       const pageRef = annotDict.getRaw("P");
-      if (!(pageRef instanceof Ref)) {
-        return -1;
+      if (pageRef instanceof Ref) {
+        try {
+          const pageIndex = await pdfManager.ensureCatalog("getPageIndex", [
+            pageRef,
+          ]);
+          return pageIndex;
+        } catch (ex) {
+          info(`_getPageIndex -- not a valid page reference: "${ex}".`);
+        }
+      }
+      if (annotDict.has("Kids")) {
+        return -1; // Not an annotation reference.
+      }
+      // Fallback to, potentially, checking the annotations of all pages.
+      // PLEASE NOTE: This could force the *entire* PDF document to load,
+      //              hence it absolutely cannot be done unconditionally.
+      const numPages = await pdfManager.ensureDoc("numPages");
+
+      for (let pageIndex = 0; pageIndex < numPages; pageIndex++) {
+        const page = await pdfManager.getPage(pageIndex);
+        const annotations = await pdfManager.ensure(page, "annotations");
+
+        for (const annotRef of annotations) {
+          if (annotRef instanceof Ref && isRefsEqual(annotRef, ref)) {
+            return pageIndex;
+          }
+        }
       }
-      const pageIndex = await pdfManager.ensureCatalog("getPageIndex", [
-        pageRef,
-      ]);
-      return pageIndex;
     } catch (ex) {
       warn(`_getPageIndex: "${ex}".`);
-      return -1;
     }
+    return -1;
   }
 
   static generateImages(annotations, xref, isOffscreenCanvasSupported) {
diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index ac68f80aa..7c14dbee7 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -1485,6 +1485,42 @@ describe("api", function () {
       await loadingTask.destroy();
     });
 
+    it("gets fieldObjects with missing /P-entries", async function () {
+      if (isNodeJS) {
+        pending("Linked test-cases are not supported in Node.js.");
+      }
+
+      const loadingTask = getDocument(buildGetDocumentParams("bug1847733.pdf"));
+      const pdfDoc = await loadingTask.promise;
+      const fieldObjects = await pdfDoc.getFieldObjects();
+
+      for (const name in fieldObjects) {
+        const pageIndexes = fieldObjects[name].map(o => o.page);
+        let expected;
+
+        switch (name) {
+          case "formID":
+          case "pdf_submission_new":
+          case "simple_spc":
+          case "adobeWarning":
+            expected = [0];
+            break;
+          case "typeA13":
+            expected = [0, 0, 0, 0];
+            break;
+          case "typeA15[0]":
+          case "typeA15[1]":
+          case "typeA15[2]":
+          case "typeA15[3]":
+            expected = [-1, 0, 0, 0, 0];
+            break;
+        }
+        expect(pageIndexes).toEqual(expected);
+      }
+
+      await loadingTask.destroy();
+    });
+
     it("check field object for group of buttons", async function () {
       if (isNodeJS) {
         pending("Linked test-cases are not supported in Node.js.");