From 389a26c115de8e6de8df800ddaddaac9d59e3355 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald <jonas.jenwald@gmail.com> Date: Thu, 10 Aug 2023 11:59:40 +0200 Subject: [PATCH] Fallback to check all pages when getting the pageIndex of FieldObjects Given that the FieldObjects are parsed in parallel, in combination with the existing caching in the `getPage`-method and `annotations`-getter, adding additional caches for this fallback code-path doesn't seem entirely necessary. --- src/core/annotation.js | 38 ++++++++++++++++++++++++++++++-------- test/unit/api_spec.js | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/src/core/annotation.js b/src/core/annotation.js index 347fa3ed6..5674b1b75 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -26,6 +26,7 @@ import { FeatureTest, getModificationDate, IDENTITY_MATRIX, + info, LINE_DESCENT_FACTOR, LINE_FACTOR, OPS, @@ -52,7 +53,7 @@ import { parseAppearanceStream, parseDefaultAppearance, } from "./default_appearance.js"; -import { Dict, isName, Name, Ref, RefSet } from "./primitives.js"; +import { Dict, isName, isRefsEqual, Name, Ref, RefSet } from "./primitives.js"; import { Stream, StringStream } from "./stream.js"; import { writeDict, writeObject } from "./writer.js"; import { BaseStream } from "./base_stream.js"; @@ -245,17 +246,38 @@ class AnnotationFactory { return -1; } const pageRef = annotDict.getRaw("P"); - if (!(pageRef instanceof Ref)) { - return -1; + if (pageRef instanceof Ref) { + try { + const pageIndex = await pdfManager.ensureCatalog("getPageIndex", [ + pageRef, + ]); + return pageIndex; + } catch (ex) { + info(`_getPageIndex -- not a valid page reference: "${ex}".`); + } + } + if (annotDict.has("Kids")) { + return -1; // Not an annotation reference. + } + // Fallback to, potentially, checking the annotations of all pages. + // PLEASE NOTE: This could force the *entire* PDF document to load, + // hence it absolutely cannot be done unconditionally. + const numPages = await pdfManager.ensureDoc("numPages"); + + for (let pageIndex = 0; pageIndex < numPages; pageIndex++) { + const page = await pdfManager.getPage(pageIndex); + const annotations = await pdfManager.ensure(page, "annotations"); + + for (const annotRef of annotations) { + if (annotRef instanceof Ref && isRefsEqual(annotRef, ref)) { + return pageIndex; + } + } } - const pageIndex = await pdfManager.ensureCatalog("getPageIndex", [ - pageRef, - ]); - return pageIndex; } catch (ex) { warn(`_getPageIndex: "${ex}".`); - return -1; } + return -1; } static generateImages(annotations, xref, isOffscreenCanvasSupported) { diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index ac68f80aa..7c14dbee7 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1485,6 +1485,42 @@ describe("api", function () { await loadingTask.destroy(); }); + it("gets fieldObjects with missing /P-entries", async function () { + if (isNodeJS) { + pending("Linked test-cases are not supported in Node.js."); + } + + const loadingTask = getDocument(buildGetDocumentParams("bug1847733.pdf")); + const pdfDoc = await loadingTask.promise; + const fieldObjects = await pdfDoc.getFieldObjects(); + + for (const name in fieldObjects) { + const pageIndexes = fieldObjects[name].map(o => o.page); + let expected; + + switch (name) { + case "formID": + case "pdf_submission_new": + case "simple_spc": + case "adobeWarning": + expected = [0]; + break; + case "typeA13": + expected = [0, 0, 0, 0]; + break; + case "typeA15[0]": + case "typeA15[1]": + case "typeA15[2]": + case "typeA15[3]": + expected = [-1, 0, 0, 0, 0]; + break; + } + expect(pageIndexes).toEqual(expected); + } + + await loadingTask.destroy(); + }); + it("check field object for group of buttons", async function () { if (isNodeJS) { pending("Linked test-cases are not supported in Node.js.");