diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js index 8be0950f3..68fe82694 100644 --- a/src/core/struct_tree.js +++ b/src/core/struct_tree.js @@ -158,7 +158,7 @@ class StructTreeRoot { newRefs.push({ ref: structTreeRootRef, data: buffer.join("") }); } - async canUpdateStructTree({ pdfManager, newAnnotationsByPage }) { + async canUpdateStructTree({ pdfManager, xref, newAnnotationsByPage }) { if (!this.ref) { warn("Cannot update the struct tree: no root reference."); return false; @@ -180,17 +180,18 @@ class StructTreeRoot { warn("Cannot update the struct tree: nums isn't an array."); return false; } + const numberTree = new NumberTree(parentTree, xref); - const { numPages } = pdfManager.catalog; for (const pageIndex of newAnnotationsByPage.keys()) { - const { pageDict, ref: pageRef } = await pdfManager.getPage(pageIndex); - if (!(pageRef instanceof Ref)) { - warn(`Cannot save the struct tree: page ${pageIndex} has no ref.`); - return false; + const { pageDict } = await pdfManager.getPage(pageIndex); + if (!pageDict.has("StructParents")) { + // StructParents is required when the content stream has some tagged + // contents but a page can just have tagged annotations. + continue; } const id = pageDict.get("StructParents"); - if (!Number.isInteger(id) || id < 0 || id >= numPages) { - warn(`Cannot save the struct tree: page ${pageIndex} has no id.`); + if (!Number.isInteger(id) || !Array.isArray(numberTree.get(id))) { + warn(`Cannot save the struct tree: page ${pageIndex} has a wrong id.`); return false; } } @@ -202,7 +203,7 @@ class StructTreeRoot { elements, xref: this.dict.xref, pageDict, - parentTree, + numberTree, }); for (const element of elements) { @@ -312,6 +313,7 @@ class StructTreeRoot { for (const [pageIndex, elements] of newAnnotationsByPage) { const { ref: pageRef } = await pdfManager.getPage(pageIndex); + const isPageRef = pageRef instanceof Ref; for (const { accessibilityData, ref, @@ -364,7 +366,10 @@ class StructTreeRoot { const objDict = new Dict(xref); tagDict.set("K", objDict); objDict.set("Type", objr); - objDict.set("Pg", pageRef); + if (isPageRef) { + // Pg is optional. + objDict.set("Pg", pageRef); + } objDict.set("Obj", ref); buffer.length = 0; @@ -378,7 +383,7 @@ class StructTreeRoot { return nextKey + 1; } - static #collectParents({ elements, xref, pageDict, parentTree }) { + static #collectParents({ elements, xref, pageDict, numberTree }) { const idToElement = new Map(); for (const element of elements) { if (element.structTreeParentId) { @@ -388,11 +393,12 @@ class StructTreeRoot { } const id = pageDict.get("StructParents"); - const numberTree = new NumberTree(parentTree, xref); - const parentArray = numberTree.get(id); - if (!Array.isArray(parentArray)) { + if (!Number.isInteger(id)) { return; } + // The parentArray type has already been checked by the caller. + const parentArray = numberTree.get(id); + const updateElement = (kid, pageKid, kidRef) => { const element = idToElement.get(kid); if (element) { diff --git a/src/core/worker.js b/src/core/worker.js index 4df0d0d3d..fe699b978 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -576,6 +576,7 @@ class WorkerMessageHandler { } else if ( await _structTreeRoot.canUpdateStructTree({ pdfManager, + xref, newAnnotationsByPage, }) ) { diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 81a86aff0..3e7eea679 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -2297,7 +2297,7 @@ describe("api", function () { await loadingTask.destroy(); }); - it("write a new stamp annotation in a tagged pdf, save and check that the structure tree", async function () { + it("write a new stamp annotation in a tagged pdf, save and check the structure tree", async function () { if (isNodeJS) { pending("Cannot create a bitmap from Node.js."); } @@ -2349,6 +2349,76 @@ describe("api", function () { await loadingTask.destroy(); }); + it("write a new stamp annotation in a tagged pdf, save, repeat and check the structure tree", async function () { + if (isNodeJS) { + pending("Cannot create a bitmap from Node.js."); + } + + const TEST_IMAGES_PATH = "../images/"; + const filename = "firefox_logo.png"; + const path = new URL(TEST_IMAGES_PATH + filename, window.location).href; + + const response = await fetch(path); + const blob = await response.blob(); + let loadingTask, pdfDoc; + let data = buildGetDocumentParams("empty.pdf"); + + for (let i = 1; i <= 2; i++) { + const bitmap = await createImageBitmap(blob); + loadingTask = getDocument(data); + pdfDoc = await loadingTask.promise; + pdfDoc.annotationStorage.setValue("pdfjs_internal_editor_0", { + annotationType: AnnotationEditorType.STAMP, + rect: [10 * i, 10 * i, 20 * i, 20 * i], + rotation: 0, + bitmap, + bitmapId: "im1", + pageIndex: 0, + structTreeParentId: null, + accessibilityData: { + type: "Figure", + alt: `Hello World ${i}`, + }, + }); + + data = await pdfDoc.saveDocument(); + await loadingTask.destroy(); + } + + loadingTask = getDocument(data); + pdfDoc = await loadingTask.promise; + const page = await pdfDoc.getPage(1); + const tree = await page.getStructTree(); + + expect(tree).toEqual({ + children: [ + { + role: "Figure", + children: [ + { + type: "annotation", + id: "pdfjs_internal_id_18R", + }, + ], + alt: "Hello World 1", + }, + { + role: "Figure", + children: [ + { + type: "annotation", + id: "pdfjs_internal_id_26R", + }, + ], + alt: "Hello World 2", + }, + ], + role: "Root", + }); + + await loadingTask.destroy(); + }); + it("write a new stamp annotation in a non-tagged pdf, save and check that the structure tree", async function () { if (isNodeJS) { pending("Cannot create a bitmap from Node.js.");