StructParents entry isn't required on pages with no tagged contents (bug 1855641)

This commit is contained in:
Calixte Denizet 2023-09-28 11:37:35 +02:00
parent 2daf9515b3
commit f2196f7803
3 changed files with 92 additions and 15 deletions

View File

@ -158,7 +158,7 @@ class StructTreeRoot {
newRefs.push({ ref: structTreeRootRef, data: buffer.join("") }); newRefs.push({ ref: structTreeRootRef, data: buffer.join("") });
} }
async canUpdateStructTree({ pdfManager, newAnnotationsByPage }) { async canUpdateStructTree({ pdfManager, xref, newAnnotationsByPage }) {
if (!this.ref) { if (!this.ref) {
warn("Cannot update the struct tree: no root reference."); warn("Cannot update the struct tree: no root reference.");
return false; return false;
@ -180,17 +180,18 @@ class StructTreeRoot {
warn("Cannot update the struct tree: nums isn't an array."); warn("Cannot update the struct tree: nums isn't an array.");
return false; return false;
} }
const numberTree = new NumberTree(parentTree, xref);
const { numPages } = pdfManager.catalog;
for (const pageIndex of newAnnotationsByPage.keys()) { for (const pageIndex of newAnnotationsByPage.keys()) {
const { pageDict, ref: pageRef } = await pdfManager.getPage(pageIndex); const { pageDict } = await pdfManager.getPage(pageIndex);
if (!(pageRef instanceof Ref)) { if (!pageDict.has("StructParents")) {
warn(`Cannot save the struct tree: page ${pageIndex} has no ref.`); // StructParents is required when the content stream has some tagged
return false; // contents but a page can just have tagged annotations.
continue;
} }
const id = pageDict.get("StructParents"); const id = pageDict.get("StructParents");
if (!Number.isInteger(id) || id < 0 || id >= numPages) { if (!Number.isInteger(id) || !Array.isArray(numberTree.get(id))) {
warn(`Cannot save the struct tree: page ${pageIndex} has no id.`); warn(`Cannot save the struct tree: page ${pageIndex} has a wrong id.`);
return false; return false;
} }
} }
@ -202,7 +203,7 @@ class StructTreeRoot {
elements, elements,
xref: this.dict.xref, xref: this.dict.xref,
pageDict, pageDict,
parentTree, numberTree,
}); });
for (const element of elements) { for (const element of elements) {
@ -312,6 +313,7 @@ class StructTreeRoot {
for (const [pageIndex, elements] of newAnnotationsByPage) { for (const [pageIndex, elements] of newAnnotationsByPage) {
const { ref: pageRef } = await pdfManager.getPage(pageIndex); const { ref: pageRef } = await pdfManager.getPage(pageIndex);
const isPageRef = pageRef instanceof Ref;
for (const { for (const {
accessibilityData, accessibilityData,
ref, ref,
@ -364,7 +366,10 @@ class StructTreeRoot {
const objDict = new Dict(xref); const objDict = new Dict(xref);
tagDict.set("K", objDict); tagDict.set("K", objDict);
objDict.set("Type", objr); objDict.set("Type", objr);
if (isPageRef) {
// Pg is optional.
objDict.set("Pg", pageRef); objDict.set("Pg", pageRef);
}
objDict.set("Obj", ref); objDict.set("Obj", ref);
buffer.length = 0; buffer.length = 0;
@ -378,7 +383,7 @@ class StructTreeRoot {
return nextKey + 1; return nextKey + 1;
} }
static #collectParents({ elements, xref, pageDict, parentTree }) { static #collectParents({ elements, xref, pageDict, numberTree }) {
const idToElement = new Map(); const idToElement = new Map();
for (const element of elements) { for (const element of elements) {
if (element.structTreeParentId) { if (element.structTreeParentId) {
@ -388,11 +393,12 @@ class StructTreeRoot {
} }
const id = pageDict.get("StructParents"); const id = pageDict.get("StructParents");
const numberTree = new NumberTree(parentTree, xref); if (!Number.isInteger(id)) {
const parentArray = numberTree.get(id);
if (!Array.isArray(parentArray)) {
return; return;
} }
// The parentArray type has already been checked by the caller.
const parentArray = numberTree.get(id);
const updateElement = (kid, pageKid, kidRef) => { const updateElement = (kid, pageKid, kidRef) => {
const element = idToElement.get(kid); const element = idToElement.get(kid);
if (element) { if (element) {

View File

@ -576,6 +576,7 @@ class WorkerMessageHandler {
} else if ( } else if (
await _structTreeRoot.canUpdateStructTree({ await _structTreeRoot.canUpdateStructTree({
pdfManager, pdfManager,
xref,
newAnnotationsByPage, newAnnotationsByPage,
}) })
) { ) {

View File

@ -2297,7 +2297,7 @@ describe("api", function () {
await loadingTask.destroy(); await loadingTask.destroy();
}); });
it("write a new stamp annotation in a tagged pdf, save and check that the structure tree", async function () { it("write a new stamp annotation in a tagged pdf, save and check the structure tree", async function () {
if (isNodeJS) { if (isNodeJS) {
pending("Cannot create a bitmap from Node.js."); pending("Cannot create a bitmap from Node.js.");
} }
@ -2349,6 +2349,76 @@ describe("api", function () {
await loadingTask.destroy(); await loadingTask.destroy();
}); });
// Round-trip test: add a stamp annotation, save, reopen the saved output, add a
// second stamp, save again, and then verify that both figures are present in
// the structure tree of the final document.
it("write a new stamp annotation in a tagged pdf, save, repeat and check the structure tree", async function () {
  if (isNodeJS) {
    pending("Cannot create a bitmap from Node.js.");
  }

  // Fetch the image once; a fresh bitmap is created from it on every pass.
  const imageUrl = new URL("../images/firefox_logo.png", window.location).href;
  const imageResponse = await fetch(imageUrl);
  const imageBlob = await imageResponse.blob();

  // `source` starts as the parameters for the original file and is then
  // replaced by the bytes returned from each save, so the second pass edits
  // the output of the first one.
  let source = buildGetDocumentParams("empty.pdf");
  for (const pass of [1, 2]) {
    const bitmap = await createImageBitmap(imageBlob);
    const passTask = getDocument(source);
    const passDoc = await passTask.promise;

    // The rectangle and the alt text depend on the pass number.
    const near = 10 * pass;
    const far = 20 * pass;
    passDoc.annotationStorage.setValue("pdfjs_internal_editor_0", {
      annotationType: AnnotationEditorType.STAMP,
      rect: [near, near, far, far],
      rotation: 0,
      bitmap,
      bitmapId: "im1",
      pageIndex: 0,
      structTreeParentId: null,
      accessibilityData: {
        type: "Figure",
        alt: `Hello World ${pass}`,
      },
    });

    source = await passDoc.saveDocument();
    await passTask.destroy();
  }

  // Reload the twice-saved document and inspect the first page's tree.
  const loadingTask = getDocument(source);
  const pdfDoc = await loadingTask.promise;
  const firstPage = await pdfDoc.getPage(1);
  const tree = await firstPage.getStructTree();

  // Expected shape of a "Figure" node wrapping a single annotation child.
  const figureNode = (alt, id) => ({
    role: "Figure",
    children: [
      {
        type: "annotation",
        id,
      },
    ],
    alt,
  });

  expect(tree).toEqual({
    children: [
      figureNode("Hello World 1", "pdfjs_internal_id_18R"),
      figureNode("Hello World 2", "pdfjs_internal_id_26R"),
    ],
    role: "Root",
  });

  await loadingTask.destroy();
});
it("write a new stamp annotation in a non-tagged pdf, save and check that the structure tree", async function () { it("write a new stamp annotation in a non-tagged pdf, save and check that the structure tree", async function () {
if (isNodeJS) { if (isNodeJS) {
pending("Cannot create a bitmap from Node.js."); pending("Cannot create a bitmap from Node.js.");