pdf.js/web/struct_tree_layer_builder.js
Brendan Dahl fc9501a637 Add support for basic structure tree for accessibility.
When a PDF is "marked" we now generate a separate DOM that represents
the structure tree from the PDF.  This DOM is inserted into the <canvas>
element and allows screen readers to walk the tree and have more
information about headings, images, links, etc. To link the structure
tree DOM (which is empty) to the text layer, aria-owns is used. This
required modifying the text layer creation so that marked items are
now tracked.
2021-04-09 09:56:28 -07:00

150 lines
3.7 KiB
JavaScript

/* Copyright 2021 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Maps PDF standard structure types to the ARIA role written onto the
// generated element; a `null` entry means no role attribute is set for that
// structure type.
//
// The table is built on a null prototype so that looking up an arbitrary role
// name coming from a PDF (e.g. "constructor" or "toString") can never resolve
// to an inherited Object.prototype member. It is frozen because it is shared,
// read-only data.
const PDF_ROLE_TO_HTML_ROLE = Object.freeze(
  Object.assign(Object.create(null), {
    // Document level structure types
    Document: null, // There's a "document" role, but it doesn't make sense here.
    DocumentFragment: null,
    // Grouping level structure types
    Part: "group",
    Sect: "group", // XXX: There's a "section" role, but it's abstract.
    Div: "group",
    Aside: "note",
    NonStruct: "none",
    // Block level structure types
    P: null,
    // H<n>,
    H: "heading",
    Title: null,
    FENote: "note",
    // Sub-block level structure type
    Sub: "group",
    // General inline level structure types
    Lbl: null,
    Span: null,
    Em: null,
    Strong: null,
    Link: "link",
    Annot: "note",
    Form: "form",
    // Ruby and Warichu structure types
    Ruby: null,
    RB: null,
    RT: null,
    RP: null,
    Warichu: null,
    WT: null,
    WP: null,
    // List standard structure types
    L: "list",
    LI: "listitem",
    LBody: null,
    // Table standard structure types
    Table: "table",
    TR: "row",
    TH: "columnheader",
    TD: "cell",
    THead: "columnheader",
    TBody: null,
    TFoot: null,
    // Standard structure type Caption
    Caption: null,
    // Standard structure type Figure
    Figure: "figure",
    // Standard structure type Formula
    Formula: null,
    // standard structure type Artifact
    Artifact: null,
  })
);

// Matches the numbered heading structure types ("H1", "H2", ...) and captures
// the digits, which are used as the heading level.
const HEADING_PATTERN = /^H(\d+)$/;
/**
* @typedef {Object} StructTreeLayerBuilderOptions
* @property {PDFPage} pdfPage
*/
class StructTreeLayerBuilder {
  /**
   * @param {StructTreeLayerBuilderOptions} options
   */
  constructor({ pdfPage }) {
    this.pdfPage = pdfPage;
  }

  /**
   * Converts a structure tree into a tree of `<span>` elements carrying ARIA
   * attributes.
   *
   * @param {Object} structTree - Root node of the structure tree.
   * @returns {HTMLElement|null} The root element, or `null` when `structTree`
   *   is falsy.
   */
  render(structTree) {
    return this._walk(structTree);
  }

  /**
   * Copies the accessibility-related fields of a structure node onto an
   * element: `alt` becomes `aria-label` and `id` becomes `aria-owns`. Fields
   * that are `undefined` are skipped.
   *
   * @param {Object} structElement - Node to read `alt` / `id` from.
   * @param {HTMLElement} htmlElement - Element receiving the attributes.
   */
  _setAttributes(structElement, htmlElement) {
    if (structElement.alt !== undefined) {
      htmlElement.setAttribute("aria-label", structElement.alt);
    }
    if (structElement.id !== undefined) {
      htmlElement.setAttribute("aria-owns", structElement.id);
    }
  }

  /**
   * Recursively builds the element for `node` and its descendants.
   *
   * @param {Object} node - Structure tree node; may have `role`, `alt`, `id`
   *   and `children`.
   * @returns {HTMLElement|null} The created `<span>`, or `null` when `node`
   *   is falsy.
   */
  _walk(node) {
    if (!node) {
      return null;
    }

    const element = document.createElement("span");
    if ("role" in node) {
      const { role } = node;
      // Only string roles can be matched or looked up; anything else is
      // ignored rather than throwing on `role.match`.
      if (typeof role === "string") {
        const match = role.match(HEADING_PATTERN);
        if (match) {
          element.setAttribute("role", "heading");
          element.setAttribute("aria-level", match[1]);
        } else {
          // Requiring a string rejects both `null` entries and any inherited
          // non-string value the lookup could return for names such as
          // "constructor".
          const htmlRole = PDF_ROLE_TO_HTML_ROLE[role];
          if (typeof htmlRole === "string") {
            element.setAttribute("role", htmlRole);
          }
        }
      }
    }

    this._setAttributes(node, element);

    const { children } = node;
    if (children) {
      const firstChild = children[0];
      if (children.length === 1 && firstChild && "id" in firstChild) {
        // Often there is only one content node so just set the values on the
        // parent node to avoid creating an extra span.
        this._setAttributes(firstChild, element);
      } else {
        for (const kid of children) {
          // `_walk` returns null for falsy nodes; appending null would throw.
          const kidElement = this._walk(kid);
          if (kidElement) {
            element.appendChild(kidElement);
          }
        }
      }
    }
    return element;
  }
}
/**
* @implements IPDFStructTreeLayerFactory
*/
class DefaultStructTreeLayerFactory {
  /**
   * Instantiates a `StructTreeLayerBuilder`, passing `pdfPage` as its only
   * option.
   *
   * @param {PDFPage} pdfPage
   * @returns {StructTreeLayerBuilder}
   */
  createStructTreeLayerBuilder(pdfPage) {
    const builderOptions = { pdfPage };
    return new StructTreeLayerBuilder(builderOptions);
  }
}
export { DefaultStructTreeLayerFactory, StructTreeLayerBuilder };