Merge pull request #13171 from brendandahl/struct-tree
[api-minor] Add support for basic structure tree for accessibility.
This commit is contained in:
commit
03c8c89002
@ -58,6 +58,7 @@ import { calculateMD5 } from "./crypto.js";
|
|||||||
import { Linearization } from "./parser.js";
|
import { Linearization } from "./parser.js";
|
||||||
import { OperatorList } from "./operator_list.js";
|
import { OperatorList } from "./operator_list.js";
|
||||||
import { PartialEvaluator } from "./evaluator.js";
|
import { PartialEvaluator } from "./evaluator.js";
|
||||||
|
import { StructTreePage } from "./struct_tree.js";
|
||||||
import { XFAFactory } from "./xfa/factory.js";
|
import { XFAFactory } from "./xfa/factory.js";
|
||||||
|
|
||||||
const DEFAULT_USER_UNIT = 1.0;
|
const DEFAULT_USER_UNIT = 1.0;
|
||||||
@ -104,6 +105,10 @@ class Page {
|
|||||||
static createObjId() {
|
static createObjId() {
|
||||||
return `p${pageIndex}_${++idCounters.obj}`;
|
return `p${pageIndex}_${++idCounters.obj}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static getPageObjId() {
|
||||||
|
return `page${ref.toString()}`;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -406,6 +411,7 @@ class Page {
|
|||||||
handler,
|
handler,
|
||||||
task,
|
task,
|
||||||
normalizeWhitespace,
|
normalizeWhitespace,
|
||||||
|
includeMarkedContent,
|
||||||
sink,
|
sink,
|
||||||
combineTextItems,
|
combineTextItems,
|
||||||
}) {
|
}) {
|
||||||
@ -437,12 +443,22 @@ class Page {
|
|||||||
task,
|
task,
|
||||||
resources: this.resources,
|
resources: this.resources,
|
||||||
normalizeWhitespace,
|
normalizeWhitespace,
|
||||||
|
includeMarkedContent,
|
||||||
combineTextItems,
|
combineTextItems,
|
||||||
sink,
|
sink,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async getStructTree() {
|
||||||
|
const structTreeRoot = await this.pdfManager.ensureCatalog(
|
||||||
|
"structTreeRoot"
|
||||||
|
);
|
||||||
|
const tree = new StructTreePage(structTreeRoot, this.pageDict);
|
||||||
|
tree.parse();
|
||||||
|
return tree;
|
||||||
|
}
|
||||||
|
|
||||||
getAnnotationsData(intent) {
|
getAnnotationsData(intent) {
|
||||||
return this._parsedAnnotations.then(function (annotations) {
|
return this._parsedAnnotations.then(function (annotations) {
|
||||||
const annotationsData = [];
|
const annotationsData = [];
|
||||||
@ -604,6 +620,10 @@ class PDFDocument {
|
|||||||
static createObjId() {
|
static createObjId() {
|
||||||
unreachable("Abstract method `createObjId` called.");
|
unreachable("Abstract method `createObjId` called.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static getPageObjId() {
|
||||||
|
unreachable("Abstract method `getPageObjId` called.");
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1913,7 +1913,10 @@ class PartialEvaluator {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Other marked content types aren't supported yet.
|
// Other marked content types aren't supported yet.
|
||||||
args = [args[0].name];
|
args = [
|
||||||
|
args[0].name,
|
||||||
|
args[1] instanceof Dict ? args[1].get("MCID") : null,
|
||||||
|
];
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case OPS.beginMarkedContent:
|
case OPS.beginMarkedContent:
|
||||||
@ -1973,6 +1976,7 @@ class PartialEvaluator {
|
|||||||
stateManager = null,
|
stateManager = null,
|
||||||
normalizeWhitespace = false,
|
normalizeWhitespace = false,
|
||||||
combineTextItems = false,
|
combineTextItems = false,
|
||||||
|
includeMarkedContent = false,
|
||||||
sink,
|
sink,
|
||||||
seenStyles = new Set(),
|
seenStyles = new Set(),
|
||||||
}) {
|
}) {
|
||||||
@ -2573,6 +2577,7 @@ class PartialEvaluator {
|
|||||||
stateManager: xObjStateManager,
|
stateManager: xObjStateManager,
|
||||||
normalizeWhitespace,
|
normalizeWhitespace,
|
||||||
combineTextItems,
|
combineTextItems,
|
||||||
|
includeMarkedContent,
|
||||||
sink: sinkWrapper,
|
sink: sinkWrapper,
|
||||||
seenStyles,
|
seenStyles,
|
||||||
})
|
})
|
||||||
@ -2650,6 +2655,38 @@ class PartialEvaluator {
|
|||||||
})
|
})
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
|
case OPS.beginMarkedContent:
|
||||||
|
if (includeMarkedContent) {
|
||||||
|
textContent.items.push({
|
||||||
|
type: "beginMarkedContent",
|
||||||
|
tag: isName(args[0]) ? args[0].name : null,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPS.beginMarkedContentProps:
|
||||||
|
if (includeMarkedContent) {
|
||||||
|
flushTextContentItem();
|
||||||
|
let mcid = null;
|
||||||
|
if (isDict(args[1])) {
|
||||||
|
mcid = args[1].get("MCID");
|
||||||
|
}
|
||||||
|
textContent.items.push({
|
||||||
|
type: "beginMarkedContentProps",
|
||||||
|
id: Number.isInteger(mcid)
|
||||||
|
? `${self.idFactory.getPageObjId()}_mcid${mcid}`
|
||||||
|
: null,
|
||||||
|
tag: isName(args[0]) ? args[0].name : null,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPS.endMarkedContent:
|
||||||
|
if (includeMarkedContent) {
|
||||||
|
flushTextContentItem();
|
||||||
|
textContent.items.push({
|
||||||
|
type: "endMarkedContent",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
break;
|
||||||
} // switch
|
} // switch
|
||||||
if (textContent.items.length >= sink.desiredSize) {
|
if (textContent.items.length >= sink.desiredSize) {
|
||||||
// Wait for ready, if we reach highWaterMark.
|
// Wait for ready, if we reach highWaterMark.
|
||||||
|
@ -60,6 +60,7 @@ import { CipherTransformFactory } from "./crypto.js";
|
|||||||
import { ColorSpace } from "./colorspace.js";
|
import { ColorSpace } from "./colorspace.js";
|
||||||
import { GlobalImageCache } from "./image_utils.js";
|
import { GlobalImageCache } from "./image_utils.js";
|
||||||
import { MetadataParser } from "./metadata_parser.js";
|
import { MetadataParser } from "./metadata_parser.js";
|
||||||
|
import { StructTreeRoot } from "./struct_tree.js";
|
||||||
|
|
||||||
function fetchDestination(dest) {
|
function fetchDestination(dest) {
|
||||||
return isDict(dest) ? dest.get("D") : dest;
|
return isDict(dest) ? dest.get("D") : dest;
|
||||||
@ -200,6 +201,32 @@ class Catalog {
|
|||||||
return markInfo;
|
return markInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
get structTreeRoot() {
|
||||||
|
let structTree = null;
|
||||||
|
try {
|
||||||
|
structTree = this._readStructTreeRoot();
|
||||||
|
} catch (ex) {
|
||||||
|
if (ex instanceof MissingDataException) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
warn("Unable read to structTreeRoot info.");
|
||||||
|
}
|
||||||
|
return shadow(this, "structTreeRoot", structTree);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
_readStructTreeRoot() {
|
||||||
|
const obj = this._catDict.get("StructTreeRoot");
|
||||||
|
if (!isDict(obj)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
const root = new StructTreeRoot(obj);
|
||||||
|
root.init();
|
||||||
|
return root;
|
||||||
|
}
|
||||||
|
|
||||||
get toplevelPagesDict() {
|
get toplevelPagesDict() {
|
||||||
const pagesObj = this._catDict.get("Pages");
|
const pagesObj = this._catDict.get("Pages");
|
||||||
if (!isDict(pagesObj)) {
|
if (!isDict(pagesObj)) {
|
||||||
@ -2626,4 +2653,4 @@ const ObjectLoader = (function () {
|
|||||||
return ObjectLoader;
|
return ObjectLoader;
|
||||||
})();
|
})();
|
||||||
|
|
||||||
export { Catalog, FileSpec, ObjectLoader, XRef };
|
export { Catalog, FileSpec, NumberTree, ObjectLoader, XRef };
|
||||||
|
335
src/core/struct_tree.js
Normal file
335
src/core/struct_tree.js
Normal file
@ -0,0 +1,335 @@
|
|||||||
|
/* Copyright 2021 Mozilla Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { isDict, isName, isRef } from "./primitives.js";
|
||||||
|
import { isString, stringToPDFString, warn } from "../shared/util.js";
|
||||||
|
import { NumberTree } from "./obj.js";
|
||||||
|
|
||||||
|
const MAX_DEPTH = 40;
|
||||||
|
|
||||||
|
/**
 * The kinds of child a structure element can have. Frozen so that the
 * values compared against throughout this file cannot be reassigned by
 * accident.
 */
const StructElementType = Object.freeze({
  // An integer kid: a marked-content id on the page.
  PAGE_CONTENT: "PAGE_CONTENT",
  // A kid dictionary of type "MCR".
  STREAM_CONTENT: "STREAM_CONTENT",
  // A kid dictionary of type "OBJR".
  OBJECT: "OBJECT",
  // Any other kid dictionary, i.e. a nested structure element.
  ELEMENT: "ELEMENT",
});
|
||||||
|
|
||||||
|
/**
 * Wraps the `StructTreeRoot` dictionary and the role map read from it.
 */
class StructTreeRoot {
  /**
   * @param {Dict} rootDict - The `StructTreeRoot` dictionary.
   */
  constructor(rootDict) {
    this.dict = rootDict;
    // Filled by `readRoleMap`: RoleMap key -> name of the mapped role.
    this.roleMap = new Map();
  }

  /**
   * Loads the data that is needed up front; currently only the role map.
   */
  init() {
    this.readRoleMap();
  }

  /**
   * Copies every `RoleMap` entry whose value is a name into `this.roleMap`.
   * Entries with any other kind of value are ignored, and nothing happens
   * when `RoleMap` is not a dictionary.
   */
  readRoleMap() {
    const roleMapDict = this.dict.get("RoleMap");
    if (isDict(roleMapDict)) {
      roleMapDict.forEach((key, value) => {
        if (isName(value)) {
          this.roleMap.set(key, value.name);
        }
      });
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Instead of loading the whole tree we load just the page's relevant structure
 * elements, which means we need a wrapper structure to represent the tree.
 */
class StructElementNode {
  /**
   * @param {StructTreePage} tree - The page tree this node belongs to; its
   *   `pageDict` and `root` are consulted while parsing.
   * @param {Dict} dict - The structure element dictionary being wrapped.
   */
  constructor(tree, dict) {
    this.tree = tree;
    this.dict = dict;
    this.kids = [];
    this.parseKids();
  }

  /**
   * The element's structure type (`S`), translated through the root's role
   * map when it has an entry for it. An element without a name in `S`
   * yields the empty string (unless the role map has an entry for "").
   * @returns {string}
   */
  get role() {
    const nameObj = this.dict.get("S");
    const name = isName(nameObj) ? nameObj.name : "";
    const { root } = this.tree;
    if (root.roleMap.has(name)) {
      return root.roleMap.get(name);
    }
    return name;
  }

  /**
   * Fills `this.kids` from the `K` entry, which may be a single kid or an
   * array of kids. Kids that `parseKid` rejects are dropped.
   */
  parseKids() {
    // The element's own page, inherited by kids that do not name one.
    let pageObjId = null;
    const objRef = this.dict.getRaw("Pg");
    if (isRef(objRef)) {
      pageObjId = objRef.toString();
    }
    const kids = this.dict.get("K");
    // Treat a lone kid as a one-element list so both shapes share one path.
    const kidList = Array.isArray(kids) ? kids : [kids];
    for (const kid of kidList) {
      const element = this.parseKid(pageObjId, kid);
      if (element) {
        this.kids.push(element);
      }
    }
  }

  /**
   * Converts one `K` entry into a `StructElement`.
   *
   * @param {string | null} pageObjId - String form of the page reference
   *   inherited from the parent element, if any.
   * @param {*} kid - An integer mcid, a reference, or a dictionary.
   * @returns {StructElement | null} `null` when the kid is not usable, or
   *   when it is content that belongs to a different page.
   */
  parseKid(pageObjId, kid) {
    // A direct link to content, the integer is an mcid.
    if (Number.isInteger(kid)) {
      if (this.tree.pageDict.objId !== pageObjId) {
        return null;
      }

      return new StructElement({
        type: StructElementType.PAGE_CONTENT,
        mcid: kid,
        pageObjId,
      });
    }

    // Find the dictionary for the kid.
    let kidDict = null;
    if (isRef(kid)) {
      kidDict = this.dict.xref.fetch(kid);
    } else if (isDict(kid)) {
      kidDict = kid;
    }
    // A reference can resolve to something that is not a dictionary in a
    // malformed file; skip it rather than failing on `getRaw` below.
    if (!isDict(kidDict)) {
      return null;
    }

    // The kid's own page, when given, overrides the inherited one.
    const pageRef = kidDict.getRaw("Pg");
    if (isRef(pageRef)) {
      pageObjId = pageRef.toString();
    }

    const typeObj = kidDict.get("Type");
    const type = isName(typeObj) ? typeObj.name : null;

    if (type === "MCR") {
      if (this.tree.pageDict.objId !== pageObjId) {
        return null;
      }
      const streamRef = kidDict.getRaw("Stm");
      return new StructElement({
        type: StructElementType.STREAM_CONTENT,
        refObjId: isRef(streamRef) ? streamRef.toString() : null,
        pageObjId,
        mcid: kidDict.get("MCID"),
      });
    }

    if (type === "OBJR") {
      if (this.tree.pageDict.objId !== pageObjId) {
        return null;
      }
      const objectRef = kidDict.getRaw("Obj");
      return new StructElement({
        type: StructElementType.OBJECT,
        refObjId: isRef(objectRef) ? objectRef.toString() : null,
        pageObjId,
      });
    }

    // Anything else is a nested structure element; it is kept regardless of
    // page because its own kids decide what is relevant.
    return new StructElement({
      type: StructElementType.ELEMENT,
      dict: kidDict,
    });
  }
}
|
||||||
|
|
||||||
|
/**
 * Plain record describing one kid of a structure element.
 */
class StructElement {
  /**
   * @param {Object} options
   * @param {string} options.type - One of the `StructElementType` values.
   * @param {Dict | null} [options.dict] - Dictionary of a nested element.
   * @param {number | null} [options.mcid] - Marked-content id.
   * @param {string | null} [options.pageObjId] - Page reference as a string.
   * @param {string | null} [options.refObjId] - Referenced object as a string.
   */
  constructor(options) {
    const {
      type,
      dict = null,
      mcid = null,
      pageObjId = null,
      refObjId = null,
    } = options;
    // `parentNode` always starts out as null.
    Object.assign(this, {
      type,
      dict,
      mcid,
      pageObjId,
      refObjId,
      parentNode: null,
    });
  }
}
|
||||||
|
|
||||||
|
/**
 * The part of the document's structure tree that is reachable from one
 * page's entry in the root's `ParentTree`.
 */
class StructTreePage {
  /**
   * @param {StructTreeRoot | null} structTreeRoot - The document's structure
   *   tree root, or a falsy value when the document has none.
   * @param {Dict} pageDict - The page dictionary.
   */
  constructor(structTreeRoot, pageDict) {
    this.root = structTreeRoot;
    this.rootDict = structTreeRoot ? structTreeRoot.dict : null;
    this.pageDict = pageDict;
    // Top-level nodes, stored at the index they have in the root's `K`.
    this.nodes = [];
  }

  /**
   * Looks up the page's `StructParents` entry in the root's `ParentTree`
   * and adds a node for every referenced structure element. Does nothing
   * when any of the required pieces is missing or has the wrong type.
   */
  parse() {
    if (!this.root || !this.rootDict) {
      return;
    }

    const parentTree = this.rootDict.get("ParentTree");
    if (!parentTree) {
      return;
    }
    const id = this.pageDict.get("StructParents");
    if (!Number.isInteger(id)) {
      return;
    }
    const numberTree = new NumberTree(parentTree, this.rootDict.xref);
    const parentArray = numberTree.get(id);
    if (!Array.isArray(parentArray)) {
      return;
    }
    // Shared across all entries so a dictionary is only wrapped once.
    const map = new Map();
    for (const ref of parentArray) {
      if (!isRef(ref)) {
        continue;
      }
      const dict = this.rootDict.xref.fetch(ref);
      // Ignore references that do not resolve to a dictionary; wrapping
      // them would fail as soon as the node reads its entries.
      if (isDict(dict)) {
        this.addNode(dict, map);
      }
    }
  }

  /**
   * Wraps `dict` in a `StructElementNode` and links it into the tree by
   * walking up its `P` (parent) chain.
   *
   * @param {Dict} dict - Structure element dictionary.
   * @param {Map<Dict, StructElementNode>} map - Nodes created so far.
   * @param {number} [level] - Current depth of the walk up the parents.
   * @returns {StructElementNode | null} The node for `dict`, or `null` when
   *   the walk went deeper than `MAX_DEPTH`.
   */
  addNode(dict, map, level = 0) {
    if (level > MAX_DEPTH) {
      warn("StructTree MAX_DEPTH reached.");
      return null;
    }

    if (map.has(dict)) {
      return map.get(dict);
    }

    const element = new StructElementNode(this, dict);
    map.set(dict, element);

    const parent = dict.get("P");

    // A missing or non-dictionary parent is treated like the tree root:
    // the element can only be a top-level node.
    if (!isDict(parent) || isName(parent.get("Type"), "StructTreeRoot")) {
      if (!this.addTopLevelNode(dict, element)) {
        map.delete(dict);
      }
      return element;
    }

    const parentNode = this.addNode(parent, map, level + 1);
    if (!parentNode) {
      return element;
    }
    // Attach the node to the matching kid entry of its parent.
    let save = false;
    for (const kid of parentNode.kids) {
      if (kid.type === StructElementType.ELEMENT && kid.dict === dict) {
        kid.parentNode = element;
        save = true;
      }
    }
    if (!save) {
      // The parent does not list this element, so do not keep it cached.
      map.delete(dict);
    }
    return element;
  }

  /**
   * Records `element` in `this.nodes` when `dict` is one of the kids listed
   * in the root's `K` entry.
   *
   * @param {Dict} dict - Structure element dictionary.
   * @param {StructElementNode} element - The node wrapping `dict`.
   * @returns {boolean} `false` when `K` is missing or does not list `dict`.
   */
  addTopLevelNode(dict, element) {
    const obj = this.rootDict.get("K");
    if (!obj) {
      return false;
    }

    if (isDict(obj)) {
      if (obj.objId !== dict.objId) {
        return false;
      }
      this.nodes[0] = element;
      return true;
    }

    if (!Array.isArray(obj)) {
      // `K` has an unexpected type: nothing is stored, yet the node is
      // still reported as handled.
      return true;
    }
    let save = false;
    for (let i = 0; i < obj.length; i++) {
      const kidRef = obj[i];
      if (kidRef && kidRef.toString() === dict.objId) {
        this.nodes[i] = element;
        save = true;
      }
    }
    return save;
  }

  /**
   * Convert the tree structure into a simplified object literal that can
   * be sent to the main thread.
   * @returns {Object}
   */
  get serializable() {
    function nodeToSerializable(node, parent, level = 0) {
      if (level > MAX_DEPTH) {
        warn("StructTree too deep to be fully serialized.");
        return;
      }
      const obj = Object.create(null);
      obj.role = node.role;
      obj.children = [];
      parent.children.push(obj);
      const alt = node.dict.get("Alt");
      if (isString(alt)) {
        obj.alt = stringToPDFString(alt);
      }

      for (const kid of node.kids) {
        // Nested elements are only followed when `addNode` linked them.
        const kidElement =
          kid.type === StructElementType.ELEMENT ? kid.parentNode : null;
        if (kidElement) {
          nodeToSerializable(kidElement, obj, level + 1);
          continue;
        }
        if (
          kid.type === StructElementType.PAGE_CONTENT ||
          kid.type === StructElementType.STREAM_CONTENT
        ) {
          obj.children.push({
            type: "content",
            id: `page${kid.pageObjId}_mcid${kid.mcid}`,
          });
        } else if (kid.type === StructElementType.OBJECT) {
          obj.children.push({
            type: "object",
            id: kid.refObjId,
          });
        }
      }
    }

    const root = Object.create(null);
    root.children = [];
    root.role = "Root";
    for (const child of this.nodes) {
      // `nodes` can have holes because it is indexed by position in `K`.
      if (!child) {
        continue;
      }
      nodeToSerializable(child, root);
    }
    return root;
  }
}
|
||||||
|
|
||||||
|
export { StructTreePage, StructTreeRoot };
|
@ -717,6 +717,7 @@ class WorkerMessageHandler {
|
|||||||
task,
|
task,
|
||||||
sink,
|
sink,
|
||||||
normalizeWhitespace: data.normalizeWhitespace,
|
normalizeWhitespace: data.normalizeWhitespace,
|
||||||
|
includeMarkedContent: data.includeMarkedContent,
|
||||||
combineTextItems: data.combineTextItems,
|
combineTextItems: data.combineTextItems,
|
||||||
})
|
})
|
||||||
.then(
|
.then(
|
||||||
@ -745,6 +746,18 @@ class WorkerMessageHandler {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Resolves the requested page, asks the manager to run the page's
// `getStructTree`, and replies with the tree's `serializable` form.
handler.on("GetStructTree", function wphGetStructTree(data) {
  const { pageIndex } = data;
  return pdfManager
    .getPage(pageIndex)
    .then(page => pdfManager.ensure(page, "getStructTree"))
    .then(structTree => structTree.serializable);
});
|
||||||
|
|
||||||
handler.on("FontFallback", function (data) {
|
handler.on("FontFallback", function (data) {
|
||||||
return pdfManager.fontFallback(data.id, handler);
|
return pdfManager.fontFallback(data.id, handler);
|
||||||
});
|
});
|
||||||
|
@ -1026,13 +1026,17 @@ class PDFDocumentProxy {
|
|||||||
* whitespace with standard spaces (0x20). The default value is `false`.
|
* whitespace with standard spaces (0x20). The default value is `false`.
|
||||||
* @property {boolean} disableCombineTextItems - Do not attempt to combine
|
* @property {boolean} disableCombineTextItems - Do not attempt to combine
|
||||||
* same line {@link TextItem}'s. The default value is `false`.
|
* same line {@link TextItem}'s. The default value is `false`.
|
||||||
|
* @property {boolean} [includeMarkedContent] - When true include marked
|
||||||
|
* content items in the items array of TextContent. The default is `false`.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Page text content.
|
* Page text content.
|
||||||
*
|
*
|
||||||
* @typedef {Object} TextContent
|
* @typedef {Object} TextContent
|
||||||
* @property {Array<TextItem>} items - Array of {@link TextItem} objects.
|
* @property {Array<TextItem | TextMarkedContent>} items - Array of
|
||||||
|
* {@link TextItem} and {@link TextMarkedContent} objects. TextMarkedContent
|
||||||
|
* items are included when includeMarkedContent is true.
|
||||||
* @property {Object<string, TextStyle>} styles - {@link TextStyle} objects,
|
* @property {Object<string, TextStyle>} styles - {@link TextStyle} objects,
|
||||||
* indexed by font name.
|
* indexed by font name.
|
||||||
*/
|
*/
|
||||||
@ -1047,6 +1051,17 @@ class PDFDocumentProxy {
|
|||||||
* @property {number} width - Width in device space.
|
* @property {number} width - Width in device space.
|
||||||
* @property {number} height - Height in device space.
|
* @property {number} height - Height in device space.
|
||||||
* @property {string} fontName - Font name used by PDF.js for converted font.
|
* @property {string} fontName - Font name used by PDF.js for converted font.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Page text marked content part.
|
||||||
|
*
|
||||||
|
* @typedef {Object} TextMarkedContent
|
||||||
|
* @property {string} type - Either 'beginMarkedContent',
|
||||||
|
* 'beginMarkedContentProps', or 'endMarkedContent'.
|
||||||
|
* @property {string} id - The marked content identifier. Only used for type
|
||||||
|
* 'beginMarkedContentProps'.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1103,6 +1118,25 @@ class PDFDocumentProxy {
|
|||||||
* states set.
|
* states set.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Structure tree node. The root node will have a role "Root".
|
||||||
|
*
|
||||||
|
* @typedef {Object} StructTreeNode
|
||||||
|
* @property {Array<StructTreeNode | StructTreeContent>} children - Array of
|
||||||
|
* {@link StructTreeNode} and {@link StructTreeContent} objects.
|
||||||
|
* @property {string} role - element's role, already mapped if a role map exists
|
||||||
|
* in the PDF.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Structure tree content.
|
||||||
|
*
|
||||||
|
* @typedef {Object} StructTreeContent
|
||||||
|
* @property {string} type - either "content" for page and stream structure
|
||||||
|
* elements or "object" for object references.
|
||||||
|
* @property {string} id - unique id that will map to the text layer.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* PDF page operator list.
|
* PDF page operator list.
|
||||||
*
|
*
|
||||||
@ -1435,6 +1469,7 @@ class PDFPageProxy {
|
|||||||
streamTextContent({
|
streamTextContent({
|
||||||
normalizeWhitespace = false,
|
normalizeWhitespace = false,
|
||||||
disableCombineTextItems = false,
|
disableCombineTextItems = false,
|
||||||
|
includeMarkedContent = false,
|
||||||
} = {}) {
|
} = {}) {
|
||||||
const TEXT_CONTENT_CHUNK_SIZE = 100;
|
const TEXT_CONTENT_CHUNK_SIZE = 100;
|
||||||
|
|
||||||
@ -1444,6 +1479,7 @@ class PDFPageProxy {
|
|||||||
pageIndex: this._pageIndex,
|
pageIndex: this._pageIndex,
|
||||||
normalizeWhitespace: normalizeWhitespace === true,
|
normalizeWhitespace: normalizeWhitespace === true,
|
||||||
combineTextItems: disableCombineTextItems !== true,
|
combineTextItems: disableCombineTextItems !== true,
|
||||||
|
includeMarkedContent: includeMarkedContent === true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
highWaterMark: TEXT_CONTENT_CHUNK_SIZE,
|
highWaterMark: TEXT_CONTENT_CHUNK_SIZE,
|
||||||
@ -1484,6 +1520,16 @@ class PDFPageProxy {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @returns {Promise<StructTreeNode>} A promise that is resolved with a
|
||||||
|
* {@link StructTreeNode} object that represents the page's structure tree.
|
||||||
|
*/
|
||||||
|
getStructTree() {
|
||||||
|
return (this._structTreePromise ||= this._transport.getStructTree(
|
||||||
|
this._pageIndex
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Destroys the page object.
|
* Destroys the page object.
|
||||||
* @private
|
* @private
|
||||||
@ -1513,6 +1559,7 @@ class PDFPageProxy {
|
|||||||
this._annotationsPromise = null;
|
this._annotationsPromise = null;
|
||||||
this._jsActionsPromise = null;
|
this._jsActionsPromise = null;
|
||||||
this._xfaPromise = null;
|
this._xfaPromise = null;
|
||||||
|
this._structTreePromise = null;
|
||||||
this.pendingCleanup = false;
|
this.pendingCleanup = false;
|
||||||
return Promise.all(waitOn);
|
return Promise.all(waitOn);
|
||||||
}
|
}
|
||||||
@ -1548,6 +1595,7 @@ class PDFPageProxy {
|
|||||||
this._annotationsPromise = null;
|
this._annotationsPromise = null;
|
||||||
this._jsActionsPromise = null;
|
this._jsActionsPromise = null;
|
||||||
this._xfaPromise = null;
|
this._xfaPromise = null;
|
||||||
|
this._structTreePromise = null;
|
||||||
if (resetStats && this._stats) {
|
if (resetStats && this._stats) {
|
||||||
this._stats = new StatTimer();
|
this._stats = new StatTimer();
|
||||||
}
|
}
|
||||||
@ -2773,6 +2821,12 @@ class WorkerTransport {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
getStructTree(pageIndex) {
|
||||||
|
return this.messageHandler.sendWithPromise("GetStructTree", {
|
||||||
|
pageIndex,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
getOutline() {
|
getOutline() {
|
||||||
return this.messageHandler.sendWithPromise("GetOutline", null);
|
return this.messageHandler.sendWithPromise("GetOutline", null);
|
||||||
}
|
}
|
||||||
|
@ -638,6 +638,23 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||||||
|
|
||||||
_processItems(items, styleCache) {
|
_processItems(items, styleCache) {
|
||||||
for (let i = 0, len = items.length; i < len; i++) {
|
for (let i = 0, len = items.length; i < len; i++) {
|
||||||
|
if (items[i].str === undefined) {
|
||||||
|
if (
|
||||||
|
items[i].type === "beginMarkedContentProps" ||
|
||||||
|
items[i].type === "beginMarkedContent"
|
||||||
|
) {
|
||||||
|
const parent = this._container;
|
||||||
|
this._container = document.createElement("span");
|
||||||
|
this._container.classList.add("markedContent");
|
||||||
|
if (items[i].id !== null) {
|
||||||
|
this._container.setAttribute("id", `${items[i].id}`);
|
||||||
|
}
|
||||||
|
parent.appendChild(this._container);
|
||||||
|
} else if (items[i].type === "endMarkedContent") {
|
||||||
|
this._container = this._container.parentNode;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
this._textContentItemsStr.push(items[i].str);
|
this._textContentItemsStr.push(items[i].str);
|
||||||
appendText(this, items[i], styleCache, this._layoutTextCtx);
|
appendText(this, items[i], styleCache, this._layoutTextCtx);
|
||||||
}
|
}
|
||||||
|
@ -572,6 +572,7 @@ var Driver = (function DriverClosure() {
|
|||||||
initPromise = page
|
initPromise = page
|
||||||
.getTextContent({
|
.getTextContent({
|
||||||
normalizeWhitespace: true,
|
normalizeWhitespace: true,
|
||||||
|
includeMarkedContent: true,
|
||||||
})
|
})
|
||||||
.then(function (textContent) {
|
.then(function (textContent) {
|
||||||
return rasterizeTextLayer(
|
return rasterizeTextLayer(
|
||||||
|
@ -24,7 +24,11 @@ async function runTests(results) {
|
|||||||
jasmine.loadConfig({
|
jasmine.loadConfig({
|
||||||
random: false,
|
random: false,
|
||||||
spec_dir: "integration",
|
spec_dir: "integration",
|
||||||
spec_files: ["scripting_spec.js", "annotation_spec.js"],
|
spec_files: [
|
||||||
|
"scripting_spec.js",
|
||||||
|
"annotation_spec.js",
|
||||||
|
"accessibility_spec.js",
|
||||||
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
jasmine.addReporter({
|
jasmine.addReporter({
|
||||||
|
69
test/integration/accessibility_spec.js
Normal file
69
test/integration/accessibility_spec.js
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
/* Copyright 2021 Mozilla Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
const { closePages, loadAndWait } = require("./test_utils.js");
|
||||||
|
|
||||||
|
// Integration test: the rendered `.structTree` must point, through
// `aria-owns`, at the text-layer elements that hold the matching text.
describe("accessibility", () => {
  describe("structure tree", () => {
    let pages;

    beforeAll(async () => {
      pages = await loadAndWait("structure_simple.pdf", ".structTree");
    });

    afterAll(async () => {
      await closePages(pages);
    });

    it("must build structure that maps to text layer", async () => {
      // Selector of each heading paired with the text it must resolve to.
      const headingChecks = [
        [".structTree [role='heading'][aria-level='1'] span", "Heading 1"],
        [".structTree [role='heading'][aria-level='2'] span", "Heading 2"],
      ];

      const checkPage = async ([browserName, page]) => {
        await page.waitForSelector(".structTree");

        // Check the headings match up.
        for (const [selector, expectedText] of headingChecks) {
          const headingText = await page.$eval(
            selector,
            el =>
              document.getElementById(el.getAttribute("aria-owns")).textContent
          );
          expect(headingText)
            .withContext(`In ${browserName}`)
            .toEqual(expectedText);
        }

        // Check the order of the content.
        const texts = await page.$$eval(".structTree [aria-owns]", nodes =>
          nodes.map(
            el =>
              document.getElementById(el.getAttribute("aria-owns")).textContent
          )
        );
        expect(texts)
          .withContext(`In ${browserName}`)
          .toEqual([
            "Heading 1",
            "This paragraph 1.",
            "Heading 2",
            "This paragraph 2.",
          ]);
      };

      await Promise.all(pages.map(checkPage));
    });
  });
});
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -71,6 +71,7 @@
|
|||||||
!issue8570.pdf
|
!issue8570.pdf
|
||||||
!issue8697.pdf
|
!issue8697.pdf
|
||||||
!issue8702.pdf
|
!issue8702.pdf
|
||||||
|
!structure_simple.pdf
|
||||||
!issue12823.pdf
|
!issue12823.pdf
|
||||||
!issue8707.pdf
|
!issue8707.pdf
|
||||||
!issue8798r.pdf
|
!issue8798r.pdf
|
||||||
|
BIN
test/pdfs/structure_simple.pdf
Normal file
BIN
test/pdfs/structure_simple.pdf
Normal file
Binary file not shown.
@ -23,7 +23,7 @@
|
|||||||
bottom: 0;
|
bottom: 0;
|
||||||
line-height: 1;
|
line-height: 1;
|
||||||
}
|
}
|
||||||
.textLayer > span {
|
.textLayer span {
|
||||||
position: absolute;
|
position: absolute;
|
||||||
white-space: pre;
|
white-space: pre;
|
||||||
-webkit-transform-origin: 0% 0%;
|
-webkit-transform-origin: 0% 0%;
|
||||||
@ -37,3 +37,8 @@
|
|||||||
-moz-box-sizing: border-box;
|
-moz-box-sizing: border-box;
|
||||||
box-sizing: border-box;
|
box-sizing: border-box;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.textLayer .markedContent {
|
||||||
|
border: none;
|
||||||
|
background-color: transparent;
|
||||||
|
}
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
"pdf_history_spec.js",
|
"pdf_history_spec.js",
|
||||||
"primitives_spec.js",
|
"primitives_spec.js",
|
||||||
"stream_spec.js",
|
"stream_spec.js",
|
||||||
|
"struct_tree_spec.js",
|
||||||
"type1_parser_spec.js",
|
"type1_parser_spec.js",
|
||||||
"ui_utils_spec.js",
|
"ui_utils_spec.js",
|
||||||
"unicode_spec.js",
|
"unicode_spec.js",
|
||||||
|
@ -80,6 +80,7 @@ async function initializePDFJS(callback) {
|
|||||||
"pdfjs-test/unit/primitives_spec.js",
|
"pdfjs-test/unit/primitives_spec.js",
|
||||||
"pdfjs-test/unit/scripting_spec.js",
|
"pdfjs-test/unit/scripting_spec.js",
|
||||||
"pdfjs-test/unit/stream_spec.js",
|
"pdfjs-test/unit/stream_spec.js",
|
||||||
|
"pdfjs-test/unit/struct_tree_spec.js",
|
||||||
"pdfjs-test/unit/type1_parser_spec.js",
|
"pdfjs-test/unit/type1_parser_spec.js",
|
||||||
"pdfjs-test/unit/ui_utils_spec.js",
|
"pdfjs-test/unit/ui_utils_spec.js",
|
||||||
"pdfjs-test/unit/unicode_spec.js",
|
"pdfjs-test/unit/unicode_spec.js",
|
||||||
|
108
test/unit/struct_tree_spec.js
Normal file
108
test/unit/struct_tree_spec.js
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
/* Copyright 2021 Mozilla Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { buildGetDocumentParams } from "./test_utils.js";
|
||||||
|
import { getDocument } from "../../src/display/api.js";
|
||||||
|
|
||||||
|
/**
 * Recursively checks, through Jasmine `expect` calls, that two structure
 * trees have the same shape: matching `role`, matching `type`, the same
 * presence of a `children` property and the same number of children at every
 * level.
 *
 * @param {Object} rootA - Root of the first tree (the expected tree in this
 *   file's callers).
 * @param {Object} rootB - Root of the second tree (the tree returned by
 *   `getStructTree()` in this file's callers).
 * @returns {undefined}
 */
function equalTrees(rootA, rootB) {
  function walk(a, b) {
    expect(a.role).toEqual(b.role);
    expect(a.type).toEqual(b.type);
    expect("children" in a).toEqual("children" in b);
    if (!a.children || !b.children) {
      // Either a leaf, or a mismatch that the expectation above has already
      // reported; there is nothing further to compare pairwise.
      return;
    }
    expect(a.children.length).toEqual(b.children.length);
    // Iterate over the children of the *current* node (not the root's), and
    // only over the pairs that exist on both sides so that a length mismatch
    // is reported by the expectation above rather than as a TypeError.
    const count = Math.min(a.children.length, b.children.length);
    for (let i = 0; i < count; i++) {
      walk(a.children[i], b.children[i]);
    }
  }
  return walk(rootA, rootB);
}
|
||||||
|
|
||||||
|
// Unit tests for `PDFPageProxy.getStructTree`: each case loads a test PDF,
// fetches the structure tree of its first page and compares its shape with
// a hand-written expected tree.
describe("struct tree", function () {
  describe("getStructTree", function () {
    it("parses basic structure", async function () {
      const loadingTask = getDocument(
        buildGetDocumentParams("structure_simple.pdf")
      );
      const pdfDocument = await loadingTask.promise;
      const firstPage = await pdfDocument.getPage(1);
      const actualTree = await firstPage.getStructTree();

      // In the expected tree every block-level element holds a single
      // "NonStruct" child that wraps one content leaf.
      const blockElement = role => ({
        role,
        children: [{ role: "NonStruct", children: [{ type: "content" }] }],
      });
      const expectedTree = {
        role: "Root",
        children: [
          {
            role: "Document",
            children: ["H1", "P", "H2", "P"].map(blockElement),
          },
        ],
      };

      equalTrees(expectedTree, actualTree);
      await loadingTask.destroy();
    });

    it("parses structure with marked content reference", async function () {
      const loadingTask = getDocument(buildGetDocumentParams("issue6782.pdf"));
      const pdfDocument = await loadingTask.promise;
      const firstPage = await pdfDocument.getPage(1);
      const actualTree = await firstPage.getStructTree();

      // The single paragraph is expected to hold 27 content leaves.
      const contentLeaves = Array(27).fill({ type: "content" });
      const expectedTree = {
        role: "Root",
        children: [
          {
            role: "Part",
            children: [{ role: "P", children: contentLeaves }],
          },
        ],
      };

      equalTrees(expectedTree, actualTree);
      await loadingTask.destroy();
    });
  });
});
|
@ -41,6 +41,7 @@ import { AnnotationLayerBuilder } from "./annotation_layer_builder.js";
|
|||||||
import { NullL10n } from "./l10n_utils.js";
|
import { NullL10n } from "./l10n_utils.js";
|
||||||
import { PDFPageView } from "./pdf_page_view.js";
|
import { PDFPageView } from "./pdf_page_view.js";
|
||||||
import { SimpleLinkService } from "./pdf_link_service.js";
|
import { SimpleLinkService } from "./pdf_link_service.js";
|
||||||
|
import { StructTreeLayerBuilder } from "./struct_tree_layer_builder.js";
|
||||||
import { TextLayerBuilder } from "./text_layer_builder.js";
|
import { TextLayerBuilder } from "./text_layer_builder.js";
|
||||||
import { XfaLayerBuilder } from "./xfa_layer_builder.js";
|
import { XfaLayerBuilder } from "./xfa_layer_builder.js";
|
||||||
|
|
||||||
@ -545,6 +546,7 @@ class BaseViewer {
|
|||||||
textLayerMode: this.textLayerMode,
|
textLayerMode: this.textLayerMode,
|
||||||
annotationLayerFactory: this,
|
annotationLayerFactory: this,
|
||||||
xfaLayerFactory,
|
xfaLayerFactory,
|
||||||
|
structTreeLayerFactory: this,
|
||||||
imageResourcesPath: this.imageResourcesPath,
|
imageResourcesPath: this.imageResourcesPath,
|
||||||
renderInteractiveForms: this.renderInteractiveForms,
|
renderInteractiveForms: this.renderInteractiveForms,
|
||||||
renderer: this.renderer,
|
renderer: this.renderer,
|
||||||
@ -1328,6 +1330,16 @@ class BaseViewer {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {PDFPage} pdfPage
|
||||||
|
* @returns {StructTreeLayerBuilder}
|
||||||
|
*/
|
||||||
|
createStructTreeLayerBuilder(pdfPage) {
|
||||||
|
return new StructTreeLayerBuilder({
|
||||||
|
pdfPage,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @type {boolean} Whether all pages of the PDF document have identical
|
* @type {boolean} Whether all pages of the PDF document have identical
|
||||||
* widths and heights.
|
* widths and heights.
|
||||||
|
@ -216,6 +216,17 @@ class IPDFXfaLayerFactory {
|
|||||||
createXfaLayerBuilder(pageDiv, pdfPage) {}
|
createXfaLayerBuilder(pageDiv, pdfPage) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Factory contract for objects that can create the structure tree layer
 * builder of a page. The method body is intentionally empty: this class only
 * documents the expected signature.
 *
 * @interface
 */
class IPDFStructTreeLayerFactory {
  /**
   * @param {PDFPage} pdfPage - The page the builder is created for.
   * @returns {StructTreeLayerBuilder}
   */
  createStructTreeLayerBuilder(pdfPage) {}
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @interface
|
* @interface
|
||||||
*/
|
*/
|
||||||
@ -254,6 +265,7 @@ export {
|
|||||||
IPDFAnnotationLayerFactory,
|
IPDFAnnotationLayerFactory,
|
||||||
IPDFHistory,
|
IPDFHistory,
|
||||||
IPDFLinkService,
|
IPDFLinkService,
|
||||||
|
IPDFStructTreeLayerFactory,
|
||||||
IPDFTextLayerFactory,
|
IPDFTextLayerFactory,
|
||||||
IPDFXfaLayerFactory,
|
IPDFXfaLayerFactory,
|
||||||
IRenderableView,
|
IRenderableView,
|
||||||
|
@ -49,6 +49,7 @@ import { viewerCompatibilityParams } from "./viewer_compatibility.js";
|
|||||||
* The default value is `TextLayerMode.ENABLE`.
|
* The default value is `TextLayerMode.ENABLE`.
|
||||||
* @property {IPDFAnnotationLayerFactory} annotationLayerFactory
|
* @property {IPDFAnnotationLayerFactory} annotationLayerFactory
|
||||||
* @property {IPDFXfaLayerFactory} xfaLayerFactory
|
* @property {IPDFXfaLayerFactory} xfaLayerFactory
|
||||||
|
* @property {IPDFStructTreeLayerFactory} structTreeLayerFactory
|
||||||
* @property {string} [imageResourcesPath] - Path for image resources, mainly
|
* @property {string} [imageResourcesPath] - Path for image resources, mainly
|
||||||
* for annotation icons. Include trailing slash.
|
* for annotation icons. Include trailing slash.
|
||||||
* @property {boolean} renderInteractiveForms - Turns on rendering of
|
* @property {boolean} renderInteractiveForms - Turns on rendering of
|
||||||
@ -102,6 +103,7 @@ class PDFPageView {
|
|||||||
this.textLayerFactory = options.textLayerFactory;
|
this.textLayerFactory = options.textLayerFactory;
|
||||||
this.annotationLayerFactory = options.annotationLayerFactory;
|
this.annotationLayerFactory = options.annotationLayerFactory;
|
||||||
this.xfaLayerFactory = options.xfaLayerFactory;
|
this.xfaLayerFactory = options.xfaLayerFactory;
|
||||||
|
this.structTreeLayerFactory = options.structTreeLayerFactory;
|
||||||
this.renderer = options.renderer || RendererType.CANVAS;
|
this.renderer = options.renderer || RendererType.CANVAS;
|
||||||
this.enableWebGL = options.enableWebGL || false;
|
this.enableWebGL = options.enableWebGL || false;
|
||||||
this.l10n = options.l10n || NullL10n;
|
this.l10n = options.l10n || NullL10n;
|
||||||
@ -116,6 +118,7 @@ class PDFPageView {
|
|||||||
this.textLayer = null;
|
this.textLayer = null;
|
||||||
this.zoomLayer = null;
|
this.zoomLayer = null;
|
||||||
this.xfaLayer = null;
|
this.xfaLayer = null;
|
||||||
|
this.structTreeLayer = null;
|
||||||
|
|
||||||
const div = document.createElement("div");
|
const div = document.createElement("div");
|
||||||
div.className = "page";
|
div.className = "page";
|
||||||
@ -354,6 +357,10 @@ class PDFPageView {
|
|||||||
this.annotationLayer.cancel();
|
this.annotationLayer.cancel();
|
||||||
this.annotationLayer = null;
|
this.annotationLayer = null;
|
||||||
}
|
}
|
||||||
|
if (this._onTextLayerRendered) {
|
||||||
|
this.eventBus._off("textlayerrendered", this._onTextLayerRendered);
|
||||||
|
this._onTextLayerRendered = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cssTransform(target, redrawAnnotations = false) {
|
cssTransform(target, redrawAnnotations = false) {
|
||||||
@ -556,11 +563,12 @@ class PDFPageView {
|
|||||||
this.paintTask = paintTask;
|
this.paintTask = paintTask;
|
||||||
|
|
||||||
const resultPromise = paintTask.promise.then(
|
const resultPromise = paintTask.promise.then(
|
||||||
function () {
|
() => {
|
||||||
return finishPaintTask(null).then(function () {
|
return finishPaintTask(null).then(() => {
|
||||||
if (textLayer) {
|
if (textLayer) {
|
||||||
const readableStream = pdfPage.streamTextContent({
|
const readableStream = pdfPage.streamTextContent({
|
||||||
normalizeWhitespace: true,
|
normalizeWhitespace: true,
|
||||||
|
includeMarkedContent: true,
|
||||||
});
|
});
|
||||||
textLayer.setTextContentStream(readableStream);
|
textLayer.setTextContentStream(readableStream);
|
||||||
textLayer.render();
|
textLayer.render();
|
||||||
@ -599,6 +607,29 @@ class PDFPageView {
|
|||||||
this._renderXfaLayer();
|
this._renderXfaLayer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The structure tree is currently only supported when the text layer is
|
||||||
|
// enabled and a canvas is used for rendering.
|
||||||
|
if (this.structTreeLayerFactory && this.textLayer && this.canvas) {
|
||||||
|
// The structure tree must be generated after the text layer for the
|
||||||
|
// aria-owns to work.
|
||||||
|
this._onTextLayerRendered = event => {
|
||||||
|
if (event.pageNumber !== this.id) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.eventBus._off("textlayerrendered", this._onTextLayerRendered);
|
||||||
|
this._onTextLayerRendered = null;
|
||||||
|
this.pdfPage.getStructTree().then(tree => {
|
||||||
|
const treeDom = this.structTreeLayer.render(tree);
|
||||||
|
treeDom.classList.add("structTree");
|
||||||
|
this.canvas.appendChild(treeDom);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
this.eventBus._on("textlayerrendered", this._onTextLayerRendered);
|
||||||
|
this.structTreeLayer = this.structTreeLayerFactory.createStructTreeLayerBuilder(
|
||||||
|
pdfPage
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
div.setAttribute("data-loaded", true);
|
div.setAttribute("data-loaded", true);
|
||||||
|
|
||||||
this.eventBus.dispatch("pagerender", {
|
this.eventBus.dispatch("pagerender", {
|
||||||
|
149
web/struct_tree_layer_builder.js
Normal file
149
web/struct_tree_layer_builder.js
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
/* Copyright 2021 Mozilla Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Lookup table from PDF structure types to the value written to the HTML
// `role` attribute by `StructTreeLayerBuilder._walk`. A `null` entry means
// that no `role` attribute is set for that structure type (the lookup result
// is only used when it is truthy).
const PDF_ROLE_TO_HTML_ROLE = {
  // Document level structure types
  Document: null, // There's a "document" role, but it doesn't make sense here.
  DocumentFragment: null,
  // Grouping level structure types
  Part: "group",
  Sect: "group", // XXX: There's a "section" role, but it's abstract.
  Div: "group",
  Aside: "note",
  NonStruct: "none",
  // Block level structure types
  P: null,
  // H<n> (numbered headings) are not listed here; they are matched with
  // HEADING_PATTERN instead.
  H: "heading",
  Title: null,
  FENote: "note",
  // Sub-block level structure type
  Sub: "group",
  // General inline level structure types
  Lbl: null,
  Span: null,
  Em: null,
  Strong: null,
  Link: "link",
  Annot: "note",
  Form: "form",
  // Ruby and Warichu structure types
  Ruby: null,
  RB: null,
  RT: null,
  RP: null,
  Warichu: null,
  WT: null,
  WP: null,
  // List standard structure types
  L: "list",
  LI: "listitem",
  LBody: null,
  // Table standard structure types
  Table: "table",
  TR: "row",
  TH: "columnheader",
  TD: "cell",
  THead: "columnheader",
  TBody: null,
  TFoot: null,
  // Standard structure type Caption
  Caption: null,
  // Standard structure type Figure
  Figure: "figure",
  // Standard structure type Formula
  Formula: null,
  // Standard structure type Artifact
  Artifact: null,
};

// Matches numbered heading structure types ("H1", "H2", ...) and captures the
// digits, which `_walk` writes to the `aria-level` attribute.
const HEADING_PATTERN = /^H(\d+)$/;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @typedef {Object} StructTreeLayerBuilderOptions
|
||||||
|
* @property {PDFPage} pdfPage
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Converts a page's structure tree into a tree of `<span>` elements carrying
 * ARIA attributes (`role`, `aria-level`, `aria-label`, `aria-owns`).
 */
class StructTreeLayerBuilder {
  /**
   * @param {StructTreeLayerBuilderOptions} options
   */
  constructor({ pdfPage }) {
    this.pdfPage = pdfPage;
  }

  /**
   * @param {Object} structTree - Root node of the structure tree.
   * @returns {HTMLElement|null} The root `<span>`, or `null` when
   *   `structTree` is falsy.
   */
  render(structTree) {
    return this._walk(structTree);
  }

  /**
   * Copies the `alt` and `id` properties of a structure node, when defined,
   * to `aria-label` and `aria-owns` respectively.
   *
   * @param {Object} structElement
   * @param {HTMLElement} htmlElement
   */
  _setAttributes(structElement, htmlElement) {
    if (structElement.alt !== undefined) {
      htmlElement.setAttribute("aria-label", structElement.alt);
    }
    if (structElement.id !== undefined) {
      htmlElement.setAttribute("aria-owns", structElement.id);
    }
  }

  /**
   * Recursively builds the `<span>` for `node` and its descendants.
   *
   * @param {Object} node
   * @returns {HTMLElement|null} `null` when `node` is falsy.
   */
  _walk(node) {
    if (!node) {
      return null;
    }

    const element = document.createElement("span");
    if ("role" in node) {
      const { role } = node;
      const match = role.match(HEADING_PATTERN);
      if (match) {
        element.setAttribute("role", "heading");
        element.setAttribute("aria-level", match[1]);
      } else if (
        // Only consult the table's own entries; a plain property lookup would
        // also resolve inherited members such as "constructor".
        Object.prototype.hasOwnProperty.call(PDF_ROLE_TO_HTML_ROLE, role) &&
        PDF_ROLE_TO_HTML_ROLE[role]
      ) {
        element.setAttribute("role", PDF_ROLE_TO_HTML_ROLE[role]);
      }
    }

    this._setAttributes(node, element);

    if (node.children) {
      const { children } = node;
      if (children.length === 1 && children[0] && "id" in children[0]) {
        // Often there is only one content node so just set the values on the
        // parent node to avoid creating an extra span.
        this._setAttributes(children[0], element);
      } else {
        for (const kid of children) {
          // `_walk` returns null for missing children, and `appendChild`
          // throws when given null, so such entries are skipped.
          const kidElement = this._walk(kid);
          if (kidElement) {
            element.appendChild(kidElement);
          }
        }
      }
    }
    return element;
  }
}
|
||||||
|
|
||||||
|
/**
 * Stand-alone factory that hands out a `StructTreeLayerBuilder` for a page.
 *
 * @implements IPDFStructTreeLayerFactory
 */
class DefaultStructTreeLayerFactory {
  /**
   * @param {PDFPage} pdfPage - Forwarded unchanged to the builder's options.
   * @returns {StructTreeLayerBuilder}
   */
  createStructTreeLayerBuilder(pdfPage) {
    const builderOptions = { pdfPage };
    return new StructTreeLayerBuilder(builderOptions);
  }
}
|
||||||
|
|
||||||
|
export { DefaultStructTreeLayerFactory, StructTreeLayerBuilder };
|
@ -24,7 +24,7 @@
|
|||||||
line-height: 1;
|
line-height: 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
.textLayer > span {
|
.textLayer span {
|
||||||
color: transparent;
|
color: transparent;
|
||||||
position: absolute;
|
position: absolute;
|
||||||
white-space: pre;
|
white-space: pre;
|
||||||
|
@ -175,7 +175,7 @@ select {
|
|||||||
display: none !important;
|
display: none !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.pdfViewer.enablePermissions .textLayer > span {
|
.pdfViewer.enablePermissions .textLayer span {
|
||||||
user-select: none !important;
|
user-select: none !important;
|
||||||
cursor: not-allowed;
|
cursor: not-allowed;
|
||||||
}
|
}
|
||||||
@ -195,12 +195,12 @@ select {
|
|||||||
display: none;
|
display: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.pdfPresentationMode:fullscreen .textLayer > span {
|
.pdfPresentationMode:fullscreen .textLayer span {
|
||||||
cursor: none;
|
cursor: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.pdfPresentationMode.pdfPresentationModeControls > *,
|
.pdfPresentationMode.pdfPresentationModeControls > *,
|
||||||
.pdfPresentationMode.pdfPresentationModeControls .textLayer > span {
|
.pdfPresentationMode.pdfPresentationModeControls .textLayer span {
|
||||||
cursor: default;
|
cursor: default;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1653,19 +1653,19 @@ html[dir="rtl"] #documentPropertiesOverlay .row > * {
|
|||||||
mix-blend-mode: screen;
|
mix-blend-mode: screen;
|
||||||
}
|
}
|
||||||
|
|
||||||
#viewer.textLayer-visible .textLayer > span {
|
#viewer.textLayer-visible .textLayer span {
|
||||||
background-color: rgba(255, 255, 0, 0.1);
|
background-color: rgba(255, 255, 0, 0.1);
|
||||||
color: rgba(0, 0, 0, 1);
|
color: rgba(0, 0, 0, 1);
|
||||||
border: solid 1px rgba(255, 0, 0, 0.5);
|
border: solid 1px rgba(255, 0, 0, 0.5);
|
||||||
box-sizing: border-box;
|
box-sizing: border-box;
|
||||||
}
|
}
|
||||||
|
|
||||||
#viewer.textLayer-hover .textLayer > span:hover {
|
#viewer.textLayer-hover .textLayer span:hover {
|
||||||
background-color: rgba(255, 255, 255, 1);
|
background-color: rgba(255, 255, 255, 1);
|
||||||
color: rgba(0, 0, 0, 1);
|
color: rgba(0, 0, 0, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#viewer.textLayer-shadow .textLayer > span {
|
#viewer.textLayer-shadow .textLayer span {
|
||||||
background-color: rgba(255, 255, 255, 0.6);
|
background-color: rgba(255, 255, 255, 0.6);
|
||||||
color: rgba(0, 0, 0, 1);
|
color: rgba(0, 0, 0, 1);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user