XFA - Support text search in XFA documents.
Moves the logic for highlighting matches out of TextLayerBuilder and into a new, separate class, `TextHighlighter`, that can be used with both regular PDFs and XFA PDFs. To mimic the current find functionality for XFA documents, two parallel arrays are built: one holding the text content and one holding the DOM nodes that contain that text. Fixes #13878
This commit is contained in:
parent
3c8ee25e05
commit
bb47128864
@ -62,6 +62,7 @@ import { MessageHandler } from "../shared/message_handler.js";
|
||||
import { Metadata } from "./metadata.js";
|
||||
import { OptionalContentConfig } from "./optional_content_config.js";
|
||||
import { PDFDataTransportStream } from "./transport_stream.js";
|
||||
import { XfaText } from "./xfa_text.js";
|
||||
|
||||
const DEFAULT_RANGE_CHUNK_SIZE = 65536; // 2^16 = 65536
|
||||
const RENDERING_CANCELLED_TIMEOUT = 100; // ms
|
||||
@ -1561,6 +1562,13 @@ class PDFPageProxy {
|
||||
* {@link TextContent} object that represents the page's text content.
|
||||
*/
|
||||
getTextContent(params = {}) {
|
||||
if (this._transport._htmlForXfa) {
|
||||
// TODO: We need to revisit this once the XFA foreground patch lands and
|
||||
// only do this for non-foreground XFA.
|
||||
return this.getXfa().then(xfa => {
|
||||
return XfaText.textContent(xfa);
|
||||
});
|
||||
}
|
||||
const readableStream = this.streamTextContent(params);
|
||||
|
||||
return new Promise(function (resolve, reject) {
|
||||
|
@ -13,6 +13,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { XfaText } from "./xfa_text.js";
|
||||
|
||||
class XfaLayer {
|
||||
static setupStorage(html, id, element, storage, intent) {
|
||||
const storedData = storage.getValue(id, { value: null });
|
||||
@ -127,6 +129,9 @@ class XfaLayer {
|
||||
// Set defaults.
|
||||
rootDiv.setAttribute("class", "xfaLayer xfaFont");
|
||||
|
||||
// Text nodes used for the text highlighter.
|
||||
const textDivs = [];
|
||||
|
||||
while (stack.length > 0) {
|
||||
const [parent, i, html] = stack[stack.length - 1];
|
||||
if (i + 1 === parent.children.length) {
|
||||
@ -141,7 +146,9 @@ class XfaLayer {
|
||||
|
||||
const { name } = child;
|
||||
if (name === "#text") {
|
||||
html.appendChild(document.createTextNode(child.value));
|
||||
const node = document.createTextNode(child.value);
|
||||
textDivs.push(node);
|
||||
html.appendChild(node);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -160,7 +167,11 @@ class XfaLayer {
|
||||
if (child.children && child.children.length > 0) {
|
||||
stack.push([child, -1, childHtml]);
|
||||
} else if (child.value) {
|
||||
childHtml.appendChild(document.createTextNode(child.value));
|
||||
const node = document.createTextNode(child.value);
|
||||
if (XfaText.shouldBuildText(name)) {
|
||||
textDivs.push(node);
|
||||
}
|
||||
childHtml.appendChild(node);
|
||||
}
|
||||
}
|
||||
|
||||
@ -185,6 +196,10 @@ class XfaLayer {
|
||||
)) {
|
||||
el.setAttribute("readOnly", true);
|
||||
}
|
||||
|
||||
return {
|
||||
textDivs,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
79
src/display/xfa_text.js
Normal file
79
src/display/xfa_text.js
Normal file
@ -0,0 +1,79 @@
|
||||
/* Copyright 2021 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
class XfaText {
  /**
   * Flatten an XFA fake-DOM tree, in document (pre-order) sequence, into an
   * object shaped like a regular PDF's TextContent. Only `TextItem.str` is
   * produced for each item; `styles` is always an empty, prototype-less
   * object.
   *
   * @param {Object} xfa - An XFA fake DOM object.
   *
   * @returns {TextContent}
   */
  static textContent(xfa) {
    const items = [];

    const visit = node => {
      if (!node) {
        return;
      }
      const { name } = node;

      if (name === "#text") {
        // Text nodes always contribute an item, whatever their value is.
        items.push({ str: node.value });
      } else {
        if (!XfaText.shouldBuildText(name)) {
          // Form controls are skipped together with their whole subtree.
          return;
        }
        // An explicit `textContent` attribute wins over the node's value.
        const str = node.attributes?.textContent || node.value;
        if (str) {
          items.push({ str });
        }
      }

      const { children } = node;
      if (!children) {
        return;
      }
      for (const child of children) {
        visit(child);
      }
    };

    visit(xfa);
    return { items, styles: Object.create(null) };
  }

  /**
   * @param {string} name - DOM node name. (lower case)
   *
   * @returns {boolean} false for the form-control elements (`textarea`,
   *   `input`, `option`, `select`), true for every other node name.
   */
  static shouldBuildText(name) {
    switch (name) {
      case "textarea":
      case "input":
      case "option":
      case "select":
        return false;
      default:
        return true;
    }
  }
}
|
||||
|
||||
export { XfaText };
|
@ -72,4 +72,37 @@ describe("find bar", () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
describe("highlight all", () => {
  // Selectors used by the search scenario below.
  const FIND_BUTTON = "#viewFind";
  const FIND_INPUT = "#findInput";

  let pages;

  beforeAll(async () => {
    pages = await loadAndWait("xfa_imm5257e.pdf#zoom=100", ".xfaLayer");
  });

  afterAll(async () => {
    await closePages(pages);
  });

  it("must search xfa correctly", async () => {
    // Reads the text content of the element behind a puppeteer handle.
    const readText = handle => handle.evaluate(el => el.textContent);

    // Opens the find bar, searches for "city" and checks both the match
    // counter and the text of the currently selected highlight.
    const searchPage = async ([browserName, page]) => {
      await page.click(FIND_BUTTON);
      await page.waitForSelector(FIND_BUTTON, { hidden: false });
      await page.type(FIND_INPUT, "city");
      await page.waitForSelector(`${FIND_INPUT}[data-status='']`);
      // Highlights must show up inside the XFA layer itself.
      await page.waitForSelector(".xfaLayer .highlight");

      const counter = await page.waitForSelector("#findResultsCount");
      expect(await readText(counter)).toEqual("1 of 7 matches");

      const selected = await page.waitForSelector(".highlight.selected");
      expect(await readText(selected)).toEqual("City");
    };

    await Promise.all(pages.map(entry => searchPage(entry)));
  });
});
|
||||
});
|
||||
|
@ -42,6 +42,7 @@ import { NullL10n } from "./l10n_utils.js";
|
||||
import { PDFPageView } from "./pdf_page_view.js";
|
||||
import { SimpleLinkService } from "./pdf_link_service.js";
|
||||
import { StructTreeLayerBuilder } from "./struct_tree_layer_builder.js";
|
||||
import { TextHighlighter } from "./text_highlighter.js";
|
||||
import { TextLayerBuilder } from "./text_layer_builder.js";
|
||||
import { XfaLayerBuilder } from "./xfa_layer_builder.js";
|
||||
|
||||
@ -525,7 +526,9 @@ class BaseViewer {
|
||||
const scale = this.currentScale;
|
||||
const viewport = firstPdfPage.getViewport({ scale: scale * CSS_UNITS });
|
||||
const textLayerFactory =
|
||||
this.textLayerMode !== TextLayerMode.DISABLE ? this : null;
|
||||
this.textLayerMode !== TextLayerMode.DISABLE && !isPureXfa
|
||||
? this
|
||||
: null;
|
||||
const xfaLayerFactory = isPureXfa ? this : null;
|
||||
|
||||
for (let pageNum = 1; pageNum <= pagesCount; ++pageNum) {
|
||||
@ -541,6 +544,7 @@ class BaseViewer {
|
||||
textLayerMode: this.textLayerMode,
|
||||
annotationLayerFactory: this,
|
||||
xfaLayerFactory,
|
||||
textHighlighterFactory: this,
|
||||
structTreeLayerFactory: this,
|
||||
imageResourcesPath: this.imageResourcesPath,
|
||||
renderInteractiveForms: this.renderInteractiveForms,
|
||||
@ -1242,6 +1246,7 @@ class BaseViewer {
|
||||
* @param {PageViewport} viewport
|
||||
* @param {boolean} enhanceTextSelection
|
||||
* @param {EventBus} eventBus
|
||||
* @param {TextHighlighter} highlighter
|
||||
* @returns {TextLayerBuilder}
|
||||
*/
|
||||
createTextLayerBuilder(
|
||||
@ -1249,17 +1254,31 @@ class BaseViewer {
|
||||
pageIndex,
|
||||
viewport,
|
||||
enhanceTextSelection = false,
|
||||
eventBus
|
||||
eventBus,
|
||||
highlighter
|
||||
) {
|
||||
return new TextLayerBuilder({
|
||||
textLayerDiv,
|
||||
eventBus,
|
||||
pageIndex,
|
||||
viewport,
|
||||
findController: this.isInPresentationMode ? null : this.findController,
|
||||
enhanceTextSelection: this.isInPresentationMode
|
||||
? false
|
||||
: enhanceTextSelection,
|
||||
highlighter,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} pageIndex
|
||||
* @param {EventBus} eventBus
|
||||
* @returns {TextHighlighter}
|
||||
*/
|
||||
createTextHighlighter(pageIndex, eventBus) {
|
||||
return new TextHighlighter({
|
||||
eventBus,
|
||||
pageIndex,
|
||||
findController: this.isInPresentationMode ? null : this.findController,
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -162,6 +162,7 @@ class IPDFTextLayerFactory {
|
||||
* @param {PageViewport} viewport
|
||||
* @param {boolean} enhanceTextSelection
|
||||
* @param {EventBus} eventBus
|
||||
* @param {TextHighlighter} highlighter
|
||||
* @returns {TextLayerBuilder}
|
||||
*/
|
||||
createTextLayerBuilder(
|
||||
@ -169,7 +170,8 @@ class IPDFTextLayerFactory {
|
||||
pageIndex,
|
||||
viewport,
|
||||
enhanceTextSelection = false,
|
||||
eventBus
|
||||
eventBus,
|
||||
highlighter
|
||||
) {}
|
||||
}
|
||||
|
||||
|
@ -101,6 +101,11 @@ class PDFPageView {
|
||||
this.textLayerFactory = options.textLayerFactory;
|
||||
this.annotationLayerFactory = options.annotationLayerFactory;
|
||||
this.xfaLayerFactory = options.xfaLayerFactory;
|
||||
this.textHighlighter =
|
||||
options.textHighlighterFactory?.createTextHighlighter(
|
||||
this.id - 1,
|
||||
this.eventBus
|
||||
);
|
||||
this.structTreeLayerFactory = options.structTreeLayerFactory;
|
||||
this.renderer = options.renderer || RendererType.CANVAS;
|
||||
this.l10n = options.l10n || NullL10n;
|
||||
@ -175,7 +180,10 @@ class PDFPageView {
|
||||
async _renderXfaLayer() {
|
||||
let error = null;
|
||||
try {
|
||||
await this.xfaLayer.render(this.viewport, "display");
|
||||
const result = await this.xfaLayer.render(this.viewport, "display");
|
||||
if (this.textHighlighter) {
|
||||
this._buildXfaTextContentItems(result.textDivs);
|
||||
}
|
||||
} catch (ex) {
|
||||
error = ex;
|
||||
} finally {
|
||||
@ -187,6 +195,16 @@ class PDFPageView {
|
||||
}
|
||||
}
|
||||
|
||||
async _buildXfaTextContentItems(textDivs) {
|
||||
const text = await this.pdfPage.getTextContent();
|
||||
const items = [];
|
||||
for (const item of text.items) {
|
||||
items.push(item.str);
|
||||
}
|
||||
this.textHighlighter.setTextMapping(textDivs, items);
|
||||
this.textHighlighter.enable();
|
||||
}
|
||||
|
||||
/**
|
||||
* @private
|
||||
*/
|
||||
@ -382,6 +400,7 @@ class PDFPageView {
|
||||
if (this.xfaLayer && (!keepXfaLayer || !this.xfaLayer.div)) {
|
||||
this.xfaLayer.cancel();
|
||||
this.xfaLayer = null;
|
||||
this.textHighlighter?.disable();
|
||||
}
|
||||
if (this._onTextLayerRendered) {
|
||||
this.eventBus._off("textlayerrendered", this._onTextLayerRendered);
|
||||
@ -533,7 +552,8 @@ class PDFPageView {
|
||||
this.id - 1,
|
||||
this.viewport,
|
||||
this.textLayerMode === TextLayerMode.ENABLE_ENHANCE,
|
||||
this.eventBus
|
||||
this.eventBus,
|
||||
this.textHighlighter
|
||||
);
|
||||
}
|
||||
this.textLayer = textLayer;
|
||||
|
293
web/text_highlighter.js
Normal file
293
web/text_highlighter.js
Normal file
@ -0,0 +1,293 @@
|
||||
/* Copyright 2021 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * @typedef {Object} TextHighlighterOptions
 * @property {PDFFindController} findController
 * @property {EventBus} eventBus - The application event bus.
 * @property {number} pageIndex - The page index.
 */

/**
 * TextHighlighter handles highlighting matches from the FindController in
 * either the text layer or XFA layer depending on the type of document.
 */
class TextHighlighter {
  /**
   * @param {TextHighlighterOptions} options
   */
  constructor({ findController, eventBus, pageIndex }) {
    this.findController = findController;
    this.matches = [];
    this.eventBus = eventBus;
    this.pageIdx = pageIndex;
    this._onUpdateTextLayerMatches = null;
    this.textDivs = null;
    this.textContentItemsStr = null;
    this.enabled = false;
  }

  /**
   * Store two arrays that will map DOM nodes to text they should contain.
   * The arrays should be of equal length and the array element at each index
   * should correspond to the other. e.g.
   * `divs[0] = "<span>Item 0</span>"` and `texts[0] = "Item 0"`.
   *
   * NOTE: The `divs` array is modified in place while rendering matches; any
   * entry that is a plain text node gets replaced by the `<span>` that is
   * wrapped around it.
   *
   * @param {Array<Node>} divs
   * @param {Array<string>} texts
   */
  setTextMapping(divs, texts) {
    this.textDivs = divs;
    this.textContentItemsStr = texts;
  }

  /**
   * Start listening for events to update the highlighter and check if there
   * are any current matches that need to be highlighted.
   *
   * @throws {Error} If no text mapping has been set, or if the highlighter
   *   is already enabled.
   */
  enable() {
    if (!this.textDivs || !this.textContentItemsStr) {
      throw new Error("Text divs and strings have not been set.");
    }
    if (this.enabled) {
      throw new Error("TextHighlighter is already enabled.");
    }
    this.enabled = true;
    if (!this._onUpdateTextLayerMatches) {
      this._onUpdateTextLayerMatches = evt => {
        // A page index of -1 addresses every page.
        if (evt.pageIndex === this.pageIdx || evt.pageIndex === -1) {
          this._updateMatches();
        }
      };
      this.eventBus._on(
        "updatetextlayermatches",
        this._onUpdateTextLayerMatches
      );
    }
    this._updateMatches();
  }

  /**
   * Stop listening for match updates. Calling this on a highlighter that is
   * not enabled is a no-op. Highlights that are already rendered are left
   * untouched.
   */
  disable() {
    if (!this.enabled) {
      return;
    }
    this.enabled = false;
    if (this._onUpdateTextLayerMatches) {
      this.eventBus._off(
        "updatetextlayermatches",
        this._onUpdateTextLayerMatches
      );
      this._onUpdateTextLayerMatches = null;
    }
  }

  /**
   * Convert match offsets, expressed relative to the concatenation of all
   * mapped strings, into (divIdx, offset) pairs.
   *
   * @param {Array<number>|null} matches - Start offsets, in ascending order.
   * @param {Array<number>|null} matchesLength - Length of each match.
   * @returns {Array<Object>} One `{ begin, end }` entry per match, where both
   *   `begin` and `end` have the shape `{ divIdx, offset }`.
   */
  _convertMatches(matches, matchesLength) {
    // Early exit if there is nothing to convert.
    if (!matches) {
      return [];
    }
    const { textContentItemsStr } = this;
    // Without any mapped text there is nothing a match could point into.
    if (textContentItemsStr.length === 0) {
      return [];
    }

    let i = 0,
      iIndex = 0;
    const end = textContentItemsStr.length - 1;
    const result = [];

    for (let m = 0, mm = matches.length; m < mm; m++) {
      // Calculate the start position.
      let matchIdx = matches[m];

      // Loop over the divIdxs.
      while (i !== end && matchIdx >= iIndex + textContentItemsStr[i].length) {
        iIndex += textContentItemsStr[i].length;
        i++;
      }

      // The loop above never moves past the last item, hence a start offset
      // that is still out of range here lies beyond all of the mapped text.
      if (matchIdx >= iIndex + textContentItemsStr[i].length) {
        console.error("Could not find a matching mapping");
      }

      const match = {
        begin: {
          divIdx: i,
          offset: matchIdx - iIndex,
        },
      };

      // Calculate the end position.
      matchIdx += matchesLength[m];

      // Somewhat the same loop as above, but use > instead of >= to get
      // the end position right.
      while (i !== end && matchIdx > iIndex + textContentItemsStr[i].length) {
        iIndex += textContentItemsStr[i].length;
        i++;
      }

      match.end = {
        divIdx: i,
        offset: matchIdx - iIndex,
      };
      result.push(match);
    }
    return result;
  }

  /**
   * Rebuild the content of every div touched by a match so that the matched
   * ranges are wrapped in highlight spans, and ask the find controller to
   * scroll the selected match into view.
   *
   * @param {Array<Object>} matches - The output of `_convertMatches`.
   */
  _renderMatches(matches) {
    // Early exit if there is nothing to render.
    if (matches.length === 0) {
      return;
    }
    const { findController, pageIdx } = this;
    const { textContentItemsStr, textDivs } = this;

    const isSelectedPage = pageIdx === findController.selected.pageIdx;
    const selectedMatchIdx = findController.selected.matchIdx;
    const highlightAll = findController.state.highlightAll;
    let prevEnd = null;
    const infinity = {
      divIdx: -1,
      offset: undefined,
    };
    // Value of `Node.TEXT_NODE`, spelled out to avoid relying on the global.
    const TEXT_NODE = 3;

    // Text nodes can neither carry a class name nor hold child spans, hence
    // a text node entry is wrapped in a <span> which then replaces it in
    // `textDivs`. Element entries are returned as-is.
    function ensureElement(divIdx) {
      const node = textDivs[divIdx];
      if (node.nodeType !== TEXT_NODE) {
        return node;
      }
      const span = document.createElement("span");
      node.parentNode.insertBefore(span, node);
      span.appendChild(node);
      textDivs[divIdx] = span;
      return span;
    }

    function beginText(begin, className) {
      const divIdx = begin.divIdx;
      textDivs[divIdx].textContent = "";
      return appendTextToDiv(divIdx, 0, begin.offset, className);
    }

    function appendTextToDiv(divIdx, fromOffset, toOffset, className) {
      const div = ensureElement(divIdx);
      const content = textContentItemsStr[divIdx].substring(
        fromOffset,
        toOffset
      );
      const node = document.createTextNode(content);
      if (className) {
        const span = document.createElement("span");
        span.className = `${className} appended`;
        span.appendChild(node);
        div.appendChild(span);
        return className.includes("selected") ? span.offsetLeft : 0;
      }
      div.appendChild(node);
      return 0;
    }

    let i0 = selectedMatchIdx,
      i1 = i0 + 1;
    if (highlightAll) {
      i0 = 0;
      i1 = matches.length;
    } else if (!isSelectedPage) {
      // Not highlighting all and this isn't the selected page, so do nothing.
      return;
    }

    for (let i = i0; i < i1; i++) {
      const match = matches[i];
      const begin = match.begin;
      const end = match.end;
      const isSelected = isSelectedPage && i === selectedMatchIdx;
      const highlightSuffix = isSelected ? " selected" : "";
      let selectedLeft = 0;

      // Match inside new div.
      if (!prevEnd || begin.divIdx !== prevEnd.divIdx) {
        // If there was a previous div, then add the text at the end.
        if (prevEnd !== null) {
          appendTextToDiv(prevEnd.divIdx, prevEnd.offset, infinity.offset);
        }
        // Clear the divs and set the content until the starting point.
        beginText(begin);
      } else {
        appendTextToDiv(prevEnd.divIdx, prevEnd.offset, begin.offset);
      }

      if (begin.divIdx === end.divIdx) {
        selectedLeft = appendTextToDiv(
          begin.divIdx,
          begin.offset,
          end.offset,
          "highlight" + highlightSuffix
        );
      } else {
        selectedLeft = appendTextToDiv(
          begin.divIdx,
          begin.offset,
          infinity.offset,
          "highlight begin" + highlightSuffix
        );
        for (let n0 = begin.divIdx + 1, n1 = end.divIdx; n0 < n1; n0++) {
          // Divs fully covered by the match keep their content and only get
          // a class; text nodes are wrapped first so the class has an effect.
          ensureElement(n0).className = "highlight middle" + highlightSuffix;
        }
        beginText(end, "highlight end" + highlightSuffix);
      }
      prevEnd = end;

      if (isSelected) {
        // Attempt to scroll the selected match into view.
        findController.scrollMatchIntoView({
          element: textDivs[begin.divIdx],
          selectedLeft,
          pageIndex: pageIdx,
          matchIndex: selectedMatchIdx,
        });
      }
    }

    if (prevEnd) {
      appendTextToDiv(prevEnd.divIdx, prevEnd.offset, infinity.offset);
    }
  }

  /**
   * Remove the highlights rendered for the previous matches and, if the find
   * controller wants matches highlighted, render its current matches for
   * this page. Does nothing while the highlighter is disabled.
   */
  _updateMatches() {
    if (!this.enabled) {
      return;
    }
    const { findController, matches, pageIdx } = this;
    const { textContentItemsStr, textDivs } = this;
    let clearedUntilDivIdx = -1;

    // Clear all current matches.
    for (let i = 0, ii = matches.length; i < ii; i++) {
      const match = matches[i];
      // Consecutive matches can share a div; don't reset it twice.
      const begin = Math.max(clearedUntilDivIdx, match.begin.divIdx);
      for (let n = begin, end = match.end.divIdx; n <= end; n++) {
        const div = textDivs[n];
        div.textContent = textContentItemsStr[n];
        div.className = "";
      }
      clearedUntilDivIdx = match.end.divIdx + 1;
    }

    if (!findController?.highlightMatches) {
      return;
    }
    // Convert the matches on the `findController` into the match format
    // used for the textLayer.
    const pageMatches = findController.pageMatches[pageIdx] || null;
    const pageMatchesLength = findController.pageMatchesLength[pageIdx] || null;

    this.matches = this._convertMatches(pageMatches, pageMatchesLength);
    this._renderMatches(this.matches);
  }
}
|
||||
|
||||
export { TextHighlighter };
|
@ -23,7 +23,8 @@ const EXPAND_DIVS_TIMEOUT = 300; // ms
|
||||
* @property {EventBus} eventBus - The application event bus.
|
||||
* @property {number} pageIndex - The page index.
|
||||
* @property {PageViewport} viewport - The viewport of the text layer.
|
||||
* @property {PDFFindController} findController
|
||||
* @property {TextHighlighter} highlighter - Optional object that will handle
|
||||
* highlighting text from the find controller.
|
||||
* @property {boolean} enhanceTextSelection - Option to turn on improved
|
||||
* text selection.
|
||||
*/
|
||||
@ -31,8 +32,7 @@ const EXPAND_DIVS_TIMEOUT = 300; // ms
|
||||
/**
|
||||
* The text layer builder provides text selection functionality for the PDF.
|
||||
* It does this by creating overlay divs over the PDF's text. These divs
|
||||
* contain text that matches the PDF text they are overlaying. This object
|
||||
* also provides a way to highlight text that is being searched for.
|
||||
* contain text that matches the PDF text they are overlaying.
|
||||
*/
|
||||
class TextLayerBuilder {
|
||||
constructor({
|
||||
@ -40,7 +40,7 @@ class TextLayerBuilder {
|
||||
eventBus,
|
||||
pageIndex,
|
||||
viewport,
|
||||
findController = null,
|
||||
highlighter = null,
|
||||
enhanceTextSelection = false,
|
||||
}) {
|
||||
this.textLayerDiv = textLayerDiv;
|
||||
@ -54,11 +54,10 @@ class TextLayerBuilder {
|
||||
this.matches = [];
|
||||
this.viewport = viewport;
|
||||
this.textDivs = [];
|
||||
this.findController = findController;
|
||||
this.textLayerRenderTask = null;
|
||||
this.highlighter = highlighter;
|
||||
this.enhanceTextSelection = enhanceTextSelection;
|
||||
|
||||
this._onUpdateTextLayerMatches = null;
|
||||
this._bindMouse();
|
||||
}
|
||||
|
||||
@ -94,6 +93,9 @@ class TextLayerBuilder {
|
||||
this.cancel();
|
||||
|
||||
this.textDivs = [];
|
||||
if (this.highlighter) {
|
||||
this.highlighter.setTextMapping(this.textDivs, this.textContentItemsStr);
|
||||
}
|
||||
const textLayerFrag = document.createDocumentFragment();
|
||||
this.textLayerRenderTask = renderTextLayer({
|
||||
textContent: this.textContent,
|
||||
@ -109,24 +111,12 @@ class TextLayerBuilder {
|
||||
() => {
|
||||
this.textLayerDiv.appendChild(textLayerFrag);
|
||||
this._finishRendering();
|
||||
this._updateMatches();
|
||||
this.highlighter?.enable();
|
||||
},
|
||||
function (reason) {
|
||||
// Cancelled or failed to render text layer; skipping errors.
|
||||
}
|
||||
);
|
||||
|
||||
if (!this._onUpdateTextLayerMatches) {
|
||||
this._onUpdateTextLayerMatches = evt => {
|
||||
if (evt.pageIndex === this.pageIdx || evt.pageIndex === -1) {
|
||||
this._updateMatches();
|
||||
}
|
||||
};
|
||||
this.eventBus._on(
|
||||
"updatetextlayermatches",
|
||||
this._onUpdateTextLayerMatches
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -137,13 +127,7 @@ class TextLayerBuilder {
|
||||
this.textLayerRenderTask.cancel();
|
||||
this.textLayerRenderTask = null;
|
||||
}
|
||||
if (this._onUpdateTextLayerMatches) {
|
||||
this.eventBus._off(
|
||||
"updatetextlayermatches",
|
||||
this._onUpdateTextLayerMatches
|
||||
);
|
||||
this._onUpdateTextLayerMatches = null;
|
||||
}
|
||||
this.highlighter?.disable();
|
||||
}
|
||||
|
||||
setTextContentStream(readableStream) {
|
||||
@ -156,198 +140,6 @@ class TextLayerBuilder {
|
||||
this.textContent = textContent;
|
||||
}
|
||||
|
||||
_convertMatches(matches, matchesLength) {
|
||||
// Early exit if there is nothing to convert.
|
||||
if (!matches) {
|
||||
return [];
|
||||
}
|
||||
const { textContentItemsStr } = this;
|
||||
|
||||
let i = 0,
|
||||
iIndex = 0;
|
||||
const end = textContentItemsStr.length - 1;
|
||||
const result = [];
|
||||
|
||||
for (let m = 0, mm = matches.length; m < mm; m++) {
|
||||
// Calculate the start position.
|
||||
let matchIdx = matches[m];
|
||||
|
||||
// Loop over the divIdxs.
|
||||
while (i !== end && matchIdx >= iIndex + textContentItemsStr[i].length) {
|
||||
iIndex += textContentItemsStr[i].length;
|
||||
i++;
|
||||
}
|
||||
|
||||
if (i === textContentItemsStr.length) {
|
||||
console.error("Could not find a matching mapping");
|
||||
}
|
||||
|
||||
const match = {
|
||||
begin: {
|
||||
divIdx: i,
|
||||
offset: matchIdx - iIndex,
|
||||
},
|
||||
};
|
||||
|
||||
// Calculate the end position.
|
||||
matchIdx += matchesLength[m];
|
||||
|
||||
// Somewhat the same array as above, but use > instead of >= to get
|
||||
// the end position right.
|
||||
while (i !== end && matchIdx > iIndex + textContentItemsStr[i].length) {
|
||||
iIndex += textContentItemsStr[i].length;
|
||||
i++;
|
||||
}
|
||||
|
||||
match.end = {
|
||||
divIdx: i,
|
||||
offset: matchIdx - iIndex,
|
||||
};
|
||||
result.push(match);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
_renderMatches(matches) {
|
||||
// Early exit if there is nothing to render.
|
||||
if (matches.length === 0) {
|
||||
return;
|
||||
}
|
||||
const { findController, pageIdx, textContentItemsStr, textDivs } = this;
|
||||
|
||||
const isSelectedPage = pageIdx === findController.selected.pageIdx;
|
||||
const selectedMatchIdx = findController.selected.matchIdx;
|
||||
const highlightAll = findController.state.highlightAll;
|
||||
let prevEnd = null;
|
||||
const infinity = {
|
||||
divIdx: -1,
|
||||
offset: undefined,
|
||||
};
|
||||
|
||||
function beginText(begin, className) {
|
||||
const divIdx = begin.divIdx;
|
||||
textDivs[divIdx].textContent = "";
|
||||
return appendTextToDiv(divIdx, 0, begin.offset, className);
|
||||
}
|
||||
|
||||
function appendTextToDiv(divIdx, fromOffset, toOffset, className) {
|
||||
const div = textDivs[divIdx];
|
||||
const content = textContentItemsStr[divIdx].substring(
|
||||
fromOffset,
|
||||
toOffset
|
||||
);
|
||||
const node = document.createTextNode(content);
|
||||
if (className) {
|
||||
const span = document.createElement("span");
|
||||
span.className = `${className} appended`;
|
||||
span.appendChild(node);
|
||||
div.appendChild(span);
|
||||
return className.includes("selected") ? span.offsetLeft : 0;
|
||||
}
|
||||
div.appendChild(node);
|
||||
return 0;
|
||||
}
|
||||
|
||||
let i0 = selectedMatchIdx,
|
||||
i1 = i0 + 1;
|
||||
if (highlightAll) {
|
||||
i0 = 0;
|
||||
i1 = matches.length;
|
||||
} else if (!isSelectedPage) {
|
||||
// Not highlighting all and this isn't the selected page, so do nothing.
|
||||
return;
|
||||
}
|
||||
|
||||
for (let i = i0; i < i1; i++) {
|
||||
const match = matches[i];
|
||||
const begin = match.begin;
|
||||
const end = match.end;
|
||||
const isSelected = isSelectedPage && i === selectedMatchIdx;
|
||||
const highlightSuffix = isSelected ? " selected" : "";
|
||||
let selectedLeft = 0;
|
||||
|
||||
// Match inside new div.
|
||||
if (!prevEnd || begin.divIdx !== prevEnd.divIdx) {
|
||||
// If there was a previous div, then add the text at the end.
|
||||
if (prevEnd !== null) {
|
||||
appendTextToDiv(prevEnd.divIdx, prevEnd.offset, infinity.offset);
|
||||
}
|
||||
// Clear the divs and set the content until the starting point.
|
||||
beginText(begin);
|
||||
} else {
|
||||
appendTextToDiv(prevEnd.divIdx, prevEnd.offset, begin.offset);
|
||||
}
|
||||
|
||||
if (begin.divIdx === end.divIdx) {
|
||||
selectedLeft = appendTextToDiv(
|
||||
begin.divIdx,
|
||||
begin.offset,
|
||||
end.offset,
|
||||
"highlight" + highlightSuffix
|
||||
);
|
||||
} else {
|
||||
selectedLeft = appendTextToDiv(
|
||||
begin.divIdx,
|
||||
begin.offset,
|
||||
infinity.offset,
|
||||
"highlight begin" + highlightSuffix
|
||||
);
|
||||
for (let n0 = begin.divIdx + 1, n1 = end.divIdx; n0 < n1; n0++) {
|
||||
textDivs[n0].className = "highlight middle" + highlightSuffix;
|
||||
}
|
||||
beginText(end, "highlight end" + highlightSuffix);
|
||||
}
|
||||
prevEnd = end;
|
||||
|
||||
if (isSelected) {
|
||||
// Attempt to scroll the selected match into view.
|
||||
findController.scrollMatchIntoView({
|
||||
element: textDivs[begin.divIdx],
|
||||
selectedLeft,
|
||||
pageIndex: pageIdx,
|
||||
matchIndex: selectedMatchIdx,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (prevEnd) {
|
||||
appendTextToDiv(prevEnd.divIdx, prevEnd.offset, infinity.offset);
|
||||
}
|
||||
}
|
||||
|
||||
_updateMatches() {
|
||||
// Only show matches when all rendering is done.
|
||||
if (!this.renderingDone) {
|
||||
return;
|
||||
}
|
||||
const { findController, matches, pageIdx, textContentItemsStr, textDivs } =
|
||||
this;
|
||||
let clearedUntilDivIdx = -1;
|
||||
|
||||
// Clear all current matches.
|
||||
for (let i = 0, ii = matches.length; i < ii; i++) {
|
||||
const match = matches[i];
|
||||
const begin = Math.max(clearedUntilDivIdx, match.begin.divIdx);
|
||||
for (let n = begin, end = match.end.divIdx; n <= end; n++) {
|
||||
const div = textDivs[n];
|
||||
div.textContent = textContentItemsStr[n];
|
||||
div.className = "";
|
||||
}
|
||||
clearedUntilDivIdx = match.end.divIdx + 1;
|
||||
}
|
||||
|
||||
if (!findController?.highlightMatches) {
|
||||
return;
|
||||
}
|
||||
// Convert the matches on the `findController` into the match format
|
||||
// used for the textLayer.
|
||||
const pageMatches = findController.pageMatches[pageIdx] || null;
|
||||
const pageMatchesLength = findController.pageMatchesLength[pageIdx] || null;
|
||||
|
||||
this.matches = this._convertMatches(pageMatches, pageMatchesLength);
|
||||
this._renderMatches(this.matches);
|
||||
}
|
||||
|
||||
/**
|
||||
* Improves text selection by adding an additional div where the mouse was
|
||||
* clicked. This reduces flickering of the content if the mouse is slowly
|
||||
@ -435,6 +227,7 @@ class DefaultTextLayerFactory {
|
||||
* @param {PageViewport} viewport
|
||||
* @param {boolean} enhanceTextSelection
|
||||
* @param {EventBus} eventBus
|
||||
* @param {TextHighlighter} highlighter
|
||||
* @returns {TextLayerBuilder}
|
||||
*/
|
||||
createTextLayerBuilder(
|
||||
@ -442,7 +235,8 @@ class DefaultTextLayerFactory {
|
||||
pageIndex,
|
||||
viewport,
|
||||
enhanceTextSelection = false,
|
||||
eventBus
|
||||
eventBus,
|
||||
highlighter
|
||||
) {
|
||||
return new TextLayerBuilder({
|
||||
textLayerDiv,
|
||||
|
@ -17,6 +17,37 @@
|
||||
--unfocused-field-background: url("data:image/svg+xml;charset=UTF-8,<svg width='1px' height='1px' xmlns='http://www.w3.org/2000/svg'><rect width='100%' height='100%' style='fill:rgba(0, 54, 255, 0.13);'/></svg>");
|
||||
}
|
||||
|
||||
.xfaLayer .highlight {
|
||||
margin: -1px;
|
||||
padding: 1px;
|
||||
background-color: rgba(239, 203, 237, 1);
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
.xfaLayer .highlight.appended {
|
||||
position: initial;
|
||||
}
|
||||
|
||||
.xfaLayer .highlight.begin {
|
||||
border-radius: 4px 0 0 4px;
|
||||
}
|
||||
|
||||
.xfaLayer .highlight.end {
|
||||
border-radius: 0 4px 4px 0;
|
||||
}
|
||||
|
||||
.xfaLayer .highlight.middle {
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
.xfaLayer .highlight.selected {
|
||||
background-color: rgba(203, 223, 203, 1);
|
||||
}
|
||||
|
||||
.xfaLayer ::selection {
|
||||
background: rgba(0, 0, 255, 1);
|
||||
}
|
||||
|
||||
.xfaPage {
|
||||
overflow: hidden;
|
||||
position: relative;
|
||||
|
@ -39,8 +39,9 @@ class XfaLayerBuilder {
|
||||
/**
|
||||
* @param {PageViewport} viewport
|
||||
* @param {string} intent (default value is 'display')
|
||||
* @returns {Promise<void>} A promise that is resolved when rendering of the
|
||||
* annotations is complete.
|
||||
* @returns {Promise<Object | void>} A promise that is resolved when rendering
|
||||
* of the XFA layer is complete. The first rendering will return an object
|
||||
* with a `textDivs` property that can be used with the TextHighlighter.
|
||||
*/
|
||||
render(viewport, intent = "display") {
|
||||
if (intent === "print") {
|
||||
@ -67,7 +68,7 @@ class XfaLayerBuilder {
|
||||
.getXfa()
|
||||
.then(xfa => {
|
||||
if (this._cancelled) {
|
||||
return;
|
||||
return Promise.resolve();
|
||||
}
|
||||
const parameters = {
|
||||
viewport: viewport.clone({ dontFlip: true }),
|
||||
@ -79,15 +80,13 @@ class XfaLayerBuilder {
|
||||
};
|
||||
|
||||
if (this.div) {
|
||||
XfaLayer.update(parameters);
|
||||
} else {
|
||||
// Create an xfa layer div and render the form
|
||||
this.div = document.createElement("div");
|
||||
this.pageDiv.appendChild(this.div);
|
||||
parameters.div = this.div;
|
||||
|
||||
XfaLayer.render(parameters);
|
||||
return XfaLayer.update(parameters);
|
||||
}
|
||||
// Create an xfa layer div and render the form
|
||||
this.div = document.createElement("div");
|
||||
this.pageDiv.appendChild(this.div);
|
||||
parameters.div = this.div;
|
||||
return XfaLayer.render(parameters);
|
||||
})
|
||||
.catch(error => {
|
||||
console.error(error);
|
||||
|
Loading…
x
Reference in New Issue
Block a user