Merge pull request #16286 from calixteman/copy_all
Add the possibility to copy all the PDF text, regardless of which pages are rendered (bug 1788035)
This commit is contained in:
commit
92baf14531
@ -26,13 +26,13 @@ async function runTests(results) {
|
||||
random: false,
|
||||
spec_dir: "integration",
|
||||
spec_files: [
|
||||
"scripting_spec.js",
|
||||
"annotation_spec.js",
|
||||
"accessibility_spec.js",
|
||||
"annotation_spec.js",
|
||||
"copy_paste_spec.js",
|
||||
"find_spec.js",
|
||||
"freetext_editor_spec.js",
|
||||
"ink_editor_spec.js",
|
||||
"a11y_spec.js",
|
||||
"scripting_spec.js",
|
||||
],
|
||||
});
|
||||
|
||||
|
120
test/integration/copy_paste_spec.js
Normal file
120
test/integration/copy_paste_spec.js
Normal file
@ -0,0 +1,120 @@
|
||||
/* Copyright 2023 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
const { closePages, loadAndWait, mockClipboard } = require("./test_utils.js");
|
||||
|
||||
describe("Copy and paste", () => {
  describe("all text", () => {
    // Fragments that must all be present in the clipboard once the whole
    // document has been copied.
    // NOTE(review): presumably these are taken from different pages of
    // tracemonkey.pdf so that missing pages get detected — confirm.
    const EXPECTED_SNIPPETS = [
      "Dynamic languages such as JavaScript",
      "This section provides an overview of our system",
      "are represented by function calls. This makes the LIR used by",
      "When compiling loops, we consult the oracle before",
      "Nested Trace Tree Formation",
      "An important detail is that the call to the inner trace",
      "When trace recording is completed, nanojit",
      "SpiderMonkey, like many VMs, needs to preempt the user program",
      "Using similar computations, we find that trace recording takes",
      "specialization algorithm. We also described our trace compiler",
      "dynamic optimization system. In Proceedings of the ACM SIGPLAN",
    ];

    // Press `key` while the Control modifier is held down.
    const pressWithControl = async (page, key) => {
      await page.keyboard.down("Control");
      await page.keyboard.press(key);
      await page.keyboard.up("Control");
    };

    let pages;

    beforeAll(async () => {
      pages = await loadAndWait("tracemonkey.pdf", ".textLayer");
      await mockClipboard(pages);
    });

    afterAll(async () => {
      await closePages(pages);
    });

    it("must check that we've all the contents", async () => {
      await Promise.all(
        pages.map(async ([browserName, page]) => {
          // Select everything, then copy it.
          await pressWithControl(page, "a");
          await page.waitForTimeout(500);

          await pressWithControl(page, "c");
          await page.waitForTimeout(500);

          // The viewer shows a "wait" cursor on its container while the text
          // is being extracted; wait until it is no longer set.
          await page.waitForFunction(
            `document.querySelector('#viewerContainer').style.cursor !== "wait"`
          );

          const text = await page.evaluate(() =>
            navigator.clipboard.readText()
          );

          expect(!!text).withContext(`In ${browserName}`).toEqual(true);
          for (const snippet of EXPECTED_SNIPPETS) {
            expect(text.includes(snippet))
              .withContext(`In ${browserName}`)
              .toEqual(true);
          }
        })
      );
    });
  });
});
|
@ -118,3 +118,19 @@ const waitForSelectedEditor = async (page, selector) => {
|
||||
);
|
||||
};
|
||||
exports.waitForSelectedEditor = waitForSelectedEditor;
|
||||
|
||||
/**
 * Replace `navigator.clipboard` with a minimal in-memory stand-in in every
 * given page, so that tests can read back what was "copied".
 *
 * The stand-in only implements `writeText` and `readText`; `readText`
 * resolves to `null` until something has been written.
 *
 * @param {Array<Array>} pages - Pairs whose second entry is the page object
 *   (the first entry is ignored here).
 * @returns {Promise<void>} Resolves once the mock is installed in all pages.
 */
const mockClipboard = async pages => {
  await Promise.all(
    pages.map(([, page]) =>
      // NOTE(review): with Puppeteer semantics this callback is executed
      // inside the page, hence it must stay self-contained and must not
      // reference anything from the enclosing scope.
      page.evaluate(() => {
        let data = null;
        const clipboard = {
          writeText: async text => (data = text),
          readText: async () => data,
        };
        // `configurable: true` lets the mock be installed again on the same
        // page; without it a second call throws "Cannot redefine property".
        Object.defineProperty(navigator, "clipboard", {
          value: clipboard,
          configurable: true,
        });
      })
    )
  );
};
|
||||
exports.mockClipboard = mockClipboard;
|
||||
|
@ -504,6 +504,7 @@ const PDFViewerApplication = {
|
||||
this.pdfViewer = new PDFViewer({
|
||||
container,
|
||||
viewer,
|
||||
hiddenCopyElement: appConfig.hiddenCopyElement,
|
||||
eventBus,
|
||||
renderingQueue: pdfRenderingQueue,
|
||||
linkService: pdfLinkService,
|
||||
|
@ -45,6 +45,15 @@
|
||||
transform: rotate(270deg) translateX(-100%);
|
||||
}
|
||||
|
||||
/* Zero-sized, non-displayed marker element: it cannot be selected by hand,
   the viewer only checks whether it is part of the current selection when
   handling a copy. */
#hiddenCopyElement {
  display: none;
  position: absolute;
  left: 0;
  top: 0;
  height: 0;
  width: 0;
}
|
||||
|
||||
.pdfViewer {
|
||||
/* Define this variable here and not in :root to avoid to reflow all the UI
|
||||
when scaling (see #15929). */
|
||||
|
@ -82,6 +82,8 @@ function isValidAnnotationEditorMode(mode) {
|
||||
* @typedef {Object} PDFViewerOptions
|
||||
* @property {HTMLDivElement} container - The container for the viewer element.
|
||||
* @property {HTMLDivElement} [viewer] - The viewer element.
|
||||
* @property {HTMLDivElement} [hiddenCopyElement] - The hidden element used to
|
||||
* check if all is selected.
|
||||
* @property {EventBus} eventBus - The application event bus.
|
||||
* @property {IPDFLinkService} linkService - The navigation/linking service.
|
||||
* @property {IDownloadManager} [downloadManager] - The download manager
|
||||
@ -205,8 +207,16 @@ class PDFViewer {
|
||||
|
||||
#containerTopLeft = null;
|
||||
|
||||
#copyCallbackBound = this.#copyCallback.bind(this);
|
||||
|
||||
#enablePermissions = false;
|
||||
|
||||
#getAllTextInProgress = false;
|
||||
|
||||
#hiddenCopyElement = null;
|
||||
|
||||
#interruptCopyCondition = false;
|
||||
|
||||
#previousContainerHeight = 0;
|
||||
|
||||
#resizeObserver = new ResizeObserver(this.#resizeObserverCallback.bind(this));
|
||||
@ -230,6 +240,7 @@ class PDFViewer {
|
||||
}
|
||||
this.container = options.container;
|
||||
this.viewer = options.viewer || options.container.firstElementChild;
|
||||
this.#hiddenCopyElement = options.hiddenCopyElement;
|
||||
|
||||
if (
|
||||
typeof PDFJSDev === "undefined" ||
|
||||
@ -638,6 +649,89 @@ class PDFViewer {
|
||||
]);
|
||||
}
|
||||
|
||||
async getAllText() {
|
||||
const texts = [];
|
||||
const buffer = [];
|
||||
for (
|
||||
let pageNum = 1, pagesCount = this.pdfDocument.numPages;
|
||||
pageNum <= pagesCount;
|
||||
++pageNum
|
||||
) {
|
||||
if (this.#interruptCopyCondition) {
|
||||
return null;
|
||||
}
|
||||
buffer.length = 0;
|
||||
const page = await this.pdfDocument.getPage(pageNum);
|
||||
const { items } = await page.getTextContent();
|
||||
for (const item of items) {
|
||||
if (item.str) {
|
||||
buffer.push(item.str);
|
||||
}
|
||||
if (item.hasEOL) {
|
||||
buffer.push("\n");
|
||||
}
|
||||
}
|
||||
texts.push(buffer.join(""));
|
||||
}
|
||||
|
||||
return texts.join("\n");
|
||||
}
|
||||
|
||||
#copyCallback(event) {
|
||||
const selection = document.getSelection();
|
||||
const { focusNode, anchorNode } = selection;
|
||||
if (
|
||||
anchorNode &&
|
||||
focusNode &&
|
||||
selection.containsNode(this.#hiddenCopyElement)
|
||||
) {
|
||||
// About the condition above:
|
||||
// - having non-null anchorNode and focusNode are here to guaranty that
|
||||
// we have at least a kind of selection.
|
||||
// - this.#hiddenCopyElement is an invisible element which is impossible
|
||||
// to select manually (its display is none) but ctrl+A will select all
|
||||
// including this element so having it in the selection means that all
|
||||
// has been selected.
|
||||
|
||||
// TODO: if all the pages are rendered we don't need to wait for
|
||||
// getAllText and we could just get text from the Selection object.
|
||||
|
||||
if (this.#getAllTextInProgress) {
|
||||
return;
|
||||
}
|
||||
this.#getAllTextInProgress = true;
|
||||
|
||||
// Select all the document.
|
||||
const savedCursor = this.container.style.cursor;
|
||||
this.container.style.cursor = "wait";
|
||||
|
||||
const interruptCopy = ev =>
|
||||
(this.#interruptCopyCondition = ev.key === "Escape");
|
||||
window.addEventListener("keydown", interruptCopy);
|
||||
|
||||
this.getAllText()
|
||||
.then(async text => {
|
||||
if (text !== null) {
|
||||
await navigator.clipboard.writeText(text);
|
||||
}
|
||||
})
|
||||
.catch(reason => {
|
||||
console.warn(
|
||||
`Something goes wrong when extracting the text: ${reason.message}`
|
||||
);
|
||||
})
|
||||
.finally(() => {
|
||||
this.#getAllTextInProgress = false;
|
||||
this.#interruptCopyCondition = false;
|
||||
window.removeEventListener("keydown", interruptCopy);
|
||||
this.container.style.cursor = savedCursor;
|
||||
});
|
||||
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {PDFDocumentProxy} pdfDocument
|
||||
*/
|
||||
@ -805,6 +899,10 @@ class PDFViewer {
|
||||
this.findController?.setDocument(pdfDocument); // Enable searching.
|
||||
this._scriptingManager?.setDocument(pdfDocument); // Enable scripting.
|
||||
|
||||
if (this.#hiddenCopyElement) {
|
||||
document.addEventListener("copy", this.#copyCallbackBound);
|
||||
}
|
||||
|
||||
if (this.#annotationEditorUIManager) {
|
||||
// Ensure that the Editor buttons, in the toolbar, are updated.
|
||||
this.eventBus.dispatch("annotationeditormodechanged", {
|
||||
@ -949,6 +1047,8 @@ class PDFViewer {
|
||||
this.viewer.removeAttribute("lang");
|
||||
// Reset all PDF document permissions.
|
||||
this.viewer.classList.remove(ENABLE_PERMISSIONS_CLASS);
|
||||
|
||||
document.removeEventListener("copy", this.#copyCallbackBound);
|
||||
}
|
||||
|
||||
#ensurePageViewVisible() {
|
||||
|
@ -82,6 +82,7 @@ See https://github.com/adobe-type-tools/cmap-resources
|
||||
<div id="mainContainer">
|
||||
|
||||
<div id="viewerContainer" tabindex="0">
|
||||
<div id="hiddenCopyElement"></div>
|
||||
<div id="viewer" class="pdfViewer"></div>
|
||||
</div>
|
||||
</div> <!-- mainContainer -->
|
||||
|
@ -41,6 +41,7 @@ function getViewerConfiguration() {
|
||||
appContainer: document.body,
|
||||
mainContainer,
|
||||
viewerContainer: document.getElementById("viewer"),
|
||||
hiddenCopyElement: document.getElementById("hiddenCopyElement"),
|
||||
toolbar: {
|
||||
mainContainer,
|
||||
container: document.getElementById("floatingToolbar"),
|
||||
|
@ -385,6 +385,7 @@ See https://github.com/adobe-type-tools/cmap-resources
|
||||
</div>
|
||||
|
||||
<div id="viewerContainer" tabindex="0">
|
||||
<div id="hiddenCopyElement"></div>
|
||||
<div id="viewer" class="pdfViewer"></div>
|
||||
</div>
|
||||
</div> <!-- mainContainer -->
|
||||
|
@ -41,6 +41,7 @@ function getViewerConfiguration() {
|
||||
appContainer: document.body,
|
||||
mainContainer: document.getElementById("viewerContainer"),
|
||||
viewerContainer: document.getElementById("viewer"),
|
||||
hiddenCopyElement: document.getElementById("hiddenCopyElement"),
|
||||
toolbar: {
|
||||
container: document.getElementById("toolbarViewer"),
|
||||
numPages: document.getElementById("numPages"),
|
||||
|
Loading…
Reference in New Issue
Block a user