Merge pull request #16286 from calixteman/copy_all
Add the possibility to copy all the pdf text whatever the rendered pages are (bug 1788035)
This commit is contained in:
commit
92baf14531
@ -26,13 +26,13 @@ async function runTests(results) {
|
|||||||
random: false,
|
random: false,
|
||||||
spec_dir: "integration",
|
spec_dir: "integration",
|
||||||
spec_files: [
|
spec_files: [
|
||||||
"scripting_spec.js",
|
|
||||||
"annotation_spec.js",
|
|
||||||
"accessibility_spec.js",
|
"accessibility_spec.js",
|
||||||
|
"annotation_spec.js",
|
||||||
|
"copy_paste_spec.js",
|
||||||
"find_spec.js",
|
"find_spec.js",
|
||||||
"freetext_editor_spec.js",
|
"freetext_editor_spec.js",
|
||||||
"ink_editor_spec.js",
|
"ink_editor_spec.js",
|
||||||
"a11y_spec.js",
|
"scripting_spec.js",
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
120
test/integration/copy_paste_spec.js
Normal file
120
test/integration/copy_paste_spec.js
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
/* Copyright 2023 Mozilla Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
const { closePages, loadAndWait, mockClipboard } = require("./test_utils.js");
|
||||||
|
|
||||||
|
describe("Copy and paste", () => {
  describe("all text", () => {
    // Excerpts that must all be found in the text placed on the clipboard
    // after a "select all" + "copy" in the viewer.
    const EXPECTED_EXCERPTS = [
      "Dynamic languages such as JavaScript",
      "This section provides an overview of our system",
      "are represented by function calls. This makes the LIR used by",
      "When compiling loops, we consult the oracle before",
      "Nested Trace Tree Formation",
      "An important detail is that the call to the inner trace",
      "When trace recording is completed, nanojit",
      "SpiderMonkey, like many VMs, needs to preempt the user program",
      "Using similar computations, we find that trace recording takes",
      "specialization algorithm. We also described our trace compiler",
      "dynamic optimization system. In Proceedings of the ACM SIGPLAN",
    ];

    let pages;

    // Press `key` while the Control modifier is held down.
    const pressWithControl = async (page, key) => {
      await page.keyboard.down("Control");
      await page.keyboard.press(key);
      await page.keyboard.up("Control");
    };

    beforeAll(async () => {
      pages = await loadAndWait("tracemonkey.pdf", ".textLayer");
      await mockClipboard(pages);
    });

    afterAll(async () => {
      await closePages(pages);
    });

    it("must check that we've all the contents", async () => {
      const checkBrowser = async ([browserName, page]) => {
        // Select everything, then copy it.
        await pressWithControl(page, "a");
        await page.waitForTimeout(500);

        await pressWithControl(page, "c");
        await page.waitForTimeout(500);

        // The viewer shows a "wait" cursor while the text is being
        // extracted; wait until it has been restored.
        await page.waitForFunction(
          `document.querySelector('#viewerContainer').style.cursor !== "wait"`
        );

        const text = await page.evaluate(() =>
          navigator.clipboard.readText()
        );
        const context = `In ${browserName}`;

        expect(!!text).withContext(context).toEqual(true);
        for (const excerpt of EXPECTED_EXCERPTS) {
          expect(text.includes(excerpt)).withContext(context).toEqual(true);
        }
      };

      await Promise.all(pages.map(checkBrowser));
    });
  });
});
|
@ -118,3 +118,19 @@ const waitForSelectedEditor = async (page, selector) => {
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
exports.waitForSelectedEditor = waitForSelectedEditor;
|
exports.waitForSelectedEditor = waitForSelectedEditor;
|
||||||
|
|
||||||
|
/**
 * Replace `navigator.clipboard` in every given page with a minimal in-memory
 * stand-in, so that tests can read back what the viewer wrote.
 *
 * @param {Array} pages - Array of `[browserName, page]` pairs; only the
 *   `page` entry is used and it must provide an `evaluate` method.
 * @returns {Promise<undefined>} Settles once every page has been patched.
 */
const mockClipboard = async pages => {
  const installFake = () => {
    // Holds the last written text; `null` until something is written.
    let stored = null;
    const fakeClipboard = {
      async writeText(text) {
        stored = text;
        return stored;
      },
      async readText() {
        return stored;
      },
    };
    Object.defineProperty(navigator, "clipboard", { value: fakeClipboard });
  };

  await Promise.all(pages.map(([, page]) => page.evaluate(installFake)));
};
|
||||||
|
exports.mockClipboard = mockClipboard;
|
||||||
|
@ -504,6 +504,7 @@ const PDFViewerApplication = {
|
|||||||
this.pdfViewer = new PDFViewer({
|
this.pdfViewer = new PDFViewer({
|
||||||
container,
|
container,
|
||||||
viewer,
|
viewer,
|
||||||
|
hiddenCopyElement: appConfig.hiddenCopyElement,
|
||||||
eventBus,
|
eventBus,
|
||||||
renderingQueue: pdfRenderingQueue,
|
renderingQueue: pdfRenderingQueue,
|
||||||
linkService: pdfLinkService,
|
linkService: pdfLinkService,
|
||||||
|
@ -45,6 +45,15 @@
|
|||||||
transform: rotate(270deg) translateX(-100%);
|
transform: rotate(270deg) translateX(-100%);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Zero-sized, non-displayed marker element: it cannot be selected manually,
   but a "select all" includes it, which is how the viewer detects that
   everything has been selected. */
#hiddenCopyElement {
  display: none;
  position: absolute;
  top: 0;
  left: 0;
  width: 0;
  height: 0;
}
|
||||||
|
|
||||||
.pdfViewer {
|
.pdfViewer {
|
||||||
/* Define this variable here and not in :root to avoid to reflow all the UI
|
/* Define this variable here and not in :root to avoid to reflow all the UI
|
||||||
when scaling (see #15929). */
|
when scaling (see #15929). */
|
||||||
|
@ -82,6 +82,8 @@ function isValidAnnotationEditorMode(mode) {
|
|||||||
* @typedef {Object} PDFViewerOptions
|
* @typedef {Object} PDFViewerOptions
|
||||||
* @property {HTMLDivElement} container - The container for the viewer element.
|
* @property {HTMLDivElement} container - The container for the viewer element.
|
||||||
* @property {HTMLDivElement} [viewer] - The viewer element.
|
* @property {HTMLDivElement} [viewer] - The viewer element.
|
||||||
|
* @property {HTMLDivElement} [hiddenCopyElement] - The hidden element used to
|
||||||
|
 *   check whether the whole document is selected.
|
||||||
* @property {EventBus} eventBus - The application event bus.
|
* @property {EventBus} eventBus - The application event bus.
|
||||||
* @property {IPDFLinkService} linkService - The navigation/linking service.
|
* @property {IPDFLinkService} linkService - The navigation/linking service.
|
||||||
* @property {IDownloadManager} [downloadManager] - The download manager
|
* @property {IDownloadManager} [downloadManager] - The download manager
|
||||||
@ -205,8 +207,16 @@ class PDFViewer {
|
|||||||
|
|
||||||
#containerTopLeft = null;
|
#containerTopLeft = null;
|
||||||
|
|
||||||
|
#copyCallbackBound = this.#copyCallback.bind(this);
|
||||||
|
|
||||||
#enablePermissions = false;
|
#enablePermissions = false;
|
||||||
|
|
||||||
|
#getAllTextInProgress = false;
|
||||||
|
|
||||||
|
#hiddenCopyElement = null;
|
||||||
|
|
||||||
|
#interruptCopyCondition = false;
|
||||||
|
|
||||||
#previousContainerHeight = 0;
|
#previousContainerHeight = 0;
|
||||||
|
|
||||||
#resizeObserver = new ResizeObserver(this.#resizeObserverCallback.bind(this));
|
#resizeObserver = new ResizeObserver(this.#resizeObserverCallback.bind(this));
|
||||||
@ -230,6 +240,7 @@ class PDFViewer {
|
|||||||
}
|
}
|
||||||
this.container = options.container;
|
this.container = options.container;
|
||||||
this.viewer = options.viewer || options.container.firstElementChild;
|
this.viewer = options.viewer || options.container.firstElementChild;
|
||||||
|
this.#hiddenCopyElement = options.hiddenCopyElement;
|
||||||
|
|
||||||
if (
|
if (
|
||||||
typeof PDFJSDev === "undefined" ||
|
typeof PDFJSDev === "undefined" ||
|
||||||
@ -638,6 +649,89 @@ class PDFViewer {
|
|||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async getAllText() {
|
||||||
|
const texts = [];
|
||||||
|
const buffer = [];
|
||||||
|
for (
|
||||||
|
let pageNum = 1, pagesCount = this.pdfDocument.numPages;
|
||||||
|
pageNum <= pagesCount;
|
||||||
|
++pageNum
|
||||||
|
) {
|
||||||
|
if (this.#interruptCopyCondition) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
buffer.length = 0;
|
||||||
|
const page = await this.pdfDocument.getPage(pageNum);
|
||||||
|
const { items } = await page.getTextContent();
|
||||||
|
for (const item of items) {
|
||||||
|
if (item.str) {
|
||||||
|
buffer.push(item.str);
|
||||||
|
}
|
||||||
|
if (item.hasEOL) {
|
||||||
|
buffer.push("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
texts.push(buffer.join(""));
|
||||||
|
}
|
||||||
|
|
||||||
|
return texts.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#copyCallback(event) {
|
||||||
|
const selection = document.getSelection();
|
||||||
|
const { focusNode, anchorNode } = selection;
|
||||||
|
if (
|
||||||
|
anchorNode &&
|
||||||
|
focusNode &&
|
||||||
|
selection.containsNode(this.#hiddenCopyElement)
|
||||||
|
) {
|
||||||
|
// About the condition above:
|
||||||
|
// - having non-null anchorNode and focusNode are here to guaranty that
|
||||||
|
// we have at least a kind of selection.
|
||||||
|
// - this.#hiddenCopyElement is an invisible element which is impossible
|
||||||
|
// to select manually (its display is none) but ctrl+A will select all
|
||||||
|
// including this element so having it in the selection means that all
|
||||||
|
// has been selected.
|
||||||
|
|
||||||
|
// TODO: if all the pages are rendered we don't need to wait for
|
||||||
|
// getAllText and we could just get text from the Selection object.
|
||||||
|
|
||||||
|
if (this.#getAllTextInProgress) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.#getAllTextInProgress = true;
|
||||||
|
|
||||||
|
// Select all the document.
|
||||||
|
const savedCursor = this.container.style.cursor;
|
||||||
|
this.container.style.cursor = "wait";
|
||||||
|
|
||||||
|
const interruptCopy = ev =>
|
||||||
|
(this.#interruptCopyCondition = ev.key === "Escape");
|
||||||
|
window.addEventListener("keydown", interruptCopy);
|
||||||
|
|
||||||
|
this.getAllText()
|
||||||
|
.then(async text => {
|
||||||
|
if (text !== null) {
|
||||||
|
await navigator.clipboard.writeText(text);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.catch(reason => {
|
||||||
|
console.warn(
|
||||||
|
`Something goes wrong when extracting the text: ${reason.message}`
|
||||||
|
);
|
||||||
|
})
|
||||||
|
.finally(() => {
|
||||||
|
this.#getAllTextInProgress = false;
|
||||||
|
this.#interruptCopyCondition = false;
|
||||||
|
window.removeEventListener("keydown", interruptCopy);
|
||||||
|
this.container.style.cursor = savedCursor;
|
||||||
|
});
|
||||||
|
|
||||||
|
event.preventDefault();
|
||||||
|
event.stopPropagation();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {PDFDocumentProxy} pdfDocument
|
* @param {PDFDocumentProxy} pdfDocument
|
||||||
*/
|
*/
|
||||||
@ -805,6 +899,10 @@ class PDFViewer {
|
|||||||
this.findController?.setDocument(pdfDocument); // Enable searching.
|
this.findController?.setDocument(pdfDocument); // Enable searching.
|
||||||
this._scriptingManager?.setDocument(pdfDocument); // Enable scripting.
|
this._scriptingManager?.setDocument(pdfDocument); // Enable scripting.
|
||||||
|
|
||||||
|
if (this.#hiddenCopyElement) {
|
||||||
|
document.addEventListener("copy", this.#copyCallbackBound);
|
||||||
|
}
|
||||||
|
|
||||||
if (this.#annotationEditorUIManager) {
|
if (this.#annotationEditorUIManager) {
|
||||||
// Ensure that the Editor buttons, in the toolbar, are updated.
|
// Ensure that the Editor buttons, in the toolbar, are updated.
|
||||||
this.eventBus.dispatch("annotationeditormodechanged", {
|
this.eventBus.dispatch("annotationeditormodechanged", {
|
||||||
@ -949,6 +1047,8 @@ class PDFViewer {
|
|||||||
this.viewer.removeAttribute("lang");
|
this.viewer.removeAttribute("lang");
|
||||||
// Reset all PDF document permissions.
|
// Reset all PDF document permissions.
|
||||||
this.viewer.classList.remove(ENABLE_PERMISSIONS_CLASS);
|
this.viewer.classList.remove(ENABLE_PERMISSIONS_CLASS);
|
||||||
|
|
||||||
|
document.removeEventListener("copy", this.#copyCallbackBound);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ensurePageViewVisible() {
|
#ensurePageViewVisible() {
|
||||||
|
@ -82,6 +82,7 @@ See https://github.com/adobe-type-tools/cmap-resources
|
|||||||
<div id="mainContainer">
|
<div id="mainContainer">
|
||||||
|
|
||||||
<div id="viewerContainer" tabindex="0">
|
<div id="viewerContainer" tabindex="0">
|
||||||
|
<div id="hiddenCopyElement"></div>
|
||||||
<div id="viewer" class="pdfViewer"></div>
|
<div id="viewer" class="pdfViewer"></div>
|
||||||
</div>
|
</div>
|
||||||
</div> <!-- mainContainer -->
|
</div> <!-- mainContainer -->
|
||||||
|
@ -41,6 +41,7 @@ function getViewerConfiguration() {
|
|||||||
appContainer: document.body,
|
appContainer: document.body,
|
||||||
mainContainer,
|
mainContainer,
|
||||||
viewerContainer: document.getElementById("viewer"),
|
viewerContainer: document.getElementById("viewer"),
|
||||||
|
hiddenCopyElement: document.getElementById("hiddenCopyElement"),
|
||||||
toolbar: {
|
toolbar: {
|
||||||
mainContainer,
|
mainContainer,
|
||||||
container: document.getElementById("floatingToolbar"),
|
container: document.getElementById("floatingToolbar"),
|
||||||
|
@ -385,6 +385,7 @@ See https://github.com/adobe-type-tools/cmap-resources
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="viewerContainer" tabindex="0">
|
<div id="viewerContainer" tabindex="0">
|
||||||
|
<div id="hiddenCopyElement"></div>
|
||||||
<div id="viewer" class="pdfViewer"></div>
|
<div id="viewer" class="pdfViewer"></div>
|
||||||
</div>
|
</div>
|
||||||
</div> <!-- mainContainer -->
|
</div> <!-- mainContainer -->
|
||||||
|
@ -41,6 +41,7 @@ function getViewerConfiguration() {
|
|||||||
appContainer: document.body,
|
appContainer: document.body,
|
||||||
mainContainer: document.getElementById("viewerContainer"),
|
mainContainer: document.getElementById("viewerContainer"),
|
||||||
viewerContainer: document.getElementById("viewer"),
|
viewerContainer: document.getElementById("viewer"),
|
||||||
|
hiddenCopyElement: document.getElementById("hiddenCopyElement"),
|
||||||
toolbar: {
|
toolbar: {
|
||||||
container: document.getElementById("toolbarViewer"),
|
container: document.getElementById("toolbarViewer"),
|
||||||
numPages: document.getElementById("numPages"),
|
numPages: document.getElementById("numPages"),
|
||||||
|
Loading…
Reference in New Issue
Block a user