From 4736767b76eb7018310873f6b06c1ead13da3cf8 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald
Date: Sun, 16 Apr 2023 08:34:40 +0200
Subject: [PATCH] Remove null chars, i.e. `\u0000`, when getting all text (PR 16286 follow-up)

I was playing with the new "copy all text" feature, and stumbled upon one document where the copied text was truncated; see http://mirrors.ctan.org/info/lshort/english/lshort.pdf

The problem turns out to be that on [page 83](https://ftp.acc.umu.se/mirror/CTAN/info/lshort/english/lshort.pdf#page=83) the textLayer contains `\u0000` and apparently copying just stops when a null char is encountered.

To fix this we can simply use an existing helper function, and with this patch we're able to successfully copy all the text in that document.
---
 web/pdf_viewer.js | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/web/pdf_viewer.js b/web/pdf_viewer.js
index 705d0446b..f4dc3f7c4 100644
--- a/web/pdf_viewer.js
+++ b/web/pdf_viewer.js
@@ -47,6 +47,7 @@ import {
   MAX_SCALE,
   MIN_SCALE,
   PresentationModeState,
+  removeNullCharacters,
   RenderingStates,
   SCROLLBAR_PADDING,
   scrollIntoView,
@@ -671,7 +672,7 @@ class PDFViewer {
           buffer.push("\n");
         }
       }
-      texts.push(buffer.join(""));
+      texts.push(removeNullCharacters(buffer.join("")));
     }
 
     return texts.join("\n");