From 4736767b76eb7018310873f6b06c1ead13da3cf8 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Sun, 16 Apr 2023 08:34:40 +0200
Subject: [PATCH] Remove null chars, i.e. `\u0000`, when getting all text (PR
 16286 follow-up)

I was playing with the new "copy all text" feature, and stumbled upon one document where the copied text was truncated; see http://mirrors.ctan.org/info/lshort/english/lshort.pdf

The problem turns out to be that on [page 83](https://ftp.acc.umu.se/mirror/CTAN/info/lshort/english/lshort.pdf#page=83) the textLayer contains `\u0000` and apparently copying just stops when a null char is encountered.
To fix this we can simply use the existing `removeNullCharacters` helper function to strip the null chars from each page's text; with this patch we're able to successfully copy all the text in that document.
---
 web/pdf_viewer.js | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/web/pdf_viewer.js b/web/pdf_viewer.js
index 705d0446b..f4dc3f7c4 100644
--- a/web/pdf_viewer.js
+++ b/web/pdf_viewer.js
@@ -47,6 +47,7 @@ import {
   MAX_SCALE,
   MIN_SCALE,
   PresentationModeState,
+  removeNullCharacters,
   RenderingStates,
   SCROLLBAR_PADDING,
   scrollIntoView,
@@ -671,7 +672,7 @@ class PDFViewer {
           buffer.push("\n");
         }
       }
-      texts.push(buffer.join(""));
+      texts.push(removeNullCharacters(buffer.join("")));
     }
 
     return texts.join("\n");