From 7041c62ccfa4ee2c7a3a0ab4a1ef46f6b6a1a378 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Fri, 12 Nov 2021 19:41:32 +0100 Subject: [PATCH] Remove non-displayable chars from outline title (#14267) - it aims to fix #14267; - there is nothing about chars in range [0-1F] in the specs but acrobat doesn't display them in any way. --- src/shared/util.js | 8 ++++++-- test/pdfs/.gitignore | 1 + test/pdfs/issue14267.pdf | Bin 0 -> 4428 bytes test/unit/api_spec.js | 13 +++++++++++++ test/unit/util_spec.js | 12 ++++++++++++ web/base_tree_viewer.js | 7 ++++++- 6 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 test/pdfs/issue14267.pdf diff --git a/src/shared/util.js b/src/shared/util.js index ba94fec8b..1af94158d 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -566,16 +566,20 @@ class AbortException extends BaseException { } } -const NullCharactersRegExp = /\x00/g; +const NullCharactersRegExp = /\x00+/g; +const InvisibleCharactersRegExp = /[\x01-\x1F]/g; /** * @param {string} str */ -function removeNullCharacters(str) { +function removeNullCharacters(str, replaceInvisible = false) { if (typeof str !== "string") { warn("The argument for removeNullCharacters must be a string."); return str; } + if (replaceInvisible) { + str = str.replace(InvisibleCharactersRegExp, " "); + } return str.replace(NullCharactersRegExp, ""); } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 2b4476710..a957af612 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -486,3 +486,4 @@ !pr12828.pdf !secHandler.pdf !rc_annotation.pdf +!issue14267.pdf \ No newline at end of file diff --git a/test/pdfs/issue14267.pdf b/test/pdfs/issue14267.pdf new file mode 100644 index 0000000000000000000000000000000000000000..38d8d844e94dc81152bf8061d1318fcff3642feb GIT binary patch literal 4428 zcmeHK-A?016i!y^MRQTwHyEWt3u>MD@gKI5tSBj6v}8emw6xv57~5m;;&@zVA_9-o z*J!1Eq+a%}>KVsQ3`CV8b?^!M-obSNT71|K{9 zKEh+6c*GS)7Cbyd_++NiBokBQEbdSF72Hs=xVyN^1;PhlpUNvV9Ckz&X(ZM_T-clh zAJ5eU;bW0qscS?%3*onjB7B$>Qw7n$d8!J*Cmj=!GL-|kQ;Ez*TnV%@@)=)=(Lb%bb#LPn9K+g^?nyI+H-r15YGdzv5cMq zQ+c9yKy{Fsb54NCg2<~a{r)DJjb+0@H(d`T>9j*A}v#k zLM&1-5t*8TA6@GQoQ}s!*R|m$leM0CgO3M(@B+e_cu4@%*M~Sh?9wE@(;IcD$u}r` z*#-4{YQh(`s!vECKeAjARAQ9Nb!)R(7xgtg8=z6>$Fi7kHRO4oCSiGEVBoWlSFalO<>L7x8P4<~AB~2ySrYks zCKDo#s9|$&&#+i54DN7h1deCM#1ha5Zkfh--1L2ddz4KT&q7h-h9R+IS40L69m}vh zmjfgZ4C3-QiYYe(%d2r4z71Tc#IMhx4H$diR>AeL?xIFlEn2pG3Ox(w1zsK)<*Q|r zeXhx>&d(JAaG!&0MD8*jTqR-_<48zZ*+B5k^?l%pnD5~0wxb- zjopwn>t&wfXqji$GLPB~QKJ>5i$Fr`fu3|0l26VuwNiyHSbzpS=AaO(c@?Hsr69Jn zYTXBn()5%U@VBlOs~3q%K{{TGG?m}n$|8++b$snHLcb!ag)iV={``wh#bYR&r78M= z@HxUi$qT8CpA_wZZLQ_j7HyLU(zaQ$*3AzKeUn7fRVomA0lARb>%x~w*YOVung1Z? z06=;G4a1=fexVll1+=k<68@hS6=SucKNC|qD?-rtz|wGt@R?Lve?v{H%qm`V7WApq zzdRH1Qs~ String.fromCharCode(x) + "a") + .join(""); + // \x00 is replaced by an empty string. + const expected = + "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a"; + expect(removeNullCharacters(str, /* replaceInvisible */ true)).toEqual( + expected + ); + }); }); describe("ReadableStream", function () { diff --git a/web/base_tree_viewer.js b/web/base_tree_viewer.js index 6e203aea8..7a4f1e030 100644 --- a/web/base_tree_viewer.js +++ b/web/base_tree_viewer.js @@ -59,7 +59,12 @@ class BaseTreeViewer { * @private */ _normalizeTextContent(str) { - return removeNullCharacters(str) || /* en dash = */ "\u2013"; + // Chars in range [0x01-0x1F] will be replaced with a white space + // and 0x00 by "". + return ( + removeNullCharacters(str, /* replaceInvisible */ true) || + /* en dash = */ "\u2013" + ); } /**