From ea1995991ba7d268f95a8d02a42e678db714621f Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Tue, 29 Nov 2022 10:46:48 +0100 Subject: [PATCH] Don't add an extra space after a Katakana or a Hiragana at the eol when searching --- test/pdfs/.gitignore | 1 + test/pdfs/issue15759.pdf | Bin 0 -> 14851 bytes test/unit/pdf_find_controller_spec.js | 21 +++++++++++++++++++++ web/pdf_find_controller.js | 6 +++++- 4 files changed, 27 insertions(+), 1 deletion(-) create mode 100755 test/pdfs/issue15759.pdf diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index f5b148498..dd1ed3356 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -557,3 +557,4 @@ !freetext_no_appearance.pdf !issue15690.pdf !bug1802888.pdf +!issue15759.pdf diff --git a/test/pdfs/issue15759.pdf b/test/pdfs/issue15759.pdf new file mode 100755 index 0000000000000000000000000000000000000000..494898d172fdda40faa8c093905aa077fd44a3bf GIT binary patch literal 14851 zcmeHubyQSuw?8G)(jqO=HARvz}s{qfFPbJm=Bc7FDL&N};?y%|&`r8z*HJh%+2Yh#nRSRfEBkQQ$C6jw+H zplEFmGj+1|f|=8T0E)CAE*|8E_VFeuy{rfTYB=Zw%O0k?Ivvv;QD0;oE{%&nm=a3@+2 zSVROMZEfoUa{@@)n!3P#X937b>I;eS@d2f|q#?XKd_Zv?E}%HKq_{MgTY?)XCC0@g z2@(+kfx+C|d|(6#SW*he1>plhB)K8HybwNaFoHl_#1J57Zw~X&M>r1R{&8XfM>vNt zP#vHMv$u4yqUGlMVU62wZt?@v;cyp(O}`O5L104*<^gDUI=}!r>M#p{wmyg!0RaL4 zXBQ`!sU0qs)smT+IlhUBnF-b%LL3p%Ku3yj4-9$_yr3RE|6obs_@?K5T|MuB6!d*v z;mTK*3%7S^yO1a_G4^n=5E%X;<_|QTT}+)^Je*(_xL813ECvQC6=__ozYxa70(1Y3 z?5`lB<>UX;D;{1%D0v}(C$td0|75L+i3!xiB*6>`#TA6E(S^~&jzfuyB5P)=I^6ZX zyK9I~OsG6Ip!M|zmPYeu%;MMHCKWiOKxid08W|G7;@!DlnEe}Ee%*E8))WsA5 z(+`YQTwQDtY35AJ^V_MasU_m~kNi;o9aRYgsV&^{?}P$D{&rCUZtnuKcX399<~I%r zC%A(++(RG8i3lL#%L4)Ekx-`p?J6Pz5t9DUtPXRAyE;K(KT=W}q2otp z0*!&R;NR3B(oxk34t)Z1(FY)Q4?qLv;erS_A~ReN*+~oh-S8it{ABeH&OZ?NMPUBO zG!QrU-=UC=pp4eX>IM=^-quC7BF{HB+fmbsC%ndvzlyR7ku6k>+~GamaR^P1&<(&U zIS~Myw)bM9z~9Cjq%wUcVVxjzjA!})nNoj0nNiOcL@e<-sIggs=5nI zAd;{v+q++d_@8H3zbljdg+x0fHcs<-8VCNJF)5t z%HX5e2fCaq+w=1~ynBsq$KS3O*R+inZ@r&sIh{_BLbqPIecx`Jfy3<|`<+~0KNGpy z@wz-5Su8W&YwQ%Mrx7t8;%(e)lOx45Uk%%7+$?+H=W(!jYJ4k}S&zJKNx1g*+i{SZ zk%&kZjq$bD@!RXS^E>(~*KKF)4-MKq+B(`jk2e-G=MPCMjYM*mZ|Y+ljvM3lMx~`< z!!q6DPVZ^o^7WKG)FUxA?0g*b{qd-b1kG~Pm#EffOK_I7J-FYJ+57QMrnm|n-}9AJ z8N3-Se3Nw|w@SaIx1+lp$2EogjRuD$wj_!6c6)gf&LlN%83GnVwO-NQEY4-tPmKh{ z58>m=3wfS!LM|adJ!MSxq>*%@uv7~M$0vSY8;PL`xTdr%*mz8$*py9|`c?DX98bu+ zt6$vLvK{R(9ii>S5fh@ zP8f=0^mKDR`YThc5NfZ1W`Zw?k<~NsjN$Eh&ZZLN_uXGqr+8(z+tur0FKi zTt%~47GpPkjXL76;yaFNnoK%%E=4bOn^9WO$JrwuY}uHtC*VZ4ua|Iq&rdJR58>*S z3z$H&cAF&4rDcMhaD6Zb6}@F2n! znKHKQzRs?zNXA5UWy?%3h!)cH{6&NbRqa-E?3z|E zQ$(m0PXe7BfKve22UsBPePmJjE^`32`%x%?rWVulna~tsUe{h?IU7np;}R0t3~qbf zUH{p-uVS?fVy@-U*L+3PPV3F1_hJulZDc$-54Ez_qluYYHeP1vojQUuE_LW*Y z!`ZFwqEvlB5*?Q=wrq;jjV|XSRKdKp13=$WFm=pqoBpV=^14Se841XeyLbLxLmJF? zkI?iBwHn&_O&76*kr=+2dYR(G@JMMg(h8?eQ}H)f7U}BFMYDXN6tNh@xk-Q*Q+E|Y z9&i?6cD2Nc__xiZ1QUyi&!Xu(D##d>8bO&@((5)3P7~F@c~gm0C_|Gl+o#0dPoV1N ztq5lyK&p_u3`bhrU5^S07k2RAJc?MOYw#~tOy9?Us5R!oChkvYA3&fmnP4d8&siEu z_x3wUw9gqkCQ7GXv#V$DTClfb&2tQ?T{b!Hec=O4iW4!;r`hwi-|APp2Z#$*$TIGa zQ{NHk4{Sr-Vseu6 z+^vVw;Mv!)bi=ntn%{9WDMXh9wRJHHzEWwaNvTaGHKbPmqR*Prf=m7TnAs5176b{Y6+n1hP@R6$Fci}yXH-H2>y z?b((BG}7?0;tG@&$USwh>9J!_a9*x?bEc7q#o>29=(u>a)BtgR?Z^hPLnw#DpgE-o zuB=;e*y#2|j}+Tsu1kQgx`4yvo8C7?uy-jZyjPy@75d$c&Ky1h>M!7gRFqnIRV;p~ zTFG+bS=ynUMJhxE`u6Bms*c(TJM1jSFh+8 z%!3^rmo5`$IB|XQNKWGRGqo)FxbR25^`u^c?h>E+9Ynpgd1w7?Kcc9?Da%WedZ7Ut z=PK~Bvb-&5y8nW1G&wWQO3P`9B|B_2>~kV|{8>KD`_ej%v5{m})n&ubdyQo#I-S|` z<2h5V!h?|Z)ppploM^JZtaqLLgT^G{O&Oalo&>nl!#K{44WH0&3$V?NiUhkNS@>v8 zJHr_45Le?wVg9RIIxAkpf_fVDbxL1-#nnPrh=^`173Y@nwm5kosKl#gr`jTPm2W;jY&C6iT(ZVsq=+HIaeIy2Z^mSt+u?3*q2p9tC1jn#{-mOL zr95hxs;D?F_qg(9MX0D#CPj;WOj{0#1XN`=%MJ=;Ph2zOL~$0fLUfG6LAZu6ztx%&Y-8);*?_~;4@|=V4dtM>gm3h zh*f4iQb9)}n;Z-^d9|{)6{)I<*SaHgxikD(np%evHS&w0VYs#;sL@VS^yzc=ivpEV z7c>>`{6da_jwPSHlG{Ur1{_g4_c3m*l4!vAsZX~8H_17^0J?hWCWQ?r-u8C2{|CM} z98kng{RMM<-9_=0+`Xo4v(b=~CvYuoU&XJlKJn_uK!qhSGqU#X=d3V=S&e<^k5b8J z#2W92zi*@8Bei|kjGV>-ru1GL<#qlCQR28$-HCi{XGqD7fiP{FJTWzBPGbFVd{#?1hLcxYkTIZzI!GNw zfu76ytTB4BP37|C%Njvt^s=a;?n&!|C{5M&o=ef)p%V@=>~whpUjfy*#q;0+*1fub zeo_H9T331ppR@Z)F=aRYo*s)eUQx3vJ(`*hierzW|q_(%lkAI){r(#XhA$jKT%!KSNG@V%Ouw>$_kUvpH$NB*c;ag2@}usS zkdypTqy4DCff7I4W~*+any5{#e1#!7BF=EFcP4VxQ9td*8sx98O-xb-C8;-3m6W89 zM-odAMF}87dAJ~f(wrDiL57bkCWau$W3Pld)Vhn;hri-|i)h*^^(8J_u=JX_7#ms~lQ5PCx}o)Y&c*3miga5fLd^&!Z+ z`v|AIE@d0@;1D0jWedrN{4*1Fenk<%CJS2yC;9_Y;%<-mL!&rnUU&uMI4QX05)oadjshR;@FnRngLr) z_5MS1gR2g8w~w>J+l9oG8Czwqu)aL1^k-XzwWG+HV}Gm?8C_FjP9mu#xv$mqTtXI{ z5KByFRzPemYAZ_tj`WEfrc=2F-8x*%u%(iKV|B5cXJ zaFlfZaq+WQhWIj#zpYG=tzfuDUxXz(~0@`vwK3w4~af0EK* zY*`4ZO7U?{awjm{{q&uumfX=Q3%66|nz$XA-m9VGO)nI?oF?yVS?3Dp03So;w*Q7sSo!fT>D zAeNR%H8I*7GW^bLkLEk*mhojvp2U*2p6K}xDT2N_7{`1R|47eOZHJsoi=|<*D3Bs9u9s{-dnM&u=JZUdZA+EYZj71gwrbW0w&-8Psp;<4??ESS`lM zS!9ly)$7z8K-c6pzRyLncGp$kh4~?ikNP0Lt#_cxUS@4{&#b}=)XWs47jlTf7|y$f{9|?5yF>Px zr%T+=wwl%o?&)MR3VzVD717t6E>U(B% zE+%x|v`g+iI}yP~lpu4qpu%$*XYM_CoG)?J(t=;XHxeKD)({teHh)esEc~Jonu~w; z;c_~l*^_7VoHCs?q5LFF5PEUrb2>`-@xVmSXk3cb5YL*zzK#N5!0i0FF<$reqZb&v z&z3I=>=wjm?7!9US8JtLPNhy80;f?4W!{A4YjkylcoZ%{tvtyWvY(8LR&AH%xS1&- z+x1BceinH;G(aRDG99cXH>uA~j%~mFba*_m#YdqO6jsUCV&q7yG6;;RkxTDp{X*z6 zD9%QC&3(FWGp5L4oahV3-J|+6zJn5-ZaL-2H@w_8hqH_fVtOM|y`o^KEx4Rh$h7f2 zJj0x_*5nI`==EgcV-z#5a%s1qq!@wGW{0AYb*ri^LY)CaRJqRZTWRC!z^mu;9}C|$ zq?6NCe}cf5@JX`L&o+e}4p~-xXAY^nYG!?nVh3OwS59yWy{tTZaO1DLa$s2;>x!gI|Td;`uTy;8P0H7H1__)--dlTYH&qi&8m9LroAQ z_PKMSOIOs_b}EKotwk&!dq=@W{sZA=lt}y=T1rA-7Y2ScsSXlR_~&;kDWfUMH+3BGjaY@5^q#E{2|@9$doznZj;VU$kW;xHK5?)<1^Jdjp3@% z64{3~wJI|tpDeKzQx)EQ#~L|LAxs3<_xMTihw4xJDz%RYy^9WYj2Lif&Z}&~Su<26 zS0Z(edY*Q)f zD#8wd^(LbonhK@)^S&P)gFmQRQ}&i$)r9FwN@N(cYfO$4W4l z4h4>DY7%OxU&|wtCBB2Q%=ctI^B&$ZzA-S}+NJjsCVds3Ee_vxEbSKre?O{NH%>7d z$H_5yBKh?6+d=cqV;n`6jZ{pf^iZa56l{1<8geeho!+1~or_=3*mCco3^2*L^Fb74 z4~D(j)EbPcKCC9rwON4$awD@1QX@8DHePkIA<-#I5M6;-$P2a$>C7YOx-^xKVd~e` zr^?PEH_8THJ`21|T~8Cvh5Q_yOpGM-$QMhsR^_*3-7KjM&FWb7_qEEVcONlRcO{<^ zauN=%l+L~oq`0UwgSI$Ai*iI@jysDwGOt(6A!-aINm2vT_DW%M6}xtA<&a5V#s-U^ zfk@Zvwxm7)CtKyiv|BB9D%D)n25B+szN&TRT8tNd2U;RNL>ExtiP3ua$QK8KZ22;5 z=eVG0UjOB19+ellj1!R9P4hZUJpJz1j+3(u3nL4j&K1Y@Q)>Jntv1+t<47Aq(c)rc zq9S)t=vRi~_n#}t)@F|IV$R0zlX&^lI?{1AO{Akz`TpxE8@*{E z4JVH;y?X2oEna(gCASm3Apv8dkNFF+2$IBS7NYy7IRkO;i_VgHY#2-a8Ad9B~D$;nAt0xvKt+aZ`jiRd(n^#;r58B?Lwit-*88bP)njE+O{1|pr zu+`znW8P+$X)?|d7Iw0_yDTWrz0DKjmE75){EBmY`|c6tH%aeki@v}xzx(miS}Zm# z7)^|#Ih6VgOYqI7qC|~GyO6nV?g8Li{-x@xG5h^hKV8}~Nh81v z;e~y`lH`;69vUk_PxU9S1|KyfHybu2OwpZ6lFT~E$Il9?U5TzfczJ|szLmMx)k8I4 zrK{OqO8$6D{bM%ICbNt`jmYPy#g3nowp>S31ick$PKVhGRlVGG|dUQ-td|T3*B3;9qRq$jDK$lrPR_tPnhv8RJww_%z4%FRB>Rvlr%Xs zdJBfas!+5aO z#c6}VF%5V45JXZ&>CBJ&okxlSUhP9i z{gJfp^V$2!M^3{VyHk7vZ%7TtZJMk|8C|-{M;jw{*9=)32&Z3{Ov|@D<(!ADc)tj# z)}eB%df-y&Z?-Rh?;j};rn4T>_>!{Y<08HF-mYoqHga70v-qSarwrXGNqw@rB5k}$ z`nvO~RSc`-eDAvhf)-{L$m6|*vpTczWAM8|a1F?B>rP#0Hoy}ugRYKh&uO2E4RZ%p zBbN+t%Cn3B-6L0U*Wa3oacOy^Z%sa5Te-qnNov$q2}FL`|D|B$sCA|Xb{_a;g`7R> zo_^jr##HbI*~e!@ly@jPPy(6oS1flMOtec&miK6Q)bKz)aP(Kg0idyqW3Bvv5DFi& zo4hw~udEg3fzrejUK)v(`7R;s40sRb=$2r$5euiN+XYZ8=%$FaFuQLU8KzLvR1B1KH$ePlOn z=e)vmBJuGE zFymfn1?fp8u!a(Zs=nu&gQG7;z;{3OBr=E>>`3hs&vt%{7V5d1Xmi<3>TwALNqLsA zwU}p(Ru@fT$#l7ZmDA-_4bGdf=aY~`g*Vp5_va_yGKTg?z! zkJq0#lJb?lK5OQj$z`r&k6r8Qm_$a^wGfzw=uC!_*Vdpt2M0!#QLLdhhLdq4KSIV0 z4U254M)!PYL-rul^S$uMFc2t|VTyx;)jMPu$iBh=BrmKrZRE-Ls*X;yIHEOJG4D~7 z(hkW0l-xM&WO=vl37=j)+pN!-btt6eba!6bTMuzPcWq|O?m&O6rYt}N8el%@$@t>b zViP6@FXh%*H^zjFNZ`_Yk9vrxsEwviQA%q%%iPYUS4=8KlpYvJgl-gEp=y>27z&@f z05!d$sibqPDu;mg*J+ajlH`~LIM^FmJ6oFONMtfyglwvZ8_!Y{ob6?NM-57C`Q|+)5;#>#6}4zOg>LpRo4k^OwBSV225YjWU+jQ{9YFNy>Zy%ip~}QI(zHAh7~Civ9_n6 zv-LhxQ}Lg?e@yjcYY(4hrCD^FL=i?3G(@YFS9N+Y=!x6QW(NyC>KsW+8pI=m2db?b zjzxVc-VVO-nLg&VG8(Iq#`Y=PYpv7|?2~3nYBzLi3Je}`j z!nrQ@Z^w8>6@pL;YFVKzRk?@LC>qFo6iN@jp-iES;p@qXX`*%a9fT22Nt-No-KmKa z$6oGM+dHwNBoa<`ir;5HM?0Qoq`j76$v`D{PL{HJSQrp-zbvMIvx**T+>yICzNkI;k%g2UJ4?c8o`Is6OPIyf5SuD^(D_oy zM~Y0d6w5asDc*miebtWnn-;2PeOnG6+To7|m5Ls~pS#b@t zfBBK>3DqZ{Lm#;Uv5cVpJeA#xc;g^8{%3mJ>r% zcsk#-TAS0$dB`2TbaYTHP4dtuKkP22rO`Jgee`VYwNIAG^A@r5i7?M;Y?IOPBDmw_ zcNE*x&rCpEI(Jb6txmQ}aH_F^GWB!t4H*kM= zx2EfvD&Df6A0#6-&Ivy)bA-c3p$t7gJnl=LN(e)h_ib)N67^T|vwH6=$~zqVCmOz_ zx}ygoe)D?4fwB4DnjnCY6BpGNu;H&YG>n>ziRj)HuRq+}Td=B#!i-K2gMXr4eOb-i z6kLX~;KXpYM2wSmlCm0}j>cP11iQ2RPUEs*C~ie4y5Q4?&EO@Gj2uZxkwCErLJnBR z=;$e{;sX@AZkP1ghXeuiQTtzr+71$QThRfL89^%sk9H4z-NB8V z5u=pVZM4MDPjgF5RGK{G+5A%U;cU#r)!{>T6~hZu-_zlsSW!MF#t9ewNEX*XNE{I8 z{}JnMHOS!+rlIB?D4&2o^U|@ z7mfz@#u25vq%&`4?iAza8u*VlrPqx`uGr7srYtDvGmoL0&{QAmdk?rRS%H8y0#WyT zeQ0^rVx@f(wvi94pnP-T;_=3YpmyL%YAf%pyw_7tIZFjpQrp*LvEuX$+oj?RUWFJw zB^`Qxk~QyiTj9(-ybK(n(m4>j7Aeajf}GShcM^A=Y~q^gaDE76>kwMtQlRat%lRHG z(M*OXw89+O4cGAK7Ry~8Bw@QMESrfMF4d;co_uWrn&RzxhGydv^xOnA%-ci}eKa@S zL(PGI<&E-G>-xL(^RG-p)wla&?29)qu7~N)R74uDj1iCD$z@P%fA#pC?|<|79l{Ix z`8m5m!hB?qC{E~}Z)CGAAW|Sl42_D!tLE<9(i!-PHSj;=i zLpOl!_FFNd|5&Z9dHgb$4&zT0&^*9o^Hx(fh>T%3aOX<Uxhd3bPSy4$am(wOG-$ zGv_2GNMX4(62A)EKaJb(-B+N$y01WBK1A~sp;d z30WF3{yj$;A+^I(c}E&b)q1H`63}WNNps?NW(to&+9tBHbgoT&Pp{&A1O@#OZ>ARB zGL8SjyC1EBf4rFbqf-t71_J+j1Eiy*_`55A_7?t~okEX29894$Fc(@gn5DJ7Fx_5j zCmpS|xiFnJw-QjvK>}uFE$ih3d*Y?60rj$h@|)8=7Qqto5b&^butRhz(t6n0+B*w) z2-BG&+F4-&h~Gc{2GG&|M&e>4OegkZfmTOJl~w}o1f%8S1ad%uKoFwskP`ytv*6`{ zvD1Qqh!<8sJ^&EJ0pb?`0R_PPe|BxsiTr5b6ml}R5Kxzt{zD()OqkBf#l=AY0C0DA z=X8f~!ksJuAbx&+01ylSgER8Zhp7Vq#0doaipd=M7mtIhlkIP&nL`0E zTbSLCZcGFp=$Cv5OqG=WV*RB$JG)|3U_j05|W?GJc}` zFEaj2$zMtN2TT5Fjs1=GzsUJb1Kddi4!0HgJ>$fnPH;0*7g|IwDlIb)7l)a(3kxk6 z%n9K{ILrtB-MOFHev)Vo6|jIi*_pbCm^wJvT0>2LK(K4*_zRTJZ66@R;&KczAfYEFj#F-^@@_`kD71 zFOL}qHw0|T!DV4?%E1ptY#j&};=;wx&1c5T&-16;pLl*sX{w1>mns#cxZ9^!va4{O#ob zp`L#$`Io@`H(dXQ>t90PUn2f@cKsW!e+hwqiTK~y_5TbmtiOv5#JH9)ojanG_(d`E z)BMseCV_r0)&8o~xcGmcvHQ{J_K#CCwHiRxpUSh-QAL(m9xik&^xo|-4bp<`yY#P$ zNatsNOlE!J#;*E>G5>rl>33te{~X!@fkFSw8ZejY@8)pIE3pp@k2%Y+%S}7ffPdV7 zb8*#jaRGTC+%r5t4PKz?OmOiaV8TIhKt*2DL6LJ%MV?&^gLzOpd>jZFR~lrG7}o%T zCmg!N3fJk`AK)qjh_0ufU{}=Ox|1eUB z7?=EqIYJO(l=6R=BSb=Blpa~bQN)Qp^+(&&rG4`rd({Y4KI=WQ;{!5Nw8_V`Z{8u{ Xn;mY=2vK_H&L<_G@+p5K>v literal 0 HcmV?d00001 diff --git a/test/unit/pdf_find_controller_spec.js b/test/unit/pdf_find_controller_spec.js index 235e2a6eb..271a758f0 100644 --- a/test/unit/pdf_find_controller_spec.js +++ b/test/unit/pdf_find_controller_spec.js @@ -668,4 +668,25 @@ describe("pdf_find_controller", function () { pageMatchesLength: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], }); }); + + it("performs a search in a text with some Katakana at the end of a line", async function () { + const { eventBus, pdfFindController } = await initPdfFindController( + "issue15759.pdf" + ); + + await testSearch({ + eventBus, + pdfFindController, + state: { + query: "ソレノイド", + }, + matchesPerPage: [1], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[6]], + pageMatchesLength: [[5]], + }); + }); }); diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 276015d72..3dab4fa89 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -132,7 +132,11 @@ function normalize(text) { "\u3244-\u32bf" + // Circled ideograms/numbers. "\u32d0-\u32fe" + // Circled ideograms. "\uff00-\uffef"; // Halfwidth, fullwidth forms. - const regexp = `([${replace}])|([${toNormalizeWithNFKC}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(\\p{Ideographic}\\n)|(\\n)`; + + // 3040-309F: Hiragana + // 30A0-30FF: Katakana + const CJK = "(?:\\p{Ideographic}|[\u3040-\u30FF])"; + const regexp = `([${replace}])|([${toNormalizeWithNFKC}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(${CJK}\\n)|(\\n)`; if (syllablePositions.length === 0) { // Most of the syllables belong to Hangul so there are no need