From 6c6f6fb2b89839e8ed5f57dc1d63f64a8686f393 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Sun, 4 Sep 2022 12:47:45 +0200 Subject: [PATCH] Don't replace cr by a white space when the last char on the line is an ideographic char --- test/pdfs/.gitignore | 1 + test/pdfs/issue15340.pdf | Bin 0 -> 13798 bytes test/unit/pdf_find_controller_spec.js | 21 +++++++++++++++++++++ web/pdf_find_controller.js | 17 +++++++++++++---- 4 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 test/pdfs/issue15340.pdf diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 119701c04..d8859cf1d 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -538,3 +538,4 @@ !bug1782186.pdf !tracemonkey_a11y.pdf !bug1782564.pdf +!issue15340.pdf diff --git a/test/pdfs/issue15340.pdf b/test/pdfs/issue15340.pdf new file mode 100644 index 0000000000000000000000000000000000000000..439f1602f9c8e853803d77a84dc3524dc08ac6f1 GIT binary patch literal 13798 zcmbWe1ymi`(kL7V?(T9RxVyW%yW7Fx;4Xn6!QDd$?gWS6PLSXdJOp=l2>&57bHBNF zzW=_rUN07>ySjGm>XO}6)wL;AB%~Nwnb;91`?k)v4vH^6P4o>Qume~CATwJ80RaHB zl#RU`&;|T!Z|Vk=0Gfj=fBCju?T1Bod{@X+Ea8fFl&5+5cj1;Nx6Q{IS*qcwpIc*1T zB*<_vAInc+fAv@flBX+)jPNSr+>kg`Lp?dIW==neNvEq_LV|F-);qe8vZ%j1nqD+N z`-8ST_?uhTn2#uV$y4EsvWtYS0JJ=p3TQgSQgg=$CFpPPCMM-LlZ2AqU60W#z%~

?^WAY`H^g9UtOS z@5V1P?x#V=fGM2$T;>KtH+vtMQW!oi>qNGE9|r=LQV8A!hu(J7{z-dB&yKm~y|~7( ztVLU$!N($pLaaWv%C^)wBRngDp+l41BSr=5WbP}$rjA19wFzw?@uQmY!@LS)RzLuo z^;3wQum3Mpj;1AS1fZkEpV|PQy{M4&Z>rSrb^_~8#nkHee;1&m8-NXbTMfXh26P3v zyO;xA0UUn-#6ga3;QOwC7gGQuC;}~POvON60DTrPfRmS#iH!%q!@fzW4WO4Bz!1PJ4zdTis5_aO0|77gCGN@!;QT#_kPz6) z!1MkAW(9D)K*7XcOzcnkJb$CF1m=xd^u;(G!IlYNmH>L#m;=?M#r|cUY+S5t|GNSI z-DtB@JvG!9aKgvix!Wf&$N`j>1U~S9DSjDjDAfqrwMDmgD!GoJ=^*A{k~<`=9^{O4OAQi%LxVukf#_Pdw1kqrm4KXiY!}SPD3KYcyFO-mYtg#I(5XV@ zI`_ zD}8dW;`)0ph4IUJq*xZ*x1T|&k`R=-4jI-7N0mfJN4j&{)_0u?Ya{{cPl8{49yHIW z4#z)=Mt{^@vp=k0ZpV3bO`(@XT-TAJ`KsOlMr3QfLiq&n>Kyu2{pyDck}!Lt#J-dv~{vurApf|GE>_Ew_qT(WXiP2Fcl(cb+ zryI2AjV@lBZ$rHdA9A{I&q8-KRdTtv zt*FrVR|ul4-yuBS**d+N3G$hMQVQmBf-!*7?m~7#M0zVNf)Lg%&jPU;WCIs`iiVvh zS{9E}fSMsTheoIe=PfR(!h`j?UpyinEm7odf~o|M?p;}~xQgKA8#jU$IKy}MpI|2d z957E1{vqh;kkh8f-(NMpU8v?Kf|KqU`61zj{Q(xYYiV7s8Ak}EsRwtR@bnc!sBupc zoXA(W*OO8@aD->y?MGnB25;aj+C7Ff_OIeOcQfxtq zSMH05w}`iRp&D|DConrE6Z zQx3H;S)kmnqQ_=P)ozh)^`QJ}tz4<~`X0ehS;@@26tnkcnPw4Y?|yRjt5UWl1|>FO zD3^-tRZmeCk`aha7kW=M*DKcZEdm!=h^4s;9;TD_`Y%a$+b`jPltBG0nk}L&f~~q? zHJBb2lUsH*{KDv@=+@}C3y$qC;~V2kT%T=>p4(_s?&uup{8GqMEK?v-;>r{>L96 z1uM-<>U6B~mFmVY4HKF4xkZ}{n$$hTl;xRqn2kTHp5zLt=2YesbV_&X2M%{l02WQ6 zM6bdEd|+R&$}j>wixl#LluRR2D+s$tSH zUO$dxw{1Wk3NUKmG6QJPt6bXsPd=@(ouuS#YsB^ z`k_Xn$@>yfHhmJt-FA6N(Uu5yI^7=qUGRzBPSeBd&=N8m_Qfv4_;?4m5PRyC%E$}e`DlMcoWKtrn&oIJVwn`RTb^}sbwBd() zt?_x#W@&PKay~74>163Qn~?h3`e`vr)5vOiLv}&xcIIA=d=~MRVVl`wP0T!z$pi%& zS6$22^w?htS2EEQS(NW|5$kMgSFNOhIm4=>=-Zc@nVXIy#6uTD4*{H(oSyDh9Ui~J z9=kb@?NBO1=c){rz9@MbcDJlwbsWSw032i%PDi9hVt|Bej*+V~s-IM=J4~$WPPPZc zzKE%=w=<6EquBS`W7{9CSk_hAE~SwhOP!ZM)oOTj|;2 z{et}_I=-`qwK}OrTNY^hrnrwnAiQVLL^v3ZGm5JRO~869r{g|*DHF*5Aya~ zI;{eY10{}R&JxC@L!}D`d*a+@`Df<2OSqG{dSVg>YBrn)K~F!f#C%foSioiDtKhq~_jWp)c#N1)IP2N( zGWt3rvu@Z*emr3e`$HCQj%UER*g)u6R0M7~pSGJ&Oa10lc2#xN>PS%9XQQ((E~QKD z509E@!7FH2lLCp zguO%MTji9T7-3I8_{VcEDhJE$wVRNOhkr)dF9GaJ+zXC%WyQtCOkIH%fR|WR4Pf|J z=*s#MIR6zJ{}G9+ySccVyJ@%pfodR-8-Q6J!25g5|HQj5*ZGWIL|^J%4#gN!-CAWCe@P#=`YQ zdcUP+_;+djJ+t|PKXFqxQ+trr9|@J~UzwDmsf(Sgqb2Bf67zBawgj-6fnZu-h5eht z5&l%z|EQb$Pg(xO0f1RYR}T#PBhdn%{L7kNa-)Bj`tQt%S>4^t?RS>1o;s^n@8&ZtZs-Cjn4pP2P`24;xB{9?L)P!fGP#OPJPgQZLz zt<=RC)qqy+_NFf2Ob(>!X!E-e!1_B!`@5X}&e)$-0!}Wj|8o{*n7kCZ#DW%f@`Tm0 zY{ZaIk%T-@ZBM(I2b*~f;pW1^5$oKB>;3SHeb_NK%rAcJIa=yX4C?2>@j>8f|N5YZ zVsUMhjnTQ5<9rh%Hu{LS5y5PvK=IRAT!S0DovX;{Ucl1GR`_Er}$VFsJ;H51O24e|F`L4@Tcy1O!UW z2oHXK(1W5A3n`;4v}0=GET|3LGRPGDi674FHBiKG*Ezq9j$Q28I_E)DFy}~ApKL1u zQ%RkObS@&tupoh@+6X>}58Y_TZkOsrHHNKEF%1JR!n&Im!YR%C_*nst(E_C!gtt4Hm^jZH|mc zlSa&YgtWR{jY3VGv;s6)@>xl`;%5b7C z>MG9ui#2J_VTmR7k?eA`4*#~}|LPawvJx-${il2U zUzj@rGkA!)x2qe_;a`3Y_5>SnnZ+AGBML6E0BHft$}Sc_mlr>y`ODA1Ky`N~C-6f3 zvOKfA_z1H$fc3@2!E3Yx_>_eO?4U2qytt!_jguS51;F~pTjPJ0gx>>Ro=E?Hoc}k{ zKcDh%WPX!XGIao#MgEH)gG+i=FBPBP*ka)74uU=-HyePF4gA_*Wo6?6@N%+&3pTRg zb#mzKnE=V zH~5CmpJ1++M}7gz0Jy;Oih5XS+gN}LiGRpK(bVhDOZLA+1Fk*-*%4U(^S$^R@RtPs zwdVa@x8nb*0&#PJW%$3AF~L&)vsmT@k>YJ7qA@?$2=~!7y07GH7oSB(Mq}oCja0&_ zC{)2axcB*P1xW30?0pi9!gAlV%ciaJH*%Oq6PQ=gQh9!B9&B3+dRV!6c>4C3&e@vX zpuI3&H=m$G+2E5B5)u-9el87smaNLE3v17+tJzOW`|!cv;IKW_mov1s$3aF<={6%o z#gTKBnvs`*(H#!fAql}N5cq9vmtFR3f8_wPpeeAC&VPy3=FWjTAZfM1;o)Q|2b6TW z+^i_3dmrUc9&m3lO8^gV)*cyAUYq}*N$ns_-K4*)FQDm(`Vn{j?!J=Pml(@~NYL-{ z6ovC+R(n}%c}{!UbT{!Z6BEDIeY>X@uQ$K<9Um{rJFQ&xRW)iL~{H%iA%1*EvL{YIgxV4EPWLDkUg(X02R0iN{ecK58s zXD@#WB*bL6c0KextR&`gpg4<92z!dK$r?o2NWP6`MZEJxVW)8^XW^^=wUG7G13)E= zxW3Z=oo{ap46G7zQzHG3>w31;21j>p+w4r@6TTGnk={fT+@oj%vGz8>c5%Z5c9$_1 z#tWu)O=7*ceN|1Zj87WzhN1g38l-All&4tDmb`C<70TeSCAMI;V2WV@#+$hKXjnv8 zqfwXU&!F5=2B!O<}1c!*B*IlF@>Qk3s*L-yWDg!Pz{&O&=nZ?&*aBTONc~TH8)c|cm#p5 zr#q8gp4PQ=Nyx3DLf9U|O!3XHm8SO5ar2Z0EiNrD3vaT=Ey$TA@xlr1`b!6C^C>tj znM~=N8CWH_b+~ob;7RKTqmfe$&dvBt6zRWm^(3?{w1s}?=Pp&A#$gxR!D~}}gg!Fa z2RV|t{=lVYN%al3R1at0z)4pT|7H!;b+Aa-{&1AOVH`cIqG%+p*s4U_TSn_glUTfk z-ti+c|1wG=LlrJ~It2vG`Q*G7OMj$&mzIFS=cK6EfB%7U^)Re~95DsIIT?lD))=Xi z;K(o3MDTtR*li*bcxvDO5nH)qjiR>T?AuZ^%`4_F7PYb3v~MwatGVT;hx*PV*MaWJ zlx{f0721Xzfz}9#$JP8y;VV5wo3APPDq$F_-dU5xV8?RCtRC65&rg3>UDHqr6`{|L z$kMH=s7oyg5+Y6WqS55MV543kA8gz$Gp1lBXMqeXD)L4+lD3_-#9Xc+?1Xw1vzoW2 z{@jdt^%h?9J7FOps`AFIZ8;XF*zzsD?(pDigZG1xDj0r0d>`=qDG052r@>A)YuJ z$d&xK!}x>7PeD*pFx@-QfHk=DVQBhTIxNG2Gp+jRU(tC}+Dfo!sbXb!_g6T`mWCgD zt*h5n}XR;o}n! z)O~mZNqf1}P+V4ryNx&STnVUHXb(!RgU@q3Ug^A*WSF0Cc-=x%HM9UC7-RMznX*1=5 zk{fd_BN?r)9UBHFepcd+IamO1s84q;i4(b|%B3}6_uucH=J#dvaM#nXVn@gQK&;pUMzT!S23941xlX*{ zuCQYKoc!8YE9fw^0(+z>>elVUfo zweQ$ERsFA2PxDMYb|)=1)GuLub{@0$-`li$EPT049@{+b6H3tt3m4p-sB1%9-G@jO z@a;TUR*i%11m(+{$(ccKIJknVy)=F@ej$yX)mIJiR}vkbkda=G@8!6D2H4(iuluQ) zxQNJ!?By=9JM7atXEQB|pG5WB{bV6dLq!<%CVzFJF17PhnJgCO?puC{AjB^3@6ZeKNi=Ls=fJJc7(Stb)Kly%X2{M z4DTwIo23F!y0;VVWeCC5EV$+^2uABy4HPIvOm8JHWGG)}$khyzVZYNJ99~m8mf4dh z4ylX~q3TdnO4sM=pi##XGyVauWO*d*g&p2-TG`9B$hBw{&1yPZG}09olv8w1vJ$BF zi`p!SxFlX!AV>XYEfn9aT)Jj~O|xkY#xmM6dURYwVwwu-Fok&>uaPi?}=8x>wt^ps4 zt_L2jFr7HicW2g4`bI14cPXQuJX#EpnX$F|e{Ag1@O^Y`ldI8a(8@Oq&!i@@bR1jJ zbMhy7d~**WIO45!)n-v+S>>InMy;dm*Q?|>S2c%LqD3;IDG|9VG2z@~zG}-vfTDM} z*;u;xrM7?A>Wgq5>7=r#aEsuD!OtKzlN3D@32U=6)*^?jk5@pvEoRcvWF1sf{>`x}mGm2E8RrHd}5_p&p3|fnh zGT8_9Ura#cn5~n;nPSe73A__q-eO2X;Fw>gmMSlkY7`gZdO$R=tVWzL;dzH_`i^e( zEiGHSKm%K$)CCHr-;bi&(7kHGH#GDe`V$K9uLFzwpGOg&RjEa9dI5r3hU^`NGsV}r zscrapyL49J_866k=Nx$7>+BUw`Aa6M5~@_0RFj>4)Ht(eyz8_b3@7VSuNRJ-KW4jl=5p>wYWm_eBWlyHCoBQBBuTf*hSV&!~F@2Lu}gS)a$= zYOHo5uAR~P`H<8PmDQKk@4IpJ8o`4myC}VM)(KW^BO~OikNND8L&s@9(2W%`hY^Gk zgs3}_)Sq%;*aaaY!|L3HBGlL0*FrqbWk5QOyCd z-}NghRJIY67;?KM#Mr1Db@YJt9^1ySgklyMTZeqz?S@=ELG`2RN7?nN2riWtHuyT8 z48N88YLHW6u&YE|_q!3$A&o(Z74=eA{Lu-;Z2~2FVM2;IQqjS+`(Ct~3R}e1`z~9f zLQrTBY0^%bFa2OtkLP!kZX)chvLKn}SnB$k@nP>+i5~3_s`+dXDsU7UENo z)0(bCC?wZbNqxR+J5#)+^+~v*I%|?2LZZN(PJ1mEjmW%B;N(;L=CQ}W*N^9>a(`gN z3Ne+(32)HkBf;sZ!*T)Bqd&=7*pf-p+amzX@^&>L9-X;3gK_~E6*qO`z6A@b?-Pnk z9IV1U>b;w~4$0W$^P!$kr(LK^)LbNtwYPoM>uoPalqsynZ?%mEh}Y`he`OOyVD@#? zr>aD9CQ3V1w|;ojlIcas8t2X%j%Ng~E9A%aPT0G`$B04lC*jY!OBLYku7@??!1dtV z7O@1DBF$554n~%DB%HhHNc_=m1M<0YxDr27~}VCmtQ-b@eg)a zC~n#!!V&{Q`PcQ(~})IJm=oF>h*D6 z0foMI*T$uw$Q(|m=KM$$e$aXhE74i;rbES1u&2>7`|)t}lhNykq^?Y#HjEKa=sAbM z*N}~cYHHze_Hp__yMh#F5^m#($K#*wQa^@0H(E`}@h}5pFUACV$d0_FJ6Z#lLzjgdCTd-NO}ol0A#XF8j=h?F50WSQ1E(!Tx- zYh81nsA3q}M~$(o^QfAh_Y4$PuZl@AAEdOb&IgLH43UlmBq8Z9Txji*b;xj2r>+js zeWjB-k{-W;LTVw}!8;({!6X@o%0GM`-?w{dfPX75&LEnp>4h?KRq;VCeBqrK>SJdQ zli3zJ8N75K#)cR?Ptbu(&U==RA6(3;&N4v}9s#n09$sW0B!O?}Ey0r0cpJL2Nj zrC%TKTn@|)6xR9=I_l42B6!Oj3eOk{LdI@Iqe$hBM!Ns?* zXL{r$#+yIiEqbl^%Wsn%+J;=fo5*$tZQ-FGp&?K6S7)M>)ac@u&{llt7vQ45Dz5(3 zsVRF1@F?aqh2Kf~nALn|=gZAlfj8HKz?F>YAANHVk!>WsKomQQGnUIBs@BE%vTvunh2+K#>C{e9 zw}GcovfPgz)B~G!iu5Sz8eUp1oVlq^jI**Dy@W6B(rh>?)>-dXreKR4fo>88@- zFMA26+OI^{NXl#)T;_M;Q=L>l=!BRspd`mQH){GgV|aNOEtQ>~7ui$xxVV+g0LN+V z&Lu{KeHlwCZ0w(!5y*1uKSD33zzDCMq^X3AQ+fX_Ke|6&G6zWQj@$Tc!y&NzurIQ} z+A9GEgxQ(Z3TxBo+T#foKoOfOiZNA9VTlMotOg=;IH@h5xgsn$GNr7c5HOyb>X`Wm z;#MDqnz@~6;a2&?6LArt5C9`=_$gteIj2T-a^9%X)4@CcQz(YfcQz>v-eg0@nj9Ka z$TI)qmU(p(jtJifB+(W?dvf+h<*w71MhfTPIa0vG`B(_^IukPK`qg(u*yZXwf%M)x zT0fGxcRg5Ix4KV1$jBQg8aNu3Pf;-4<`l=r%4R=Mib`riyK6+9!V$Z_8D;!BJ(%G; zJy$myvRCO~OJ~PeG2$?E_(|s&vzWk+kQfac4Q-tykiMHSAuAp9wQp9V58h%)MN>&s zSJZo;39eDqOwtf%v5)EY^%Dk2P@;gkpm3y%jtwVkqSIu}^z;cIMBUaa=C=un&O90D zolTZkt}*e&@RmN6>TZ@Y<}lT#8YaW2Z}I)-q!FHB{f`_o4xOVEl+=`z)aqWz(Sspt z5Y$_a`b7aPn-9@VSZHE~1R%0o&c{Xa)Hp{nKO+Cn4=rj8GZn?CJ##r{T9DC&C_Mu? zXHt+S;V~SmUU*y`2HnZiJtx-oPzGu%wxqbA({dII6aAr1{g2UD&^YG^k0`q)86!Fi z9b<2UT!{hf=b!uSJNX0fg>?3zgsOsw6%MZOS=v!yciwtsVkP6fhm6;}ReTod1jN11 za&C(E6~H}f$Y9e*>{9+9<1o&+m6o>Smb=g#R&qbFxSZUU{5XRWNz!aX<#T{N3%p>O;gEO{OMiuUO?|f7%KGDEkz7rL{Hgr3x7p-o)C4P zU86~{aBe(Y`}X)%+n21lJ%wat!W|Jc(#VJu`(iC=A2AJp4` z0)sttt8&*wQ%WrIEDH6Cu0tNbajlydS5+5gxCV&n-l9~g zA_83u>b~fX9nugD;~-x~g^-<{Yha?b36 zQ9j{B0y5+0LLd1ma*^zh++Hc|my!6Z=d2BS+5iRxYN|*T-=h>p){3{cw8mIECVXW> zL;~Gml)jOMV|acWDH$gA9LDdS$IF4n0VBmKtu)6ajQs=7llylskwLarA3C~E&$@!l z`$5wMJEeBUdL!dG75s4=2_Ov>YYCYG?a_v2C#*!gbQ%>?988~DI=M)M6OH`F@BIgB zyF#ER?Wb`~<`Qt1RgWuAL>N;WjAu6iWJ{5a7GMD;u}hC9wyHm@b3@R{(&_>#-)+2l z&C;x-wy7b1ttF|3v%#bK-09$q5RN!k#Mn!;;)*OyWIZ9FoAu7VSx%#)M6UwngDH~_ z-YVOs>M3IU$iiu_{C(?B>R?zP1GTYNr=nt;MCQdPdP$krTZ!&<(_gB7hEg0+ zF?+|k;!!)DTz&(0D2>Rqnldc;MSW?rOAF-*TBJgEkb$}FCzlKD_l8oV2WKGC@ir2n zs~^j*nh3tVuzL7hhCzm=PU*prJ%~BA-Tb-|#<6W=lg@gt<%T8%A*B7sj56vn>OM5o zn}sP~7=z z4P$hE!yAcgjTx?5=r~KIIHodv1s^4ueUjRaBqzPMUN)S2G$GD~HG?~ct1e$q7gx8& z8Hi93l->(vZEb?gkPpGe6JLK_hft4XSt_RQXlG;x5sJQtjrEgiC#DTUFbxE)3~!Ca zIqmzEN5BcbwN7bm&TixpN$ri3+N^AEHGw0D<-8k>b^Jb8l%js18oq3jT z{%9!I(Xsq%RAbLxJ)y6vF<$H$+?uBaKPYFZ8s~nchPXKswiD6_#$N)?b}l7xP2YSg_ND@8DeForBON{TGs zex&|*ADmhd3&C*Fi~gyCU@$#fac8^o*QXy)A*QXyCO1$4;-hXTs5hf1q&@*8b9>w| z)$?5oy=w*mqI;Np2(XHT}pj&CtWyptg^98&b_S7|2gVQoYF$ZZc8n+ImnAM#wC5lFLw&0m!j1j;lw zEobz#McI8sf7)W;dEybuL#6%hjS(4sitBm;v$0sKw4qq8-euYZQW!_ zOUh;0uVSU}HZ&>0Pf(Gv@0Fs*Ch0lQbu#!3+e)`IO}iA*VpfQ)l_D(km>a&VM4gDa zKLvNj5aRWV1o9i}GH;H7n_Fz{jw%VcKo-jtRmNZT@g|6tu@{v^k2lfDOQ@uvrS(;)067kuo}OrGv)kwuSQO@D4@(o^%R*hHWPh2-X32(vjZb}J83(6 zzu!6P3%;vhOM}Dh-J5vU5i3Xo$Q4MzWpmK#_5gydU07x(C%Bnz<7(c`ua~_t{dzNX zbHHLFUz>5Iw~irw?-Ko*Q(4VPO-0Sb3I~&o1YB|GU!{n$$@+nGg7n!MA|okhL0< zZFVX)n5m?9NegTsPaI61E#b}i_78q_Vw8GWlx6eRctn~4`h}wk<7G2id*HC&72DVZO;5&-ACPbiq;?| zF5aFK5P_nGFmX}0yUzO9-7~aIIZXtE+dO3n0V&_%oIq0Cn$CU5|6^qYT{o@zCQiGkob$$82npO+q8_76!Wh)VwCx>y0n{wEFh~R2t{cl)8l0k9fyf@lu+O`7RTRGXPONBPCHIE$RI??c`K*G?|1X1lZ^pz2>wc z5bGo#)EnEOVSb#2_fp8CB3cuj!-*xs`2`}P_&|wPU_l|ZLy6MyyboEWBoNbZ`iem& z#MmZk44IqY^@b}Q8m6dbgMp`3ILk^@y3HBSWLF&hF0yi5zc)t7?_IR8$kJPRSP!z$ ztUy!3FWKo2Ia2c|hjLZM0a?n3P4XvFq|YZ04`B}cbB%1Pga_@YWU@2UBxpb$}Mo#nlGn2=19;;$&h47|@8h+t^zG*jS}`*bQg_s_v#P zZax6;R)*huBfQ*b!JPr%eJ?Lx?ZGc_aF-F7=5GL)h{Vf|1R4oGHWoHE7G4$(Ru*

$JpZJ}1>P0(pLiVq8;t#* z^fwis4+ PHdbB)N=iv(DTMzIN>ses literal 0 HcmV?d00001 diff --git a/test/unit/pdf_find_controller_spec.js b/test/unit/pdf_find_controller_spec.js index 86e6281ac..f50ee7840 100644 --- a/test/unit/pdf_find_controller_spec.js +++ b/test/unit/pdf_find_controller_spec.js @@ -626,4 +626,25 @@ describe("pdf_find_controller", function () { pageMatchesLength: [[8]], }); }); + + it("performs a search in a text containing an ideographic at the end of a line", async function () { + const { eventBus, pdfFindController } = await initPdfFindController( + "issue15340.pdf" + ); + + await testSearch({ + eventBus, + pdfFindController, + state: { + query: "検知機構", + }, + matchesPerPage: [1], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[29]], + pageMatchesLength: [[4]], + }); + }); }); diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index cfadec222..a0218035b 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -126,7 +126,7 @@ function normalize(text) { } else { // Compile the regular expression for text normalization once. const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join(""); - const regexp = `([${replace}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(\\n)`; + const regexp = `([${replace}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(\\p{Ideographic}\\n)|(\\n)`; if (syllablePositions.length === 0) { // Most of the syllables belong to Hangul so there are no need @@ -188,7 +188,7 @@ function normalize(text) { normalized = normalized.replace( normalizationRegex, - (match, p1, p2, p3, p4, p5, i) => { + (match, p1, p2, p3, p4, p5, p6, i) => { i -= shiftOrigin; if (p1) { // Maybe fractions or quotations mark... @@ -248,6 +248,15 @@ function normalize(text) { } if (p4) { + // An ideographic at the end of a line doesn't imply adding an extra + // white space. + positions.push([i - shift + 1, shift]); + shiftOrigin += 1; + eol += 1; + return p4.charAt(0); + } + + if (p5) { // eol is replaced by space: "foo\nbar" is likely equivalent to // "foo bar". positions.push([i - shift + 1, shift - 1]); @@ -257,7 +266,7 @@ function normalize(text) { return " "; } - // p5 + // p6 if (i + eol === syllablePositions[syllableIndex]?.[1]) { // A syllable (1 char) is replaced with several chars (n) so // newCharsLen = n - 1. @@ -269,7 +278,7 @@ function normalize(text) { shift -= newCharLen; shiftOrigin += newCharLen; } - return p5; + return p6; } );