From 79cfc548fcf3270595f33bd67e70d0e3ebdd5c79 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 5 Jul 2022 14:08:53 +0200 Subject: [PATCH] Improve text-selection for Type3 fonts with bogus /FontBBox-entries (issue 14999) This extends PR 13461, by also building a fallback bounding box for Type3 fonts that contain a much too small /FontBBox-entry. *Please note:* While this patch improves things overall, copy-and-pasting still doesn't work perfectly for this document. In particular the lowercase letter "c" cannot be selected/copied, however this can be reproduced in both Adobe Reader and PDFium (in Google Chrome) too, which is caused by a lack of proper /ToUnicode-data in the PDF document. --- src/core/evaluator.js | 19 ++++++++++++++----- test/pdfs/.gitignore | 1 + test/pdfs/issue14999_reduced.pdf | Bin 0 -> 7915 bytes test/test_manifest.json | 6 ++++++ 4 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 test/pdfs/issue14999_reduced.pdf diff --git a/src/core/evaluator.js b/src/core/evaluator.js index cda5f7df7..a6ebd12b1 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -4404,8 +4404,10 @@ class TranslatedFont { const fontResources = this.dict.get("Resources") || resources; const charProcOperatorList = Object.create(null); - const isEmptyBBox = - !translatedFont.bbox || isArrayEqual(translatedFont.bbox, [0, 0, 0, 0]); + const fontBBox = Util.normalizeRect(translatedFont.bbox || [0, 0, 0, 0]), + width = fontBBox[2] - fontBBox[0], + height = fontBBox[3] - fontBBox[1]; + const fontBBoxSize = Math.hypot(width, height); for (const key of charProcs.getKeys()) { loadCharProcsPromise = loadCharProcsPromise.then(() => { @@ -4426,7 +4428,7 @@ class TranslatedFont { // colour-related parameters) in the graphics state; // any use of such operators shall be ignored." if (operatorList.fnArray[0] === OPS.setCharWidthAndBounds) { - this._removeType3ColorOperators(operatorList, isEmptyBBox); + this._removeType3ColorOperators(operatorList, fontBBoxSize); } charProcOperatorList[key] = operatorList.getIR(); @@ -4454,7 +4456,7 @@ class TranslatedFont { /** * @private */ - _removeType3ColorOperators(operatorList, isEmptyBBox = false) { + _removeType3ColorOperators(operatorList, fontBBoxSize = NaN) { if ( typeof PDFJSDev === "undefined" || PDFJSDev.test("!PRODUCTION || TESTING") @@ -4467,12 +4469,19 @@ class TranslatedFont { const charBBox = Util.normalizeRect(operatorList.argsArray[0].slice(2)), width = charBBox[2] - charBBox[0], height = charBBox[3] - charBBox[1]; + const charBBoxSize = Math.hypot(width, height); if (width === 0 || height === 0) { // Skip the d1 operator when its bounds are bogus (fixes issue14953.pdf). operatorList.fnArray.splice(0, 1); operatorList.argsArray.splice(0, 1); - } else if (isEmptyBBox) { + } else if ( + fontBBoxSize === 0 || + Math.round(charBBoxSize / fontBBoxSize) >= 10 + ) { + // Override the fontBBox when it's undefined/empty, or when it's at least + // (approximately) one order of magnitude smaller than the charBBox + // (fixes issue14999_reduced.pdf). if (!this._bbox) { this._bbox = [Infinity, Infinity, -Infinity, -Infinity]; } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 5d1d18e73..f71f69d37 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -121,6 +121,7 @@ !issue13916.pdf !issue14023.pdf !issue14438.pdf +!issue14999_reduced.pdf !bad-PageLabels.pdf !decodeACSuccessive.pdf !issue13003.pdf diff --git a/test/pdfs/issue14999_reduced.pdf b/test/pdfs/issue14999_reduced.pdf new file mode 100644 index 0000000000000000000000000000000000000000..52bc44a33448158d66bd620f8ce4ebefc3a36508 GIT binary patch literal 7915 zcmcIpdpuO>|DTMZCUjYqHszGEh`F4(Z#TCt5)B6Jm(s|{h#6)^h3u3{>9VP7;Ai|*cmE-t43QvVSQLbxJsVG4z>k*jL=bU7G*`m&;H?sd^YFwaJi%&7Bm`4f z__=fOVu^^yjlr+aq)@3rBsK{b=I-u}bH`%0TDlXkbFl8&!$)7Ux_vohtIH1*Y@z+4 z$?3eP$n5MjCs*_Nd@kOvw*GdmvgvoWALr1WTpJ6&d%4PCewDdO%Rf)Ayp^jQYfhbg z&`aU1c>dh{mU-d12VH?xxI4_o#{CTq<2F;DKaWc&vCns2<~Q}cuuuMph{$_Qk2!U! zN^Io9X64h1@$z>SXH@ubPK>NFQftQ$@ zyOfw+-RE;Or2A+Ye#ZRVlOf%;E(b!or?~J!x_yS-!>5LH6CSwuue#j+s{P23R~uhq zCQV8f;+ALc694GK7px$&cOIFXabjo6i);L0Asd{&F=`mkJX=@TSp3`G((>9T?ib}w zvK5C~Kd9cw*3?xxB3)hx@e|UGbaWty%^75rN!0Ih1VMU~N<^VR5_^)8J1@O@e&sb` zZfR}xwn?wIJ!iK6+_~t6=if7Znlg}4rdjJJ+(jy~%Wu4CJ~{dp4wDeacQ=F}WvB>} zSu{Na{reH*6ap+i1Bm|&pa1|!qF_+~Hy$#(D$)aRD+*x1U;y3q0L&H;k{f$7t6%<4 zSoZi?vj|Hk;4b2v%_llK?Th{mL9#;5rFr+f>*_|*s+LffV!^^)Qz|%((lQ_MH2U9QF$f$Y`3tKf3WIgwOdO$Il6w(3_At)#1T-vZ@ zqmP^2g2_~N)x}3e7cN{N;T&Rh*>MVGhr=5+J1~Agy^raM&tUc5qgC&=m>q8`Gl)n4 zh$iL7Gr_V;X=q>}-|ak~*MmsUwyZ(wj1bP`4>lySy>|Y=XO`|UG=Juh=TD>Ss*QI3 zUOHJHqsjXCn|Xi^=`d7zqsqngso(wT>gsEj$IEa3?4@`Zlyf|vdcbR*b^akQ1@`!U zFNH(-4_DK@6f?e!lsILS9WE;>nv_0i<*H-KGub2e$7wjxtmQ;ibycraw6dcIxq;20 zHP~29G}a{P-zNCYGBi!rP^Sq~D7w>RptSczX-^p>s-I~ek&4ouB8INt_*dFv>We+F z=DWdyKU}wVEmnk$j&3jARC2lfh`s>$#6|SnNa}X2bMHJjHVwb6!O@~^MweaCr!s1& zy#96HKaZuMPgErc!c!@`ZIC@#@yk?F(cg$vf%NffGUP3<-zm=puPrD&`ui2$;Vled zC4I_c`jk!Er<2o$Z}#4|+jN_+=OKKNih{8rT+h!g0m}H)l6SLDz_Dj)-u9hmr zirEfcX|%Gerf|EK?WMgKn^Gvt;FN}Lb74LCE8+kisFN$j*I;su;;XHO>DRRyTOVd7 z%#9<@joeVR;nt1=3SWqyYFm?$=XgIO(Lc1gbx#dLrFc;t^i(!Cdinbfqyh+m^v(+@ zqC#4iF4^6;yeF|GS<-olFN2KtTua^TJ;pO^>=={1vkzPTVr8jefL*F@s6n`f?rrrL z(C%+zC*Pe$r#C}BK^73(+`hM%RIm0SCHNk^0E;siP z704Q-K=lEMR{>|ML`+j|=d>Wm{l|z@h{P!61+v#QYe#GsBsDf)8Zo)S_t{Nb$+DI1 z*HX85?+dVSGP4LddlbJZ1e0$Yy!2ppLT;TLHS(+>^i+%p&HvUIsDec-@Db~~-tMO_< zQ^JlJL~h9Dm(0{fjzU4xu}PS}*q~YS|3+ihq6h6kznWQ#^!jT|;YHt%e&T}L_n*dJ zZFiJsTOnZ+!C|~G(vv1TcGb0Uyx=I%C4dO6l(+ZPxZAm|a z)TVXtW_%zmH#BI{@}vKcw46N+SsmD=>PBw4a8Foo+b|NzDe0S%WKmm^*Xz}>0%-_+ zp;Wb0s2-!vwZmr7pN6dK?f)B%}4#9pyG{gw5cSvc59>?R>qE42P%z< ziJ};LeV-s|O6qppZ+Re3)U*v70KRS9AJ;Ss$DE0A$cW57cP^w`(0%Woz(ufe_wF(V zdsar7cAu?vYAHsee6|JyWeH+V?&_HbuMd;FPm@%bR@nSB?yuroZ6hWO0pW_q^)p=w5Oo4)j27-v^nNe36k%YkTk|k^&BaZw{9D) z=&XoIHuvb3dJ_w1tt*1#UnK<`=ieeIm&_Taw0^vMOd9L^vEQ1RTWbWYvLtksL9B*u zxMAH`rBLap;|9+YZ7GAK9G6bYM4yQb=d6sxjK?{upXgXhoI9MHot>As;xlJoZl9Ap zZt;%A{+HXYyPg8yHZBIn+9-3}Mr#nS4|L;ImO^0PWX(H4WfWK)uekS9<-_@|F}0p1 z3r+pU+`MUb7i+3IKJL&D^@k?g+}UZ>zV0^d5bePk08Kq!vlaW_@z1 zdH3R+7i9CGZs*u#6?WD2joXdqnEy0C9#cn(IlaU>e)Rd$rhod3Q~q#S`)1voeH&KX z3-}>m&G6D3hsfjk36Ss1$B&-SQ=avD24~9-w@IS$`~C7huA8w?oTjq-)7m!9(cA0i zkaDu>=!m)XmQJHXT}b}n24|~dnmGxvJcu}70G^@q1*`Ey4}L@hPs9_f;)$VPDh(pK zvB2R9(a}>5#9}~1cQ(WVh3`TgSVyxvRlAVbqoR;feio zapy|7(Zba_H|f|mFMc?f8;n{QKhSmmUFw$bM0a7lKmw^P%RrAhUWd&E;7A6#^y7(z z@gfwg{udKMaIOE048f=$@I%J|{)pbZa6Z>fxE=~dqlv;GL3EN%?`Y=&31}Ql2u+0Q zI|l}#>n%V~hd2d$ypB|*;W576EMzDO)_kK~FV;4z3CVSs>-z8--9(rS#fm_jg0 z0{|LYmn{;m(#!y?CMu2K0$49?6@Q4_KNoZ%$><88*C}BAG;R1nWKH{E#nr%qC3S_= z!hu#LLu5^>?rcc2T7V;H!UC58#ngLw^c0Xu)4T^n)?5dy>IVenjHY+MHnOH8Kql?z z7!X-olMa#lJ3_(tj}McmiHLdc4 zW*@1p!~}KdBy?(^L3@c%-3d&f4|HO`N+<~DOZY+oHyXX@45}083>IKDfrZ|w9!NM3 zXokdr!eX%qREm9qx@|g@plz5ZmhfY^Xe$J?k@+0F0eM_6;zi&|5X{7bA!>er_|X6W z5zs&xNQq*um5BT=`E4Aa2k=#|k>5zs4Rph)VL!6e!jV}W6m z1bX#oED#QQWlYi+_fk;()~iRMqS32+FNHw?k)T(`0_j1oj0!Wp=$A@kebFzKg@&tc zdo=KH`inB!m)O%7lrP3dW1#`0+a8@v|FR55MM|$8%=}`F=+rOALT9l6x3u?4L|j0A n5ne6-enKHg5*n@Y=O+RI);J@sNTTK#17?DE+cV}bU4Z{z4_5J6 literal 0 HcmV?d00001 diff --git a/test/test_manifest.json b/test/test_manifest.json index 10529d872..81fb2d0a8 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -2848,6 +2848,12 @@ "link": false, "type": "text" }, + { "id": "issue14999", + "file": "pdfs/issue14999_reduced.pdf", + "md5": "a4e664e734f6869aa66245e72d448874", + "rounds": 1, + "type": "text" + }, { "id": "issue6901-eq", "file": "pdfs/issue6901.pdf", "md5": "1a0604b1a7a3aaf2162b425a9a84230b",