From b46e0d61cf5b641cec247af441029aa7454a9703 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 10 Nov 2022 14:00:23 +0100 Subject: [PATCH] Use the *full* inline image as the cacheKey in `Parser.makeInlineImage` (bug 1799927) *Please note:* This only fixes the "wrong letter" part of bug 1799927. It appears that the simple `computeAdler32` function, used when caching inline images, generates hash collisions for some (very short) TypedArrays. In this case that leads to some of the "letters", which are actually inline images, being rendered incorrectly. Rather than switching to another hashing algorithm, e.g. the `MurmurHash3_64` class, we simply cache using a stringified version of the inline image data as the cacheKey to prevent any future collisions. While this will (naturally) lead to slightly higher peak memory usage, it'll however be limited to the current `Parser`-instance which means that it's not persistent. One small benefit of these changes is that we can avoid creating lots of `Stream`-instances for already cached inline images. --- src/core/parser.js | 50 ++++++++++++++++----------------------- test/pdfs/.gitignore | 1 + test/pdfs/bug1799927.pdf | Bin 0 -> 13676 bytes test/test_manifest.json | 6 +++++ 4 files changed, 28 insertions(+), 29 deletions(-) create mode 100644 test/pdfs/bug1799927.pdf diff --git a/src/core/parser.js b/src/core/parser.js index 088a26c6e..25a101f68 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -40,27 +40,23 @@ import { PredictorStream } from "./predictor_stream.js"; import { RunLengthStream } from "./run_length_stream.js"; const MAX_LENGTH_TO_CACHE = 1000; -const MAX_ADLER32_LENGTH = 5552; -function computeAdler32(bytes) { - const bytesLength = bytes.length; - if ( - typeof PDFJSDev === "undefined" || - PDFJSDev.test("!PRODUCTION || TESTING") - ) { - assert( - bytesLength < MAX_ADLER32_LENGTH, - 'computeAdler32: Unsupported "bytes" length.' - ); +function getInlineImageCacheKey(bytes) { + const strBuf = [], + ii = bytes.length; + let i = 0; + while (i < ii - 1) { + strBuf.push((bytes[i++] << 8) | bytes[i++]); } - let a = 1, - b = 0; - for (let i = 0; i < bytesLength; ++i) { - // No modulo required in the loop if `bytesLength < 5552`. - a += bytes[i] & 0xff; - b += a; + // Handle an odd number of elements. + if (i < ii) { + strBuf.push(bytes[i]); } - return (b % 65521 << 16) | a % 65521; + // We purposely include the "raw" length in the cacheKey, to prevent any + // possible issues with hash collisions in the inline image cache. + // Here we also assume that `strBuf` is never larger than 8192 elements, + // please refer to the `bytesToString` implementation. + return ii + "_" + String.fromCharCode.apply(null, strBuf); } class Parser { @@ -71,6 +67,7 @@ class Parser { this.recoveryMode = recoveryMode; this.imageCache = Object.create(null); + this._imageId = 0; this.refill(); } @@ -532,25 +529,19 @@ class Parser { default: length = this.findDefaultInlineStreamEnd(stream); } - let imageStream = stream.makeSubStream(startPos, length, dict); // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their - // adler32 checksum. + // stringified content, to prevent possible hash collisions. let cacheKey; - if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) { - const imageBytes = imageStream.getBytes(); - imageStream.reset(); - + if (length < MAX_LENGTH_TO_CACHE && dictLength > 0) { const initialStreamPos = stream.pos; // Set the stream position to the beginning of the dictionary data... stream.pos = lexer.beginInlineImagePos; - // ... and fetch the bytes of the *entire* dictionary. - const dictBytes = stream.getBytes(dictLength); + // ... and fetch the bytes of the dictionary *and* the inline image. + cacheKey = getInlineImageCacheKey(stream.getBytes(dictLength + length)); // Finally, don't forget to reset the stream position. stream.pos = initialStreamPos; - cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes); - const cacheEntry = this.imageCache[cacheKey]; if (cacheEntry !== undefined) { this.buf2 = Cmd.get("EI"); @@ -561,6 +552,7 @@ class Parser { } } + let imageStream = stream.makeSubStream(startPos, length, dict); if (cipherTransform) { imageStream = cipherTransform.createStream(imageStream, length); } @@ -568,7 +560,7 @@ class Parser { imageStream = this.filter(imageStream, dict, length); imageStream.dict = dict; if (cacheKey !== undefined) { - imageStream.cacheKey = `inline_${length}_${cacheKey}`; + imageStream.cacheKey = `inline_img_${++this._imageId}`; this.imageCache[cacheKey] = imageStream; } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index c0ef6527d..86fb8fc99 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -534,6 +534,7 @@ !issue14415.pdf !issue14307.pdf !issue14497.pdf +!bug1799927.pdf !issue14502.pdf !issue13211.pdf !issue14627.pdf diff --git a/test/pdfs/bug1799927.pdf b/test/pdfs/bug1799927.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2605ee54e27383ed24f11b7db432a74f222513ca GIT binary patch literal 13676 zcmb_@2UJtp_P09vWUwGsMiC;6Mo|c%hDczPW}yiLRGI+-L?Iy%N|2(Cg9r#SHaaMd zAW_l5QKU#5X(|XRO{7T>kRk}7O8fS`2_wGm6?yBQ#af(u&OT?i-`@M2lbg+^y87~X z1r^cFB|T9oqMJAGrTZ`(DJ&X;PQk9B6Jd7Tb{OLv@n{CK!1pM&48&#?5!>>zToSV-wR| zWOF@e?cQafvj-#ZrK@MTS4U6Q!^zp#lg6S#kBK+S!<(f~bEC?d>zL`A?ZPX-=)Y&8 zEvkx9#4sF=ifU@&45@S%mMcaLddzUJ28-&6!RfnESX5oABg2WRr6uac@}yGSMg0;! zHj=`BJmkOV9sap%82-70*9zLjSLMZpz6~ev=gA^NKP}OB`)mFBzfSr8Rw?VV_WSz> z$|^SO=hEsn{Lpute{H*LIjL^3lLgLChVXKw&&nHmL5B-u>+z({Gxm`}u*{ z;}zrfy*EvJ+o!v8MyGezUCQN7u}4Z8gBzz;_EoFt*?$bGt<_t_7&5Z5n3R~Nhf@;H zHcxv@H;PSXj`wk=Dw6xSRF zbDk9@kBpl8ScF2pXB9#8q`DLpo^B+&0oVabCX&$Y(a5f{DxEoxH``)LW+ zAgG;uNK9{I=Fo_VG$wFs_!a-NmgMTS*<75jS28>b-35l8G>P7cY#>6sb3~(q(ewQV8{{ ztwwAl)52u7Tp)=*sf!D3aoT^{tvsF0Q_|Aa_h2NdCVjo0YgxU84WP`(5d z<)Ph%wA0=kRUYxc21-KLxrgPeOXNwc)`s~vnj}&ugSck<8dJ3*LR<&#xt2NalXW#u z6u+M?ma;3J7IDQWXz97ShP0Qzz6cUo#xu0mW%kOtzHHYzS?qymu1;Ar^|$yo9nIUNDbR+Y_#S;LY&+wpwVR><%sYaEX0dq(`8(}%=K2MrG{-7m9VDR z4wx5}+$Zr*L+4ZHv@i){<29QS66G8S8stI`PmV}SkA--_vvir$mF9Y;*tD=1AlQ|V zAm>2bNiJm3IS-k8n6J;K%NPP%3t)Q}VVf8)_k^fQKJ9&!6WRrAYtqwYIKbhc3&>S& zw1$!xCrAH4J{000`H)9w9iJTfxnY&@YJ;lOFxd#@BJrY5J%ttJs|_U5)@Gj|kR>Sz z+akA?uQ9N336s4A<0Y{?k!}kn7?nVmzq;2yhzaH=fcYhviQ6K<{3vAxP}ZdL!2BpB zm>(9Bi$u)tPRhTu{JmWC=75oUf~{oM#|>e!C-_Co4cx1Q1q6G^`3bnl`q5=Y;*ZC8 z56?B5Ns1@pB2SdBGg#OWE_tKb7yB+%DRi~mXA&lvwId*XM5wW7%K$q z@}{>f;dt%0|0&UC{8U#1Z=EUDihG5~-=!LMq1)ax(R$v!s}-?L=cUPKXw#K%JKlZY z5$SZ=ZP-+QDK?6F+GF?#(r_;H81T>+afxz)#tD6qv?$kw-UGkt8)Bnq3z-8~^bK92 zjux^W5HgZHwsu-D7t%5b4n%9oU5Ps)W6IYWm}0|aZ(Jx6udL&VbU85*%2PxuNqmBQ z9YnIpewF zIvkxhI*(UYDE?>mSzxouU`5B4NPsP{k0StVnE+d!2=9F%c=4<(2M8q6wnW}22ZF=| zLPl~TsS_q&hsifCtTM2G$ybbxVJx1ILR4oi^!DTd_zK0Vo(U$ez+%zKS%Wd*-(?fp z>(%=!ay>U-!(@-ns0ff^f{L>&u(|^kSiA=GX%*n;36Y&qtS**IYx`NMHr7iuMa4Hm z-xOO>4cxNV?@H9PlI3`H9M$#iwK;diIO(Q%b+(u4DVU^h0lXqE2!}TK*DupEP>76e znjB4&ZoVDR&`RJRP<8c^CSM~A9Z(IOX@r?elYb_R98eWB#tx|7f}U$cpou^mFvKx0 zV#cLr-Q*kA^223~mH!F&;Idh9f%@kxPo&iX^(ei|)_k2R7zCF)+QGbji8wnD(}4`s z2CJm}r#Q?c zg#5Y%3)nY?7)XFv*omS*qzfQaSto#rp$j(h3P1>-kO=Z=FdJU#kQaI~%TxmW{JHD% zL(Fe-@bNa}WxNK?(WO``uzVJ0ePK4aTEaMk^A9GStQvCfCg(t+HF;SVkgV>oB>zmz z=B%&k;hAn|e%H*FAi+;@+e8?6CBjoQVTJMTfYJ$M2-{~e%;9n{Sd%B4XHo|(i|e@ zG5^r`eL4z}V=6}7R`wnV*_MF8T8=)m>u@!u-1g@BqV!{cNblQ6S9D~M&+)46b9N=< zSpo=w$Qy_hje)2-8;FQmoX@mYC+&_g$|}!q9f~T(#h3#jc>ywFybw+ zGitw*p)%o{&V?P1woQ z^$6mHcJ>sCzXpPkd#gZ6EGU~)!sG`1rBE9SHc~-sgso_W$Q0TLA``IFs|l$hsRY2o z2M~a@g^?*3vi1*U7`*4ju#LM}c3h22g6`0mpkNh zE${DhHYMa+s@}K&L@Xe}7z0sd9S{MqZIWHKB*uab{y7zTeVr?jAV8xDNwg46ts!=* z)K|ZTJ3i1_Y(h97c_3a;3sytwMrt!!3j~Q0c0_{20;NLI1xm9yuZ4i>sai5$XXt zFZ30Hj?S|Xf&z3PnE{_Da1p{#q!WBt3H9N$P@?Y7iscN00H%#bt;rK8n{NUDW;y7V zKs-SqQUpS29_#>4Rl?4Iu@wz1T?nny@ce2FLhF5}UroizSXMho^Fk=i6(Wdi3L^j9 z1tOa!?%XJL1N7R3hzux`QN^XLRjj$GR z0ao{l#Q*B#iL|(ZWcg^l-;^jHDFO8!3G&~)ikPiADtn>Q7=kydAsNF)2}#Dt?g%Nv zNHT`P4K)g!C8+D@KC085v{pW_EoP|uhb>V4V~o~94hoYExljb#N+`on{z05m;BsUG zfu?jA4ue%7rHepHZ^&8c^ZI|OK6Pu?K=t`VsdziTJrdCQ)EOHI`%q9jkpTNp;Ohzc zf-4Jzo-)Nkk_Q5$7$8X=_MyDd-V*KGNo7yG9e5&@!n$zFTmW_0lY;7T=(Yg%q=Ich zO)OLv;C2x96!HE$yk$WKwm<)Fe9htXmNL%Km+g?Z2=@iae=Cx=m`BM_l_75>9ESSv zekQd6Nin3h8|6r9;}C+zw@k6E=-p#Q;%Tv{^#$7|!QK=$pQ!c!-kT!nk3-ogg^;8i2*Q#8yLdJ@0J6QHBn)eUE1)D)0Z|G;nC?U% z?{=LzcVY6O@hwgW4aDYv*p$Rs8M%N6Vpl_1`;w7YL2L^)*H_ z$ognufIYFGFjPH5tQTN$4zZ~P6x8HyuJtFSDVMQ-7@3dg)%J9y-VXCP_At1-7VldU z_r8EN(oPx4D^~A6W7a=WS1qg6aT7nKzjv~?n;{kaDA&5uF?YMc`QWBLZFkL4B|72c zWSwr%;kHt(;(6l*?4;m(Vq(VvHY(pr3%+N5FmP~)u#|sZIc_>=$i+U2X>iMpHWjdb zSVOvav<7?pRn66Nef62^+&4ZyNOZ!z_5a}Dc>a+q30E&SmH0?(VEEUPpX@k(rTfJh z?DW_2IC5Uw_0W+#i~h^e>$Tdhhh^oh?oW!AYZ@>svbGBYx>r821D?0Swu~oCMMbsr z_pdr1Ez`s|D>!Z!#;Vva-p5XFF$cnm{oemAhE5q`LOmSo?^c=aQ- ziv4RC>}@I1rlz2*yH5K9zsyzXMuc|qvaZf@}Y|%5}@=T z46-hw66oO`ofNH8-4>jo8T{dH^PM1!BRZ@H!~3#k^Ex_u5!JZxIJ%5bJiU7A^U{6~ z4ZV-y8AE)duW0PLi`6Z0;<3z$c~W;2 zTBCvWe!J&`n{m{M^Qe-=I8>S0k3uTWgVKx9Egqaf&8dOYdfVr&v*l#V(!XMdhInO< zYf&d6__;(ETD=2tUe%{4yDu6zbHfPf5`-jeIje0|E>Lk58n(dPGJ5@GpEi*>%Hbu)iID_oUlNJ;`ra9E|YArWFS+jvNj$^hM;fUjY)?b5L`1tyi1 zxoe-!F`~@vLN-n34SPvCmaIw#MV9rkx4kCe^Z?($5P|yU3H<75S%l{8c$pGx_D4t@Q^d?BqjzsnM~A!a(`PB(M!v(84$soa+C4g&M3sv=kw7@LC;XLL+~JRKkM37xZS={$JvkQ2xO)@NV)O(W!yY=Op|utPg>H^Lfl>d#_W~po-)N2H8b_cq7&*A64ipLg4nzW z@u8q(L7hShk!$W?-qQZ+OE7r_+ja6`1X6#$xxQ_~>0AJFb+9!*&$|U~5|p3cztzbl z3h(WkGz)hW&$X^zj<&5+8H-kr8XkE$VFnc?ofn;Vu@edn1P4-nBkWPVXFm$5CY1cM ztw(=RsC4XW?el%wRL3P8?3B|gT$k}ws*5;%>xyENubzGQC(}s1MDEnQLdVsMt1C1k z0;ewRI}qSjn$kla&5OD`z5dB3&aE>k%asDRo@GmNtjN|kg07R-+_byy|C^=vp*25u zkq@i>@kw<|dw9}h)rwrQbqAwsYhzSdzr~d%a(~~1G?C-mzig-WhFypbUtV4pj8;@DG`4zlPe@U<7jv5(v%Q&7 znKwSSl9p$c_r8iU!m9;|<$4E2jrtpF-)$={;#%n)_dHW@n}2+%?SZu98;wM+K>&7Z zrAZY%v8nZ!b1ps0qm5FBmnc|aZHmrLE~tX`k%T(jPt2p-dBw4l3nW~ImzXBJ#hqX} zbLSOhPcE>5jw6Y6xW`CGP5R`5gIo&t2-W5zG1vaY%ZZ@*SDNqIpZGAlk-{HebRwuA zX#ST*<@lm`Gd;CK&O!IWwdov!okC?!*#B@ekc&)~>d;PpFf_pnt(>`+7v2xU(FncZ zHy0i$kn715IB^J0PVxQ$S=+lJHCMJcJnAARXsn$7XSS@kX##QO{KA<=QM#=7PYF9$ z&KEQ^SI)129@9jiDMlMGR9&ZYaOAjd+H%Sw+Q14+Ll-g1BJbG^Q>+-Jh&8;z@=K#* zIi<)$*wgs(3`N1Np|IbfWs3YiUsvAE=hERCb0G~bxsW?7r9h6h$SpQqc~ZS?t6&$3C_pnC9!(1b)9?kW9_pj&R@|h z#|{jQr-_BaEymz4&jT%%JtJtZT+;&c!sADdjyfKRB)@9#cWc^*k}OL``w?nk^XTYmSYv_twY0~p z`=!w33ep~5{IpFmH$NG7+^haXxBn52-x5RdK9n^%Z&7NffC+7ibyXAgsiY$ zYmsyFWq$a`gNh9qsF+AEBG8POjFYf2RU}WWjFD+Xg`Ziz#r()}a`be|K)cJnZRnap zPC|8N=y=hOS9gB3=Ca_WWq<65v&PkC!m3~r&|#K3Lb*~yvmRG{*oK-9R)*b#;^qL+ zvX{Pk-rYs6cjhc?tg8z=GU!JikYrL>F<;y`2Vj3awXoxnkb!`h#|GE&I~${dPc7em{!23cJ5LESHSqiW zE8M6~;1zYEVJ@S&P`hv6k7!0hunNwef$fan0aLG#P!>Z19Es|EAh(fyyZx}H=865X z)GzsXCVXbl_Wx(ekNw9HVc%8_m7LWls$C z(swFq1WB3bMBl&!6Mj3he7lVFz9^+i~F4F+xDRM?LGH*J^hkWEp4QpVM(R7fgg8xXyul^ zz2N$J)cymOnUAZv$e-6UI$0rI(IRfwR6tMGJezZNL*WwNL&sNoTMtcKAS6E~^R3@L z8_^Edn+D?vV-45CwWCZev10Ff=F8(0a`%&$7rm5L zYV8STxnGOozNR!Rqe^ttc$d~k1{VZPx#nvHE*y?oULUBsnmqX=v1dtI#q~S!86Q4J z_pv6k2ZsXs>YG|8Hl$9Ff_2j>-t!bnM&?iN4rnZ6*x?%5K6`zL8x3k~9@g(_R*3s} z{=&Mb4}*sSe6ZtBI+bZ123AA3=FZPvwEC+GZ#OjGpWJZS-R(wN)MKg2ImVNcYG>K@TxUT(^r(#-Qyu4qm1Ne<4f8aLri7e&7gEFRT% zuRj*`NB3@W*_~GktG{C|im$BwFgCJZ?45)RHmj9e-beU26#T%N-;mC|JUaE^=1WPt zztpe4A5%AsdZ$uak?$EGdd&BadL7#rxuM-A*QB1O-pCAI(!sFlXjUYhs?4grcSkb0 z_^oY^kNN|?RM+Q9*PN@{<9eEl+0lgQkWTRgwWPl(>ZMc9!5Zf#Gd_4G7l-ToFdb52 z;9<5KxD2RR_B!B(QZ<$ysY%vv*uOqDdiYSQ7D|BMFkt(S#g*@`fi9odry*hc|iZxmwA` z6F)sN9<{>;+*y@h`+alw=|1l#BgB^f);%4yB>?v*Fk&*`jlFt=(SZ&HN>I>(p_h^& zR|3GHjT)%GOztpuQzIA5WdKFFr_@-wW5f+=}y;V1H}tl_8}@Trqpn_aJ$+^ zleBe`ITuc?&zZ;O}-NB@`?cady-%%dC z?4#eFv4hjQgZkJg8N;ag#&xk>CeS z?Kw@quoXW4(oD6LE{(I6Sv=`fEV1I}o(`>$KU_T>7dJXTjp{0YFfyhcyqH@2=n$3e zBz#{PTF5)j$eYc`e>`4ASy5Ejsq*CoXbZLn6=zCuq2i3FPBhAHh9AaS5r$PYR51h< zC0kJ#^rX^R7-i%YX;B>VDz~?%Bh?FDo;LMlIGR&g);M@c9E02I4y)|86@^!(aXJh- z3nqDCkXNnEkS-x-WEH`y*BBha8iVro!r=DPoV+mBqKIT>qP7@V!5f(3XHCWbJykeU zhr*(`FtdVEoTi>YCGlZfzb8iP0Vupi8hBGgO^9F>dPPmendZa4|k9k&bw2^Kc z7K4(oj*+kjjZ85t2E*+bjfGKC&``k3jel$7LwLf(r_XHop*>0YwH4;6%V@b77K zF9lR41xJQEj&=}-S5U;wFwzBeJZT=td;J)MF~x!5%_7Oj2v>o(@c;EPFaYZ)gL=>+ z0*aix9jTrqtbwT^#=w=~#S$!w!7C~#!XFkVRP4X-17rSce&7M2+?qf`G-5c>oY}hY zj=z?YBG>?rmsixVz^fBg@kAxH?TUD!qM|15pD4jwFjWVB3IIP1_=c&oC}&M&cv^rD zwaD~ibcQc|mxqTNP4Ht10Y7w-kVX@Sm~@uP-)IDihmp~LHiNr60xX1%sLOElc85US zt4qRqd()hV%Fa$2suX3ayqXe)0+yjV$t%J~UY)9}tm34oqN?Jg^sWAIa{OBk4{uL5 z6najMII0^JVd(`f!Gj=9jznj{uST$xFZRFzGO${jFvy~@+^AY8uy?t!1PwPD0x_Z+ zh3-Pa`pG*{ohjaKEG*a+rFIh7&W)xeXrS|OU)K5;mPNZDaKOm8zr#{X6!U-d`M+D} zOD=+pO2YagnFd;4Cw-+asct5^w*H&o+6KuQ0X4)h0_&~1L?kBgcts7ox+)$G2dttZ z7RVrM{v(B!mN3Wrc~YH46)^-+1UZ5aMom>!Sry}q5%l5l8hEG-$Ol88=~GrxCj4_B zo}dZ^>}xu_x-wLWulf`fmGF@Mzv{!|)&E7GhPuWVI+iB|el_F?zXQVH%xV5q3;_c_ z&S5~=K}rWyG`cebgV+-4-d