From 90d19de935def13faa056ba0c754bd76353ba62f Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 18 Jul 2016 16:01:02 +0200 Subject: [PATCH] Catch errors and continue parsing in `parseCMap` (issue 7492) After PR 7039, the PDF file in issue 7492 no longer renders at all, but note that text selection wasn't working correctly previously. The problem with the PDF file in issue 7492 is that the `cMap`, in the `toUnicode` entry in the font, contains an invalid name: ``` /CMapName /-usr-share-fonts-truetype-Panton-Panton Family-Fontfabric - Panton.otf,000-UTF16 def ``` When we parse that line, things obviously break because there are spaces present in the wrong places. To avoid that issue, the patch simply lets `parseCMap` continue when errors are encountered, to try and recover usable data. Note that by not aborting immediately when an error is encountered, we are also able to fix the text selection. Obviously, it could be argued that we should just immediately reject a corrupt `cMap`. But given that they usually are correct, it seems that trying to recover as much data as possible from a corrupt one can only be a good thing for both glyph mapping and text selection. Fixes 7492. 
--- src/core/cmap.js | 81 ++++++++++++++++++++++------------------ test/pdfs/.gitignore | 1 + test/pdfs/issue7492.pdf | Bin 0 -> 4619 bytes test/test_manifest.json | 14 +++++++ 4 files changed, 59 insertions(+), 37 deletions(-) create mode 100644 test/pdfs/issue7492.pdf diff --git a/src/core/cmap.js b/src/core/cmap.js index 13f2b03af..c5144f960 100644 --- a/src/core/cmap.js +++ b/src/core/cmap.js @@ -31,9 +31,11 @@ var Util = sharedUtil.Util; var assert = sharedUtil.assert; +var warn = sharedUtil.warn; var error = sharedUtil.error; var isInt = sharedUtil.isInt; var isString = sharedUtil.isString; +var MissingDataException = sharedUtil.MissingDataException; var isName = corePrimitives.isName; var isCmd = corePrimitives.isCmd; var isStream = corePrimitives.isStream; @@ -881,41 +883,49 @@ var CMapFactory = (function CMapFactoryClosure() { var previous; var embededUseCMap; objLoop: while (true) { - var obj = lexer.getObj(); - if (isEOF(obj)) { - break; - } else if (isName(obj)) { - if (obj.name === 'WMode') { - parseWMode(cMap, lexer); - } else if (obj.name === 'CMapName') { - parseCMapName(cMap, lexer); + try { + var obj = lexer.getObj(); + if (isEOF(obj)) { + break; + } else if (isName(obj)) { + if (obj.name === 'WMode') { + parseWMode(cMap, lexer); + } else if (obj.name === 'CMapName') { + parseCMapName(cMap, lexer); + } + previous = obj; + } else if (isCmd(obj)) { + switch (obj.cmd) { + case 'endcmap': + break objLoop; + case 'usecmap': + if (isName(previous)) { + embededUseCMap = previous.name; + } + break; + case 'begincodespacerange': + parseCodespaceRange(cMap, lexer); + break; + case 'beginbfchar': + parseBfChar(cMap, lexer); + break; + case 'begincidchar': + parseCidChar(cMap, lexer); + break; + case 'beginbfrange': + parseBfRange(cMap, lexer); + break; + case 'begincidrange': + parseCidRange(cMap, lexer); + break; + } } - previous = obj; - } else if (isCmd(obj)) { - switch (obj.cmd) { - case 'endcmap': - break objLoop; - case 'usecmap': - if 
(isName(previous)) { - embededUseCMap = previous.name; - } - break; - case 'begincodespacerange': - parseCodespaceRange(cMap, lexer); - break; - case 'beginbfchar': - parseBfChar(cMap, lexer); - break; - case 'begincidchar': - parseCidChar(cMap, lexer); - break; - case 'beginbfrange': - parseBfRange(cMap, lexer); - break; - case 'begincidrange': - parseCidRange(cMap, lexer); - break; + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; } + warn('Invalid cMap data: ' + ex); + continue; } } @@ -926,9 +936,8 @@ var CMapFactory = (function CMapFactoryClosure() { } if (useCMap) { return extendCMap(cMap, builtInCMapParams, useCMap); - } else { - return Promise.resolve(cMap); } + return Promise.resolve(cMap); } function extendCMap(cMap, builtInCMapParams, useCMap) { @@ -990,8 +999,6 @@ var CMapFactory = (function CMapFactoryClosure() { parseCMap(cMap, lexer, builtInCMapParams, null).then( function (parsedCMap) { resolve(parsedCMap); - }).catch(function (e) { - reject(new Error({ message: 'Invalid CMap data', error: e })); }); } else { reject(new Error('Unable to get cMap at: ' + url)); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 9cf555ca1..2792b2e5d 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -28,6 +28,7 @@ !issue7200.pdf !issue7229.pdf !issue7439.pdf +!issue7492.pdf !filled-background.pdf !ArabicCIDTrueType.pdf !ThuluthFeatures.pdf diff --git a/test/pdfs/issue7492.pdf b/test/pdfs/issue7492.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9a16ee0d46cb8bce4fa5ce6c539d41f2dcd90ca5 GIT binary patch literal 4619 zcmai12|QG58!r{^h-ejEok_B1_C;MGdy<&!6T>WsF=mD$bVW(oCCXCP6q3qPB5R0J ziVBq_lthXU<$DiZ)xG!ozVma&bN=u9{GaE2&;LEY=aDnj(NjflB_iZ{dYd03&=3mp zaCC(b+qWasv}rtso(G!`sTr8=voP0HHl?xo9&A-J2FuHh#zm;{ zB0@t0p=R#o$oJteAea-5O#u&p05)|PJSVOThws6KFd$7cgqkIUoj2fF(A*@Lguw=U zU=6sR7o+Fm#%FNBBsUtLp~GVfg+r3W63_lxbZZ#|Y`*Y7!VcX7Co#F}O9@;4!O+a$fr zM~b|gm&8}S)- 
z*QDBx^SivrBR-_77_*zSQR#cABG$g#C2tFv`!bYrbF7oFI|}w&i(cAlxb1=Ud8?$M zn$}^`bH%3D^xcvj1*FD^YxTxa1WMlW@ys!AYr$52S2z7>x_#-;#fGt=&X0>Pt+|A6 z%YP<4^S13##5<)gUxl6(*Ji=*aQ<~b3$GJ?=?mI~1z|#S2XFnqXww|X(E=i3fu#68 zZj5z%{Qsm=8#Vfraq%lG5Ti!Qv(2VD-7tSUd;_9hM-Bs$vKL z*79aqy3m1s@E9^e&5%!Xb8*sQvp_H~2%iZCA0o`N!SveN9)}=XRSb#@{P8#ljmD86 zG66$HsQu9I50L{38pnX)!g8L^;wNFlDK6~9zn++#AnE@hcnq41{zdWb7A}<4mTRx1 zrCzQzStXaL*`Ul^y{1!7GE(|(xTM!Ztu3M^cuK?q3>dkizTr?;JI8lj_|% z>ysX`LreT=S#b22WMy;oj75Tv-mbehn?^PV1`8Sb`lFcCA%|zj3K}|k&eYzO%N#bk zZFs*ZikGr3~^wgWRBhM9kN+VH!Aa76)oR(!FsGv6Ho7`@W<)xnp^_`bjPMV#++q(C4&RIUm$4Sfuo@VU>8YR**!_w6o^#xR$c(+`xRT z2#)OP>c}VgTae3K$QwL0Pl=q8XP-JmTvwUCv%0TRMyjN4B5=vpvBi3J5$080R?dM| zgZp?C4u?(=m(L41P9RMwcL}d^xc~6axGw!yd1vynd-nGf%5Y=J#bJ4ALm>!NSFux& zV!C?oC!NHRBStnITCauD2LdAXWj|DRoP9X{2q}EA%FrM)h2XZlJLCGAaPb$yBKc8; z?>YC7x9UUCuE(_8ThB13tngD)%u|y?p;-BLYmV}%q`KONA(*zavew)GaNL$9?^hIZ7RYZT zRpx*S;n3G(AsdWSH?|RP2aL6*F!ZDMT3@)nK62d`tMKCjc~n39qK~#JjM178QI3i^l7B8yovb-^ar5?92VA*~a^wu7m+DH<ofXc)!XoX=b7Vf>9g;pW|Q zT!d=3>UP!+qOBM&-Z@GoAS}qUYX&tGG}+B$%CpJ3J8nLF_O*!SB^=`@dM9-abH7}m z_lYi(=EYk=s(QzAEP4w9u8G^Eu8Mi4uPkaK?b~Cgtr(^*efff$U{H8(ye8z1mCWn( zY|p|fh>NMD2dMA+E!g2kYCt+)WaBF%b>DqU3jRE=IexU*)JuC$k!iJ1GliAnL+?VoH@%Dz+dnJm)Av$4gxLLJ zyoe>2o*HxgdMp{AY?nZdKxBWcs^UJ-iD#|bB0KWgaRa)dLQ%R&lklgydj7y7AM>c{ z*=FPBa~kg+=4M9q+#5c(M2xK2D?X|+t7x8bqqh7cwFHgj$;W3m?cFj-9vI8Sio0xg zCsUk>$af7!kBLLok8O97I?;K!%)WJZJY2ARJh9J0`oi7at78M%_C$gr~*{Kuh*D}5dJS?6bS z$o)H0+r#`f>zYg4f%X%A?2X9w#tI z-!O8rJk4^=?0V~pW4aVKql@*`O^pL%g_7geJu~TEAMfa3qcIpa9?Vgdj7#YBT7F#i zYE?<4Jj;&Qdf457_#3zGVr*X7Y;@#+s`2dZo(;Q{$H`}1GmJW8*3_D``wc4!Jr!Cb z&rUC}Y;^o^#CwXy6bD8yo2g4+d@@plv!oxYzNCG}h|v+N7rf$~Kd4ko(SR zv*dJ+eDS;^vrh1x=Oz2dkq7as{R20>ZAeTPSXX0_{3?Wd58p_cX;=R<)`}66 zX+EE3trEC>5@9`)EddQ2PNeu6*Whp6p&!l-XHsm+`k@eB!5yPK^F&Jd=dvxC)*i&^ zDly6aZol`w%w56hPo|s1_{wGj^!u{c%6Et(Y1f}s6R)DZaO3;y1Zzt?B%s@-nA`4wr!DvgTKETux>bHsrE){d#s*gj_7N` 
zOUIx-0>bAcC5Gk`)9s#;T-~ub#-!o!?E#?~Z2#!R29wh+`gMvNCAOs0bgzz{r6~dtA{9y|XB4TkM4hBvOM(c&|L;%_VJd!Vk$icQFh3pE)POQQvX1QqUARR}DiJr)@Lx6LO09MqJ?V(=gm zfI!DJU#RY+fm+vt^>eQUk1yo8n?ZM>%~iW70E5kh@F-B_?sB2?AX_rb1fF(!c!5e9 z{TJt4n4cNpc?*oDG%j3sf6wB-@S8Ds9$s80knLRi3fJ7;DN;h-?<