From b1472cddbb07692089371dd041ad8b31717f6826 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 17 Sep 2017 13:35:18 +0200 Subject: [PATCH] Allow `getOperatorList`/`getTextContent` to skip errors when parsing broken XObjects (issue 8702, issue 8704) This patch makes use of the existing `ignoreErrors` property in `src/core/evaluator.js`, see PRs 8240 and 8441, thus allowing us to attempt to recover as much as possible of a page even when it contains broken XObjects. Fixes 8702. Fixes 8704. --- src/core/evaluator.js | 197 +++++++++++++++++++++--------------- test/pdfs/.gitignore | 1 + test/pdfs/issue8702.pdf | Bin 0 -> 7379 bytes test/test_manifest.json | 16 +++ test/unit/evaluator_spec.js | 10 +- 5 files changed, 138 insertions(+), 86 deletions(-) create mode 100644 test/pdfs/issue8702.pdf diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 73672b51f..3389f7a30 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -948,52 +948,65 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { case OPS.paintXObject: // eagerly compile XForm objects var name = args[0].name; - if (!name) { - warn('XObject must be referred to by name.'); - continue; - } - if (imageCache[name] !== undefined) { + if (name && imageCache[name] !== undefined) { operatorList.addOp(imageCache[name].fn, imageCache[name].args); args = null; continue; } - var xobj = xobjs.get(name); - if (xobj) { + next(new Promise(function(resolveXObject, rejectXObject) { + if (!name) { + throw new FormatError('XObject must be referred to by name.'); + } + + let xobj = xobjs.get(name); + if (!xobj) { + operatorList.addOp(fn, args); + resolveXObject(); + return; + } if (!isStream(xobj)) { throw new FormatError('XObject should be a stream'); } - var type = xobj.dict.get('Subtype'); + let type = xobj.dict.get('Subtype'); if (!isName(type)) { throw new FormatError('XObject should have a Name subtype'); } if (type.name === 'Form') { stateManager.save(); - 
next(self.buildFormXObject(resources, xobj, null, - operatorList, task, - stateManager.state.clone()). - then(function () { + self.buildFormXObject(resources, xobj, null, operatorList, + task, stateManager.state.clone()). + then(function() { stateManager.restore(); - })); + resolveXObject(); + }, rejectXObject); return; } else if (type.name === 'Image') { self.buildPaintImageXObject(resources, xobj, false, - operatorList, name, imageCache); - args = null; - continue; + operatorList, name, imageCache); } else if (type.name === 'PS') { // PostScript XObjects are unused when viewing documents. // See section 4.7.1 of Adobe's PDF reference. info('Ignored XObject subtype PS'); - continue; } else { throw new FormatError( `Unhandled XObject subtype ${type.name}`); } - } - break; + resolveXObject(); + }).catch(function(reason) { + if (self.options.ignoreErrors) { + // Error(s) in the XObject -- sending unsupported feature + // notification and allow rendering to continue. + self.handler.send('UnsupportedFeature', + { featureId: UNSUPPORTED_FEATURES.unknown, }); + warn(`getOperatorList - ignoring XObject: "${reason}".`); + return; + } + throw reason; + })); + return; case OPS.setFont: var fontSize = args[1]; // eagerly collect all fonts @@ -1666,73 +1679,93 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } var name = args[0].name; - if (name in skipEmptyXObjs) { + if (name && skipEmptyXObjs[name] !== undefined) { break; } - var xobj = xobjs.get(name); - if (!xobj) { - break; - } - if (!isStream(xobj)) { - throw new FormatError('XObject should be a stream'); - } - - var type = xobj.dict.get('Subtype'); - if (!isName(type)) { - throw new FormatError('XObject should have a Name subtype'); - } - - if (type.name !== 'Form') { - skipEmptyXObjs[name] = true; - break; - } - - // Use a new `StateManager` to prevent incorrect positioning of - // textItems *after* the Form XObject, since errors in the data - // can otherwise prevent `restore` operators from being 
executed. - // NOTE: This is only an issue when `options.ignoreErrors = true`. - var currentState = stateManager.state.clone(); - var xObjStateManager = new StateManager(currentState); - - var matrix = xobj.dict.getArray('Matrix'); - if (Array.isArray(matrix) && matrix.length === 6) { - xObjStateManager.transform(matrix); - } - - // Enqueue the `textContent` chunk before parsing the /Form - // XObject. - enqueueChunk(); - let sinkWrapper = { - enqueueInvoked: false, - - enqueue(chunk, size) { - this.enqueueInvoked = true; - sink.enqueue(chunk, size); - }, - - get desiredSize() { - return sink.desiredSize; - }, - - get ready() { - return sink.ready; - }, - }; - - next(self.getTextContent({ - stream: xobj, - task, - resources: xobj.dict.get('Resources') || resources, - stateManager: xObjStateManager, - normalizeWhitespace, - combineTextItems, - sink: sinkWrapper, - seenStyles, - }).then(function() { - if (!sinkWrapper.enqueueInvoked) { - skipEmptyXObjs[name] = true; + next(new Promise(function(resolveXObject, rejectXObject) { + if (!name) { + throw new FormatError('XObject must be referred to by name.'); } + + let xobj = xobjs.get(name); + if (!xobj) { + resolveXObject(); + return; + } + if (!isStream(xobj)) { + throw new FormatError('XObject should be a stream'); + } + + let type = xobj.dict.get('Subtype'); + if (!isName(type)) { + throw new FormatError('XObject should have a Name subtype'); + } + + if (type.name !== 'Form') { + skipEmptyXObjs[name] = true; + resolveXObject(); + return; + } + + // Use a new `StateManager` to prevent incorrect positioning of + // textItems *after* the Form XObject, since errors in the data + // can otherwise prevent `restore` operators from executing. + // NOTE: Only an issue when `options.ignoreErrors === true`. 
+ let currentState = stateManager.state.clone(); + let xObjStateManager = new StateManager(currentState); + + let matrix = xobj.dict.getArray('Matrix'); + if (Array.isArray(matrix) && matrix.length === 6) { + xObjStateManager.transform(matrix); + } + + // Enqueue the `textContent` chunk before parsing the /Form + // XObject. + enqueueChunk(); + let sinkWrapper = { + enqueueInvoked: false, + + enqueue(chunk, size) { + this.enqueueInvoked = true; + sink.enqueue(chunk, size); + }, + + get desiredSize() { + return sink.desiredSize; + }, + + get ready() { + return sink.ready; + }, + }; + + self.getTextContent({ + stream: xobj, + task, + resources: xobj.dict.get('Resources') || resources, + stateManager: xObjStateManager, + normalizeWhitespace, + combineTextItems, + sink: sinkWrapper, + seenStyles, + }).then(function() { + if (!sinkWrapper.enqueueInvoked) { + skipEmptyXObjs[name] = true; + } + resolveXObject(); + }, rejectXObject); + }).catch(function(reason) { + if (reason instanceof AbortException) { + return; + } + if (self.options.ignoreErrors) { + // Error(s) in the XObject -- allow text-extraction to + // continue. 
+ warn(`getTextContent - ignoring XObject: "${reason}".`); + return; + } + throw reason; })); return; case OPS.setGState: diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index e15216121..debd68f74 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -57,6 +57,7 @@ !issue8480.pdf !issue8570.pdf !issue8697.pdf +!issue8702.pdf !issue8707.pdf !issue8798r.pdf !issue8823.pdf diff --git a/test/pdfs/issue8702.pdf b/test/pdfs/issue8702.pdf new file mode 100644 index 0000000000000000000000000000000000000000..46ebfffec160828b4c6caea7221e24dba15f3db2 GIT binary patch literal 7379 zcmeHMc|6qH`&X7Ug=`h&`ZQFO@%hYZOpNSXB2Ct|G4mNSn8nOz82T0}Dyc+%H(D-T zw8^z(TC}OljYzAyTu}*W(WXty?=vIe=67#@{C>Z`8Z$3vd7k%k&htFyInOy~rk{%& z4sT7?ntAVj<8=%kfB<1wgciomP74Dv;zTe2`bv=~o&c6;Vd!)%3@l(P2}r~tQs4`v zGZSG(3Bv)H2$PLs0OSG2#fVtS zLKtEg_7@5f08jnG@52;FdI~r~Krsdd-~y%~93Tx;MhsHX_GLbUgY*rHSd8!iZ~-g` zN4NkU4^aTnjVG2M0D*)S7Xn10q9F=_#pwSiCIY*_EX8OQ$Pz>hGx=H=+3xB9A7Kl1 z(&=#*6J+xn8$>DZ)znls>F;-J3cf&~sHaVzn*E3ynVI4eyC>o7xY~m)uOhdEO`)X| z?@haW%0};CUV5TF`^p?AZ4=9qwF~u2053zT;~}y?8^4a&QQtnjh@rh`>2GuyyT{3VTOM)aR)Pyg&oX7cUTu}7`6a{DwKawBR!-4u zXwuejbu91NQm`pF^D>tt#zBJXypb%Nsc`*+% znb`fYcU!`ra}3nC@S*WReW2T&k*#kmQ@2m%cUW^z-v@ow4iv1$vAWPH6Jhl*v8%4xn11bq&(Bt0FjQj_RkYId|XP&DlH4 z@5Jhfjnll%+KL>fj7xsdV>RXX==jO@i8ZrGecF-(#H{jb#u!Xb+RQEkOwX9Je$}7- ztUhk8zE`Cp@hEzBc<=gm^Mv=Qj^{rlI==|Laz(f-@e2Pm7wS}nVpa((L^ zd-+_rDQI3FJ+W{N{0B$%avt<#QF%?s#(N}vWmJs&8+vr=IHwY73B{y|Ga%i@^rV(tQ&Wp znD`v{Oj^?2u&V7pvR9u}m%Wdulo!WV_%>anvY2tN2iquP#Y!(w4O65 zvfy5Oxo~FIwEH59_M3IZj+yX&TI=hC^zqad=YYQRYt$wEb4)&BBTb8S`pQq*P5m83~z4o%iQI&3Zib=ATTR zTAft~`oll8%xa#Uk>H>DpkzYF{*&$Dm^j1AmRIwRKF0&L8u{DCxc-rNFEBSawzyKn zYdLh#rphMwrM%1N}(ywW?~KRbzk@w9ywF;X7$`c!G|gzL>H z!lTM%C+fh6WBL8fdn)C3T5|j2b@w)Z?lRcFsP!oGE!&uz*S5f+aUYnKc4K+>b^V^6 zJv$5dzP|6x&h~r!w9xl;VAA!>s`ghMrt{@X)?k#Obuheqjk1IB%K{eQ+?_0k;xB$! 
zsQwbIMg*(D_(s7~==4EY+3wTXK9SS#lTvyo^qY2a$6*_}uSzG(*l=6G<>gj|##`90 zZ=Z^Z=7n^({T34_ZjA^P={Hnsly@7Qj{ksV1)W;Ad2@Bci$2V=7kb43-fu$!H4jYM ze$-88xmnP-=iQ?r*uS6E2m829}%&9y@)zMTQ@* zO(pq_+l9qJUXKEjb?oM!qm`iV@xD|IvC zs2Z&fDN>=lt#^9SnR8Ec&8IB|@7#G33$;9LY!i06$7y9wif`3$qn}$cC1AB{t`?%R zI_jf^T1SwsXmcH}`=@@3Sk*Ho&opmI`=XnU*t9O)WmNCr_@=P9sWrXlGpj;ABRy?i zd#-Oq!YBIL6{$v-OAR??uGI<5$s2F_rF+XZ*MugYujd~5^;76vwl67QhtZZjVUtT{ z*UmXn%`UPa8qcjwt08Q-dFdB+h7=PR65z=p1Y~5V8NzNX-*C>Gan|C5`M;`F?9I)R zhfK=O|F2roL#yLe>~cVSgE6jT`)P0^);-YIoimf^wPQVHp>1i#&V5sQBkrX78(c7Qi~$KIDk$8vswrEY1n)6F(O z$Df1IDz;KPHD<))iM29Tw5E_0;~w0zYN<0G9hYKkmjz~=_sTDb+|hh+@^Fd)0TDA#CjC9%^WyU%R{O;CgKb9SQF7ec}Gz z#sz)NjH8vUdiFqV=YtLRlLBJ@cxlt$ry6zA{dly=^NDW`nci+(zclyLCl}Kz=H?a0 zoX)U9=k+-JGKZFrtz?*A-*WKIk@u}s!^cVrRsy{r5hZK3aP8s_*gf?p+rO z_P;bW{J^M}mzwN+Xcqc;3MZ*lBU-L;Y-$eIsGboSS&FOwi9ETE!Rp$n{U`1*xG%e)A^q@$4`j}z2FqKH6Z@k{QML(ZlST>MGdlf zy2nk1am!lgv6~oyQ_N=1(EZ6o&x~I-hF57+aBGdUtl6QiXZ#`?$hDC9RvwqCzTn_} zy>pC)i5)UB_1u<-m>mU|H-(lPiegq~_dF4In%jqMnw=sm{nU8A?lhhgT@b#F-TAx9 z7V>`evg+N29))#vm)($%`PmVV#T>8AW31$oSFLGhJ$Lt}bO`dxmw8{^-}sv`y7uT- z&-(W2w~te4?$|l1f02eqL^Qp0K{GeN!i7eF@HUVQfr2MN1c-=(pxNlx5_CorIfN$^ z{0kX_heV+;zT$AEfEO-i^7&|H>ksqz5&&2Xix8L}CWZk51mP{g#nP|{n1uiqmY|~) z;R?loMTqM{S7%4S&%@E*$I%k>hT~#|Vzxv{;>Yj~aPy_hz5eOv9Z=n zC8LPVv1SSRmcx@|MY8=RM^@(XLut5TLI`>W0E2uMd`*~%R1#rmecCz5L`+sBi~wP9 zI8R`Y?X5hE1$b%&)WJ4rStWfpi z2_QrT2}B&8NP`q#;L8U~$Dk!UF`Gm4N3}K}4z1Z^hbbqPXe|_ngLqq8TM#0E1Og7F zz)9i+2vdd=NX!O6260?q2}{fqDXar1aG7C3DPoVsDg+ID2BffAUxkRI;wYsEHVcHK zU_LBBBp}`z4}Qah>g6k!Kw_=bio#@|j447npiwz|eEyk-&;OQ9#1$e!30L?(i2K?- zx=_@SMi+^8pd7?ebrTtqTDVmNwat3rap^ie#8N716t z^QS#lhGWAVrZft{(nl+A4-Ji0QpIBMbfJfl z@L?>XEeA(|h%6il4cRzb7$)Ith!iFpvL!$^oG|6oMzVba=_!yPOaTiL{) zUp(a-kxEbb`v8@GGHf~!5oS?XWCD&wwPmAPfNgP9au^O5t?>xL4UJme>&aFqp5dCloV;!YKN8ZBzr}Qaa)nwqb~#Qilo{O=e3KZN{{@%?uDp{pOd`hE?3 zKjja<`k|}u*TDBv{_v~+J6#POcG3GSdu%Lvl-0tFeit$9P>sG%E#}3;0EvQ@l+RYm zn+nCdmGW6j`I4rndAfw!Ig%k;3f0kx2oc;|Atzfh5h6R;5M3y?HbiGPh)n-?ie*{@ 
zPhDV;Kg?b~2&zwD;`L1)R>Y}xpE_te2viTF1sJL$Hu*z#5ca!YlMaZz=nXilZ#iQA{E$OG@ zSR*g%j8Zl>+v|+^s@er*SxCkS_KaY!fQ$;Z*~6LhYPWR+iqk)MZp{$Z9xMb^QWH{b nq{p9+86%vSG5Xy&n&yiUnHc71VJPUqT?;dFrmL@;7UsVJq(0Ur literal 0 HcmV?d00001 diff --git a/test/test_manifest.json b/test/test_manifest.json index 5285e0feb..6d07597d8 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1640,6 +1640,22 @@ "lastPage": 1, "type": "load" }, + { "id": "issue8702-eq", + "file": "pdfs/issue8702.pdf", + "md5": "59d501ed1518d78ef6ee442cf824b0f6", + "rounds": 1, + "link": false, + "lastPage": 1, + "type": "eq" + }, + { "id": "issue8702-text", + "file": "pdfs/issue8702.pdf", + "md5": "59d501ed1518d78ef6ee442cf824b0f6", + "rounds": 1, + "link": false, + "lastPage": 1, + "type": "text" + }, { "id": "pr4897", "file": "pdfs/pr4897.pdf", "md5": "26897633eea5e6d10345a130b1c1777c", diff --git a/test/unit/evaluator_spec.js b/test/unit/evaluator_spec.js index 3509557c0..3985d6fb3 100644 --- a/test/unit/evaluator_spec.js +++ b/test/unit/evaluator_spec.js @@ -14,9 +14,9 @@ */ import { Dict, Name } from '../../src/core/primitives'; +import { FormatError, OPS } from '../../src/shared/util'; import { OperatorList, PartialEvaluator } from '../../src/core/evaluator'; import { Stream, StringStream } from '../../src/core/stream'; -import { OPS } from '../../src/shared/util'; import { WorkerTask } from '../../src/core/worker'; import { XRefMock } from './test_utils'; @@ -48,6 +48,8 @@ describe('evaluator', function() { operatorList: result, }).then(function() { callback(result); + }, function(reason) { + callback(reason); }); } @@ -229,9 +231,9 @@ describe('evaluator', function() { it('should skip paintXObject if name is missing', function(done) { var stream = new StringStream('/ Do'); runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), - function (result) { - expect(result.argsArray).toEqual([]); - expect(result.fnArray).toEqual([]); + function(result) { + expect(result instanceof 
FormatError).toEqual(true); + expect(result.message).toEqual('XObject must be referred to by name.'); done(); }); });