Attempt to handle corrupt PDF documents that contain path operators inside of text objects (issue 10542)
First of all, while this simple approach appears to work OK in practice, I'm not sure if it's the best way of addressing the problem (assuming that you even want to). Second of all, while the solution implemented here only requires tracking/checking one new boolean in order to work, I'm nonetheless not entirely happy about it, since it will add additional overhead (albeit *very* small) to the parsing of path operators in all PDF documents just for the sake of a handful of *corrupt* ones.
This commit is contained in:
		
							parent
							
								
									f87dc42780
								
							
						
					
					
						commit
						5335285cda
					
				| @ -822,14 +822,30 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { | ||||
|       return fontCapability.promise; | ||||
|     }, | ||||
| 
 | ||||
|     buildPath: function PartialEvaluator_buildPath(operatorList, fn, args) { | ||||
|     buildPath(operatorList, fn, args, parsingText = false) { | ||||
|       var lastIndex = operatorList.length - 1; | ||||
|       if (!args) { | ||||
|         args = []; | ||||
|       } | ||||
|       if (lastIndex < 0 || | ||||
|           operatorList.fnArray[lastIndex] !== OPS.constructPath) { | ||||
|         // Handle corrupt PDF documents that contain path operators inside of
 | ||||
|         // text objects, which may shift subsequent text, by enclosing the path
 | ||||
|         // operator in save/restore operators (fixes issue10542_reduced.pdf).
 | ||||
|         //
 | ||||
|         // Note that this will effectively disable the optimization in the
 | ||||
|         // `else` branch below, but given that this type of corruption is
 | ||||
|         // *extremely* rare that shouldn't really matter much in practice.
 | ||||
|         if (parsingText) { | ||||
|           warn(`Encountered path operator "${fn}" inside of a text object.`); | ||||
|           operatorList.addOp(OPS.save, null); | ||||
|         } | ||||
| 
 | ||||
|         operatorList.addOp(OPS.constructPath, [[fn], args]); | ||||
| 
 | ||||
|         if (parsingText) { | ||||
|           operatorList.addOp(OPS.restore, null); | ||||
|         } | ||||
|       } else { | ||||
|         var opArgs = operatorList.argsArray[lastIndex]; | ||||
|         opArgs[0].push(fn); | ||||
| @ -881,6 +897,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { | ||||
| 
 | ||||
|       var self = this; | ||||
|       var xref = this.xref; | ||||
|       let parsingText = false; | ||||
|       var imageCache = Object.create(null); | ||||
| 
 | ||||
|       var xobjs = (resources.get('XObject') || Dict.empty); | ||||
| @ -999,6 +1016,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { | ||||
|                   operatorList.addOp(OPS.setFont, [loadedName, fontSize]); | ||||
|                 })); | ||||
|               return; | ||||
|             case OPS.beginText: | ||||
|               parsingText = true; | ||||
|               break; | ||||
|             case OPS.endText: | ||||
|               parsingText = false; | ||||
|               break; | ||||
|             case OPS.endInlineImage: | ||||
|               var cacheKey = args[0].cacheKey; | ||||
|               if (cacheKey) { | ||||
| @ -1158,10 +1181,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { | ||||
|             case OPS.curveTo2: | ||||
|             case OPS.curveTo3: | ||||
|             case OPS.closePath: | ||||
|               self.buildPath(operatorList, fn, args); | ||||
|               continue; | ||||
|             case OPS.rectangle: | ||||
|               self.buildPath(operatorList, fn, args); | ||||
|               self.buildPath(operatorList, fn, args, parsingText); | ||||
|               continue; | ||||
|             case OPS.markPoint: | ||||
|             case OPS.markPointProps: | ||||
|  | ||||
							
								
								
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -76,6 +76,7 @@ | ||||
| !issue10388_reduced.pdf | ||||
| !issue10438_reduced.pdf | ||||
| !issue10529.pdf | ||||
| !issue10542_reduced.pdf | ||||
| !issue10665_reduced.pdf | ||||
| !bad-PageLabels.pdf | ||||
| !decodeACSuccessive.pdf | ||||
|  | ||||
							
								
								
									
										81
									
								
								test/pdfs/issue10542_reduced.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								test/pdfs/issue10542_reduced.pdf
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,81 @@ | ||||
| %PDF-1.5 | ||||
| %âãÏÓ | ||||
| 1 0 obj | ||||
| << | ||||
| /Type /Catalog | ||||
| /Pages 2 0 R | ||||
| >> | ||||
| endobj | ||||
| 
 | ||||
| 2 0 obj | ||||
| << | ||||
| /Type /Pages | ||||
| /Count 1 | ||||
| /Kids [3 0 R] | ||||
| >> | ||||
| endobj | ||||
| 
 | ||||
| 3 0 obj | ||||
| << | ||||
| /Type /Page | ||||
| /Parent 2 0 R | ||||
| /Contents 6 0 R | ||||
| /MediaBox [0 0 350 100] | ||||
| /Resources 4 0 R | ||||
| >> | ||||
| endobj | ||||
| 
 | ||||
| 4 0 obj | ||||
| << | ||||
|   /Font << /F1 5 0 R >> | ||||
| >> | ||||
| endobj | ||||
| 
 | ||||
| 5 0 obj | ||||
| << | ||||
| /Type /Font | ||||
| /Subtype /Type1 | ||||
| /BaseFont /Helvetica | ||||
| /Encoding /WinAnsiEncoding | ||||
| >> | ||||
| endobj | ||||
| 
 | ||||
| 6 0 obj | ||||
| << /Length 165 >> | ||||
| stream | ||||
| BT | ||||
|   1 0 0 1 25 44 Tm | ||||
|   /F1 25 Tf | ||||
|   0 0 0 rg | ||||
|   (Abc ) Tj | ||||
|   0 0 1 RG | ||||
|   74 40 m | ||||
|   265 40 l | ||||
|   S | ||||
|   0 0 1 rg | ||||
|   (www.google.com ) Tj | ||||
|   0 0 0 rg | ||||
|   (test) Tj | ||||
| ET | ||||
| endstream | ||||
| endobj | ||||
| 
 | ||||
| xref | ||||
| 0 7 | ||||
| 0000000000 65535 f | ||||
| 0000000017 00000 n | ||||
| 0000000074 00000 n | ||||
| 0000000140 00000 n | ||||
| 0000000255 00000 n | ||||
| 0000000307 00000 n | ||||
| 0000000414 00000 n | ||||
| 
 | ||||
| trailer | ||||
| << | ||||
| /Size 7 | ||||
| /Root 1 0 R | ||||
| /ID [<281dda44e224156a5143dc0ac9d261ed> <281dda44e224156a5143dc0ac9d261ed>] | ||||
| >> | ||||
| startxref | ||||
| 638 | ||||
| %%EOF | ||||
| @ -848,6 +848,13 @@ | ||||
|        "firstPage": 2, | ||||
|        "type": "eq" | ||||
|     }, | ||||
|     {  "id": "issue10542", | ||||
|        "file": "pdfs/issue10542_reduced.pdf", | ||||
|        "md5": "92406cb903be6c7a63221ba61fcb8eaf", | ||||
|        "rounds": 1, | ||||
|        "link": false, | ||||
|        "type": "eq" | ||||
|     }, | ||||
|     {  "id": "issue6289", | ||||
|        "file": "pdfs/issue6289.pdf", | ||||
|        "md5": "0869f3d147c734ec484ffd492104095d", | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user