Merge pull request #11523 from Snuffleupagus/issue-10880
Add a heuristic, in `src/core/jpg.js`, to handle JPEG images with a wildly incorrect SOF (Start of Frame) `scanLines` parameter (issue 10880)
This commit is contained in:
		
						commit
						1a97c142b3
					
				| @ -512,7 +512,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { | |||||||
|           this.xref, |           this.xref, | ||||||
|           resources, |           resources, | ||||||
|           this.pdfFunctionFactory |           this.pdfFunctionFactory | ||||||
|         ) |         ) && | ||||||
|  |         image.maybeValidDimensions | ||||||
|       ) { |       ) { | ||||||
|         // These JPEGs don't need any more processing so we can just send it.
 |         // These JPEGs don't need any more processing so we can just send it.
 | ||||||
|         return this.handler |         return this.handler | ||||||
|  | |||||||
| @ -41,7 +41,8 @@ class NativeImageDecoder { | |||||||
|         this.xref, |         this.xref, | ||||||
|         this.resources, |         this.resources, | ||||||
|         this.pdfFunctionFactory |         this.pdfFunctionFactory | ||||||
|       ) |       ) && | ||||||
|  |       image.maybeValidDimensions | ||||||
|     ); |     ); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -109,6 +109,135 @@ const JpegStream = (function JpegStreamClosure() { | |||||||
|     this.eof = true; |     this.eof = true; | ||||||
|   }; |   }; | ||||||
| 
 | 
 | ||||||
|  |   Object.defineProperty(JpegStream.prototype, "maybeValidDimensions", { | ||||||
|  |     get: function JpegStream_maybeValidDimensions() { | ||||||
|  |       const { dict, stream } = this; | ||||||
|  |       const dictHeight = dict.get("Height", "H"); | ||||||
|  |       const startPos = stream.pos; | ||||||
|  | 
 | ||||||
|  |       let validDimensions = true, | ||||||
|  |         foundSOF = false, | ||||||
|  |         b; | ||||||
|  |       while ((b = stream.getByte()) !== -1) { | ||||||
|  |         if (b !== 0xff) { | ||||||
|  |           // Not a valid marker.
 | ||||||
|  |           continue; | ||||||
|  |         } | ||||||
|  |         switch (stream.getByte()) { | ||||||
|  |           case 0xc0: // SOF0
 | ||||||
|  |           case 0xc1: // SOF1
 | ||||||
|  |           case 0xc2: // SOF2
 | ||||||
|  |             // These three SOF{n} markers are the only ones that the built-in
 | ||||||
|  |             // PDF.js JPEG decoder currently supports.
 | ||||||
|  |             foundSOF = true; | ||||||
|  | 
 | ||||||
|  |             stream.pos += 2; // Skip marker length.
 | ||||||
|  |             stream.pos += 1; // Skip precision.
 | ||||||
|  |             const scanLines = stream.getUint16(); | ||||||
|  | 
 | ||||||
|  |             // The "normal" case, where the image data and dictionary agree.
 | ||||||
|  |             if (scanLines === dictHeight) { | ||||||
|  |               break; | ||||||
|  |             } | ||||||
|  |             // A DNL (Define Number of Lines) marker is expected,
 | ||||||
|  |             // which browsers (usually) cannot decode natively.
 | ||||||
|  |             if (scanLines === 0) { | ||||||
|  |               validDimensions = false; | ||||||
|  |               break; | ||||||
|  |             } | ||||||
|  |             // The dimensions of the image, among other properties, should
 | ||||||
|  |             // always be taken from the image data *itself* rather than the
 | ||||||
|  |             // XObject dictionary. However, there are cases of corrupt images that
 | ||||||
|  |             // browsers cannot decode natively, for example:
 | ||||||
|  |             //  - JPEG images with DNL markers, where the SOF `scanLines`
 | ||||||
|  |             //    parameter has an unexpected value (see issue 8614).
 | ||||||
|  |             //  - JPEG images with too large SOF `scanLines` parameter, where
 | ||||||
|  |             //    the EOI marker is encountered prematurely (see issue 10880).
 | ||||||
|  |             // In an attempt to handle these kinds of corrupt images, compare
 | ||||||
|  |             // the dimensions in the image data with the dictionary and *always*
 | ||||||
|  |             // let the PDF.js JPEG decoder (rather than the browser) handle the
 | ||||||
|  |             // image if the difference is larger than one order of magnitude
 | ||||||
|  |             // (since that would generally suggest that something is off).
 | ||||||
|  |             if (scanLines > dictHeight * 10) { | ||||||
|  |               validDimensions = false; | ||||||
|  |               break; | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |           case 0xc3: // SOF3
 | ||||||
|  |           /* falls through */ | ||||||
|  |           case 0xc5: // SOF5
 | ||||||
|  |           case 0xc6: // SOF6
 | ||||||
|  |           case 0xc7: // SOF7
 | ||||||
|  |           /* falls through */ | ||||||
|  |           case 0xc9: // SOF9
 | ||||||
|  |           case 0xca: // SOF10
 | ||||||
|  |           case 0xcb: // SOF11
 | ||||||
|  |           /* falls through */ | ||||||
|  |           case 0xcd: // SOF13
 | ||||||
|  |           case 0xce: // SOF14
 | ||||||
|  |           case 0xcf: // SOF15
 | ||||||
|  |             foundSOF = true; | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |           case 0xc4: // DHT
 | ||||||
|  |           case 0xcc: // DAC
 | ||||||
|  |           /* falls through */ | ||||||
|  |           case 0xda: // SOS
 | ||||||
|  |           case 0xdb: // DQT
 | ||||||
|  |           case 0xdc: // DNL
 | ||||||
|  |           case 0xdd: // DRI
 | ||||||
|  |           case 0xde: // DHP
 | ||||||
|  |           case 0xdf: // EXP
 | ||||||
|  |           /* falls through */ | ||||||
|  |           case 0xe0: // APP0
 | ||||||
|  |           case 0xe1: // APP1
 | ||||||
|  |           case 0xe2: // APP2
 | ||||||
|  |           case 0xe3: // APP3
 | ||||||
|  |           case 0xe4: // APP4
 | ||||||
|  |           case 0xe5: // APP5
 | ||||||
|  |           case 0xe6: // APP6
 | ||||||
|  |           case 0xe7: // APP7
 | ||||||
|  |           case 0xe8: // APP8
 | ||||||
|  |           case 0xe9: // APP9
 | ||||||
|  |           case 0xea: // APP10
 | ||||||
|  |           case 0xeb: // APP11
 | ||||||
|  |           case 0xec: // APP12
 | ||||||
|  |           case 0xed: // APP13
 | ||||||
|  |           case 0xee: // APP14
 | ||||||
|  |           case 0xef: // APP15
 | ||||||
|  |           /* falls through */ | ||||||
|  |           case 0xfe: // COM
 | ||||||
|  |             const markerLength = stream.getUint16(); | ||||||
|  |             if (markerLength > 2) { | ||||||
|  |               stream.skip(markerLength - 2); // Jump to the next marker.
 | ||||||
|  |             } else { | ||||||
|  |               // The marker length is invalid, resetting the stream position.
 | ||||||
|  |               stream.skip(-2); | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |           case 0xff: // Fill byte.
 | ||||||
|  |             // Avoid skipping a valid marker, resetting the stream position.
 | ||||||
|  |             stream.skip(-1); | ||||||
|  |             break; | ||||||
|  | 
 | ||||||
|  |           case 0xd9: // EOI
 | ||||||
|  |             foundSOF = true; | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         if (foundSOF) { | ||||||
|  |           break; | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |       // Finally, don't forget to reset the stream position.
 | ||||||
|  |       stream.pos = startPos; | ||||||
|  | 
 | ||||||
|  |       return shadow(this, "maybeValidDimensions", validDimensions); | ||||||
|  |     }, | ||||||
|  |     configurable: true, | ||||||
|  |   }); | ||||||
|  | 
 | ||||||
|   JpegStream.prototype.getIR = function(forceDataSchema = false) { |   JpegStream.prototype.getIR = function(forceDataSchema = false) { | ||||||
|     return createObjectURL(this.bytes, "image/jpeg", forceDataSchema); |     return createObjectURL(this.bytes, "image/jpeg", forceDataSchema); | ||||||
|   }; |   }; | ||||||
|  | |||||||
| @ -148,7 +148,7 @@ var JpegImage = (function JpegImageClosure() { | |||||||
|       if (bitsData === 0xff) { |       if (bitsData === 0xff) { | ||||||
|         var nextByte = data[offset++]; |         var nextByte = data[offset++]; | ||||||
|         if (nextByte) { |         if (nextByte) { | ||||||
|           if (nextByte === 0xdc && parseDNLMarker) { |           if (nextByte === /* DNL = */ 0xdc && parseDNLMarker) { | ||||||
|             offset += 2; // Skip marker length.
 |             offset += 2; // Skip marker length.
 | ||||||
| 
 | 
 | ||||||
|             const scanLines = readUint16(data, offset); |             const scanLines = readUint16(data, offset); | ||||||
| @ -159,7 +159,22 @@ var JpegImage = (function JpegImageClosure() { | |||||||
|                 scanLines |                 scanLines | ||||||
|               ); |               ); | ||||||
|             } |             } | ||||||
|           } else if (nextByte === 0xd9) { |           } else if (nextByte === /* EOI = */ 0xd9) { | ||||||
|  |             if (parseDNLMarker) { | ||||||
|  |               // NOTE: only 8-bit JPEG images are supported in this decoder.
 | ||||||
|  |               const maybeScanLines = blockRow * 8; | ||||||
|  |               // Heuristic to attempt to handle corrupt JPEG images with too
 | ||||||
|  |               // large `scanLines` parameter, by falling back to the currently
 | ||||||
|  |               // parsed number of scanLines when it's at least one order of
 | ||||||
|  |               // magnitude smaller than expected (fixes issue10880.pdf).
 | ||||||
|  |               if (maybeScanLines > 0 && maybeScanLines < frame.scanLines / 10) { | ||||||
|  |                 throw new DNLMarkerError( | ||||||
|  |                   "Found EOI marker (0xFFD9) while parsing scan data, " + | ||||||
|  |                     "possibly caused by incorrect `scanLines` parameter", | ||||||
|  |                   maybeScanLines | ||||||
|  |                 ); | ||||||
|  |               } | ||||||
|  |             } | ||||||
|             throw new EOIMarkerError( |             throw new EOIMarkerError( | ||||||
|               "Found EOI marker (0xFFD9) while parsing scan data" |               "Found EOI marker (0xFFD9) while parsing scan data" | ||||||
|             ); |             ); | ||||||
| @ -337,17 +352,18 @@ var JpegImage = (function JpegImageClosure() { | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     let blockRow = 0; | ||||||
|     function decodeMcu(component, decode, mcu, row, col) { |     function decodeMcu(component, decode, mcu, row, col) { | ||||||
|       var mcuRow = (mcu / mcusPerLine) | 0; |       var mcuRow = (mcu / mcusPerLine) | 0; | ||||||
|       var mcuCol = mcu % mcusPerLine; |       var mcuCol = mcu % mcusPerLine; | ||||||
|       var blockRow = mcuRow * component.v + row; |       blockRow = mcuRow * component.v + row; | ||||||
|       var blockCol = mcuCol * component.h + col; |       var blockCol = mcuCol * component.h + col; | ||||||
|       var offset = getBlockBufferOffset(component, blockRow, blockCol); |       var offset = getBlockBufferOffset(component, blockRow, blockCol); | ||||||
|       decode(component, offset); |       decode(component, offset); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     function decodeBlock(component, decode, mcu) { |     function decodeBlock(component, decode, mcu) { | ||||||
|       var blockRow = (mcu / component.blocksPerLine) | 0; |       blockRow = (mcu / component.blocksPerLine) | 0; | ||||||
|       var blockCol = mcu % component.blocksPerLine; |       var blockCol = mcu % component.blocksPerLine; | ||||||
|       var offset = getBlockBufferOffset(component, blockRow, blockCol); |       var offset = getBlockBufferOffset(component, blockRow, blockCol); | ||||||
|       decode(component, offset); |       decode(component, offset); | ||||||
|  | |||||||
							
								
								
									
										1
									
								
								test/pdfs/issue10880.pdf.link
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/issue10880.pdf.link
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | |||||||
|  | https://github.com/mozilla/pdf.js/files/3247065/B3-T-G5-50.pdf | ||||||
| @ -3634,6 +3634,15 @@ | |||||||
|        "lastPage": 1, |        "lastPage": 1, | ||||||
|        "type": "eq" |        "type": "eq" | ||||||
|     }, |     }, | ||||||
|  |     {  "id": "issue10880", | ||||||
|  |        "file": "pdfs/issue10880.pdf", | ||||||
|  |        "md5": "244ee5ee3ab88db8d8eb51d4416e2c97", | ||||||
|  |        "rounds": 1, | ||||||
|  |        "link": true, | ||||||
|  |        "firstPage": 7, | ||||||
|  |        "lastPage": 7, | ||||||
|  |        "type": "eq" | ||||||
|  |     }, | ||||||
|     {  "id": "issue9650", |     {  "id": "issue9650", | ||||||
|        "file": "pdfs/issue9650.pdf", |        "file": "pdfs/issue9650.pdf", | ||||||
|        "md5": "20d50bda6b1080b6d9088811299c791e", |        "md5": "20d50bda6b1080b6d9088811299c791e", | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user