Merge pull request #11523 from Snuffleupagus/issue-10880
Add a heuristic, in `src/core/jpg.js`, to handle JPEG images with a wildly incorrect SOF (Start of Frame) `scanLines` parameter (issue 10880)
This commit is contained in:
		
						commit
						1a97c142b3
					
				@ -512,7 +512,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
 | 
				
			|||||||
          this.xref,
 | 
					          this.xref,
 | 
				
			||||||
          resources,
 | 
					          resources,
 | 
				
			||||||
          this.pdfFunctionFactory
 | 
					          this.pdfFunctionFactory
 | 
				
			||||||
        )
 | 
					        ) &&
 | 
				
			||||||
 | 
					        image.maybeValidDimensions
 | 
				
			||||||
      ) {
 | 
					      ) {
 | 
				
			||||||
        // These JPEGs don't need any more processing so we can just send it.
 | 
					        // These JPEGs don't need any more processing so we can just send it.
 | 
				
			||||||
        return this.handler
 | 
					        return this.handler
 | 
				
			||||||
 | 
				
			|||||||
@ -41,7 +41,8 @@ class NativeImageDecoder {
 | 
				
			|||||||
        this.xref,
 | 
					        this.xref,
 | 
				
			||||||
        this.resources,
 | 
					        this.resources,
 | 
				
			||||||
        this.pdfFunctionFactory
 | 
					        this.pdfFunctionFactory
 | 
				
			||||||
      )
 | 
					      ) &&
 | 
				
			||||||
 | 
					      image.maybeValidDimensions
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -109,6 +109,135 @@ const JpegStream = (function JpegStreamClosure() {
 | 
				
			|||||||
    this.eof = true;
 | 
					    this.eof = true;
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Getter reporting whether the JPEG's SOF (Start of Frame) `scanLines`
					  // value looks plausible when compared with the "Height"/"H" entry of the
					  // image dictionary.
					  //
					  // The stream is scanned marker-by-marker until the first SOF{n} (or EOI)
					  // marker is found. The result is `false` only when a SOF0/SOF1/SOF2
					  // marker carries a `scanLines` value that differs from the dictionary
					  // height and is either 0 or more than ten times that height. In every
					  // other case, including when no SOF marker is found at all, it is `true`.
					  //
					  // The stream position is restored before the getter exits, also when one
					  // of the stream reads throws, so a failed scan cannot leave the stream at
					  // an arbitrary offset. The result is handed to `shadow`.
					  // NOTE(review): `shadow` presumably caches the value on the instance so
					  // that the scan only runs once -- confirm against its definition.
					  Object.defineProperty(JpegStream.prototype, "maybeValidDimensions", {
					    get: function JpegStream_maybeValidDimensions() {
					      const { dict, stream } = this;
					      const dictHeight = dict.get("Height", "H");
					      const startPos = stream.pos;

					      let validDimensions = true,
					        foundSOF = false,
					        b;
					      try {
					        while ((b = stream.getByte()) !== -1) {
					          if (b !== 0xff) {
					            // Not a valid marker.
					            continue;
					          }
					          switch (stream.getByte()) {
					            case 0xc0: // SOF0
					            case 0xc1: // SOF1
					            case 0xc2: {
					              // SOF2
					              // These three SOF{n} markers are the only ones that the
					              // built-in PDF.js JPEG decoder currently supports.
					              foundSOF = true;

					              stream.pos += 2; // Skip marker length.
					              stream.pos += 1; // Skip precision.
					              const scanLines = stream.getUint16();

					              // The dimensions of the image, among other properties, should
					              // always be taken from the image data *itself* rather than
					              // the XObject dictionary. However there's cases of corrupt
					              // images that browsers cannot decode natively, for example:
					              //  - JPEG images with DNL markers, where the SOF `scanLines`
					              //    parameter has an unexpected value (see issue 8614).
					              //  - JPEG images with too large SOF `scanLines` parameter,
					              //    where the EOI marker is encountered prematurely (see
					              //    issue 10880).
					              // Hence the image is flagged when the image data and the
					              // dictionary disagree *and* either:
					              //  - `scanLines` is zero, i.e. a DNL (Define Number of Lines)
					              //    marker is expected, which browsers (usually) cannot
					              //    decode natively; or
					              //  - the difference is larger than one order of magnitude
					              //    (since that would generally suggest that something is
					              //    off).
					              // The "normal" case, where both values agree, is never
					              // flagged.
					              if (
					                scanLines !== dictHeight &&
					                (scanLines === 0 || scanLines > dictHeight * 10)
					              ) {
					                validDimensions = false;
					              }
					              break;
					            }

					            case 0xc3: // SOF3
					            /* falls through */
					            case 0xc5: // SOF5
					            case 0xc6: // SOF6
					            case 0xc7: // SOF7
					            /* falls through */
					            case 0xc9: // SOF9
					            case 0xca: // SOF10
					            case 0xcb: // SOF11
					            /* falls through */
					            case 0xcd: // SOF13
					            case 0xce: // SOF14
					            case 0xcf: // SOF15
					              // Any other SOF{n} marker only ends the scan; the dimensions
					              // are not inspected.
					              foundSOF = true;
					              break;

					            case 0xc4: // DHT
					            case 0xcc: // DAC
					            /* falls through */
					            case 0xda: // SOS
					            case 0xdb: // DQT
					            case 0xdc: // DNL
					            case 0xdd: // DRI
					            case 0xde: // DHP
					            case 0xdf: // EXP
					            /* falls through */
					            case 0xe0: // APP0
					            case 0xe1: // APP1
					            case 0xe2: // APP2
					            case 0xe3: // APP3
					            case 0xe4: // APP4
					            case 0xe5: // APP5
					            case 0xe6: // APP6
					            case 0xe7: // APP7
					            case 0xe8: // APP8
					            case 0xe9: // APP9
					            case 0xea: // APP10
					            case 0xeb: // APP11
					            case 0xec: // APP12
					            case 0xed: // APP13
					            case 0xee: // APP14
					            case 0xef: // APP15
					            /* falls through */
					            case 0xfe: {
					              // COM
					              const markerLength = stream.getUint16();
					              if (markerLength > 2) {
					                stream.skip(markerLength - 2); // Jump to the next marker.
					              } else {
					                // The marker length is invalid, resetting the stream
					                // position.
					                stream.skip(-2);
					              }
					              break;
					            }

					            case 0xff: // Fill byte.
					              // Avoid skipping a valid marker, resetting the stream position.
					              stream.skip(-1);
					              break;

					            case 0xd9: // EOI
					              // End of image: stop scanning, there is no SOF to inspect.
					              foundSOF = true;
					              break;
					          }
					          if (foundSOF) {
					            break;
					          }
					        }
					      } finally {
					        // Always reset the stream position, even if a read above threw.
					        stream.pos = startPos;
					      }

					      return shadow(this, "maybeValidDimensions", validDimensions);
					    },
					    configurable: true,
					  });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  JpegStream.prototype.getIR = function(forceDataSchema = false) {
 | 
					  JpegStream.prototype.getIR = function(forceDataSchema = false) {
 | 
				
			||||||
    return createObjectURL(this.bytes, "image/jpeg", forceDataSchema);
 | 
					    return createObjectURL(this.bytes, "image/jpeg", forceDataSchema);
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
				
			|||||||
@ -148,7 +148,7 @@ var JpegImage = (function JpegImageClosure() {
 | 
				
			|||||||
      if (bitsData === 0xff) {
 | 
					      if (bitsData === 0xff) {
 | 
				
			||||||
        var nextByte = data[offset++];
 | 
					        var nextByte = data[offset++];
 | 
				
			||||||
        if (nextByte) {
 | 
					        if (nextByte) {
 | 
				
			||||||
          if (nextByte === 0xdc && parseDNLMarker) {
 | 
					          if (nextByte === /* DNL = */ 0xdc && parseDNLMarker) {
 | 
				
			||||||
            offset += 2; // Skip marker length.
 | 
					            offset += 2; // Skip marker length.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            const scanLines = readUint16(data, offset);
 | 
					            const scanLines = readUint16(data, offset);
 | 
				
			||||||
@ -159,7 +159,22 @@ var JpegImage = (function JpegImageClosure() {
 | 
				
			|||||||
                scanLines
 | 
					                scanLines
 | 
				
			||||||
              );
 | 
					              );
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
          } else if (nextByte === 0xd9) {
 | 
					          } else if (nextByte === /* EOI = */ 0xd9) {
 | 
				
			||||||
 | 
					            if (parseDNLMarker) {
 | 
				
			||||||
 | 
					              // NOTE: only 8-bit JPEG images are supported in this decoder.
 | 
				
			||||||
 | 
					              const maybeScanLines = blockRow * 8;
 | 
				
			||||||
 | 
					              // Heuristic to attempt to handle corrupt JPEG images with too
 | 
				
			||||||
 | 
					              // large `scanLines` parameter, by falling back to the currently
 | 
				
			||||||
 | 
					              // parsed number of scanLines when it's at least one order of
 | 
				
			||||||
 | 
					              // magnitude smaller than expected (fixes issue10880.pdf).
 | 
				
			||||||
 | 
					              if (maybeScanLines > 0 && maybeScanLines < frame.scanLines / 10) {
 | 
				
			||||||
 | 
					                throw new DNLMarkerError(
 | 
				
			||||||
 | 
					                  "Found EOI marker (0xFFD9) while parsing scan data, " +
 | 
				
			||||||
 | 
					                    "possibly caused by incorrect `scanLines` parameter",
 | 
				
			||||||
 | 
					                  maybeScanLines
 | 
				
			||||||
 | 
					                );
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
            throw new EOIMarkerError(
 | 
					            throw new EOIMarkerError(
 | 
				
			||||||
              "Found EOI marker (0xFFD9) while parsing scan data"
 | 
					              "Found EOI marker (0xFFD9) while parsing scan data"
 | 
				
			||||||
            );
 | 
					            );
 | 
				
			||||||
@ -337,17 +352,18 @@ var JpegImage = (function JpegImageClosure() {
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let blockRow = 0;
 | 
				
			||||||
    function decodeMcu(component, decode, mcu, row, col) {
 | 
					    function decodeMcu(component, decode, mcu, row, col) {
 | 
				
			||||||
      var mcuRow = (mcu / mcusPerLine) | 0;
 | 
					      var mcuRow = (mcu / mcusPerLine) | 0;
 | 
				
			||||||
      var mcuCol = mcu % mcusPerLine;
 | 
					      var mcuCol = mcu % mcusPerLine;
 | 
				
			||||||
      var blockRow = mcuRow * component.v + row;
 | 
					      blockRow = mcuRow * component.v + row;
 | 
				
			||||||
      var blockCol = mcuCol * component.h + col;
 | 
					      var blockCol = mcuCol * component.h + col;
 | 
				
			||||||
      var offset = getBlockBufferOffset(component, blockRow, blockCol);
 | 
					      var offset = getBlockBufferOffset(component, blockRow, blockCol);
 | 
				
			||||||
      decode(component, offset);
 | 
					      decode(component, offset);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    function decodeBlock(component, decode, mcu) {
 | 
					    function decodeBlock(component, decode, mcu) {
 | 
				
			||||||
      var blockRow = (mcu / component.blocksPerLine) | 0;
 | 
					      blockRow = (mcu / component.blocksPerLine) | 0;
 | 
				
			||||||
      var blockCol = mcu % component.blocksPerLine;
 | 
					      var blockCol = mcu % component.blocksPerLine;
 | 
				
			||||||
      var offset = getBlockBufferOffset(component, blockRow, blockCol);
 | 
					      var offset = getBlockBufferOffset(component, blockRow, blockCol);
 | 
				
			||||||
      decode(component, offset);
 | 
					      decode(component, offset);
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										1
									
								
								test/pdfs/issue10880.pdf.link
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/issue10880.pdf.link
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1 @@
 | 
				
			|||||||
 | 
					https://github.com/mozilla/pdf.js/files/3247065/B3-T-G5-50.pdf
 | 
				
			||||||
@ -3634,6 +3634,15 @@
 | 
				
			|||||||
       "lastPage": 1,
 | 
					       "lastPage": 1,
 | 
				
			||||||
       "type": "eq"
 | 
					       "type": "eq"
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
 | 
					    {  "id": "issue10880",
 | 
				
			||||||
 | 
					       "file": "pdfs/issue10880.pdf",
 | 
				
			||||||
 | 
					       "md5": "244ee5ee3ab88db8d8eb51d4416e2c97",
 | 
				
			||||||
 | 
					       "rounds": 1,
 | 
				
			||||||
 | 
					       "link": true,
 | 
				
			||||||
 | 
					       "firstPage": 7,
 | 
				
			||||||
 | 
					       "lastPage": 7,
 | 
				
			||||||
 | 
					       "type": "eq"
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
    {  "id": "issue9650",
 | 
					    {  "id": "issue9650",
 | 
				
			||||||
       "file": "pdfs/issue9650.pdf",
 | 
					       "file": "pdfs/issue9650.pdf",
 | 
				
			||||||
       "md5": "20d50bda6b1080b6d9088811299c791e",
 | 
					       "md5": "20d50bda6b1080b6d9088811299c791e",
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user