Attempt to detect inline images which contain an "EI" sequence in the actual image data (issue 11124)
This should reduce the possibility of accidentally truncating some inline images, while *not* causing the "EI" detection to become significantly slower.[1] There's obviously a possibility that these added checks are not sufficient to catch *every* single case of "EI" sequences within the actual inline image data, but without specific test-cases I decided against over-engineering the solution here. *Please note:* The interpolation issues are somewhat orthogonal to the main issue here, which is the truncated image, and they're already tracked elsewhere. --- [1] I've looked at the issue a few times, and this is the first approach that I was able to come up with that didn't cause *unacceptable* performance regressions in e.g. issue 2618.
This commit is contained in:
		
							parent
							
								
									276d917b7c
								
							
						
					
					
						commit
						28d2ada59c
					
				@ -203,10 +203,11 @@ class Parser {
 | 
				
			|||||||
      I = 0x49,
 | 
					      I = 0x49,
 | 
				
			||||||
      SPACE = 0x20,
 | 
					      SPACE = 0x20,
 | 
				
			||||||
      LF = 0xa,
 | 
					      LF = 0xa,
 | 
				
			||||||
      CR = 0xd;
 | 
					      CR = 0xd,
 | 
				
			||||||
    const n = 10,
 | 
					 | 
				
			||||||
      NUL = 0x0;
 | 
					      NUL = 0x0;
 | 
				
			||||||
    const startPos = stream.pos;
 | 
					    const lexer = this.lexer,
 | 
				
			||||||
 | 
					      startPos = stream.pos,
 | 
				
			||||||
 | 
					      n = 10;
 | 
				
			||||||
    let state = 0,
 | 
					    let state = 0,
 | 
				
			||||||
      ch,
 | 
					      ch,
 | 
				
			||||||
      maybeEIPos;
 | 
					      maybeEIPos;
 | 
				
			||||||
@ -243,6 +244,25 @@ class Parser {
 | 
				
			|||||||
              break;
 | 
					              break;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					          if (state !== 2) {
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          // Check that the "EI" sequence isn't part of the image data, since
 | 
				
			||||||
 | 
					          // that would cause the image to be truncated (fixes issue11124.pdf).
 | 
				
			||||||
 | 
					          if (lexer.knownCommands) {
 | 
				
			||||||
 | 
					            const nextObj = lexer.peekObj();
 | 
				
			||||||
 | 
					            if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
 | 
				
			||||||
 | 
					              // Not a valid command, i.e. the inline image data *itself*
 | 
				
			||||||
 | 
					              // contains an "EI" sequence. Resetting the state.
 | 
				
			||||||
 | 
					              state = 0;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          } else {
 | 
				
			||||||
 | 
					            warn(
 | 
				
			||||||
 | 
					              "findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
 | 
				
			||||||
 | 
					            );
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
          if (state === 2) {
 | 
					          if (state === 2) {
 | 
				
			||||||
            break; // Finished!
 | 
					            break; // Finished!
 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
@ -1276,6 +1296,28 @@ class Lexer {
 | 
				
			|||||||
    return Cmd.get(str);
 | 
					    return Cmd.get(str);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  peekObj() {
 | 
				
			||||||
 | 
					    const streamPos = this.stream.pos,
 | 
				
			||||||
 | 
					      currentChar = this.currentChar,
 | 
				
			||||||
 | 
					      beginInlineImagePos = this.beginInlineImagePos;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let nextObj;
 | 
				
			||||||
 | 
					    try {
 | 
				
			||||||
 | 
					      nextObj = this.getObj();
 | 
				
			||||||
 | 
					    } catch (ex) {
 | 
				
			||||||
 | 
					      if (ex instanceof MissingDataException) {
 | 
				
			||||||
 | 
					        throw ex;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      warn(`peekObj: ${ex}`);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // Ensure that we reset *all* relevant `Lexer`-instance state.
 | 
				
			||||||
 | 
					    this.stream.pos = streamPos;
 | 
				
			||||||
 | 
					    this.currentChar = currentChar;
 | 
				
			||||||
 | 
					    this.beginInlineImagePos = beginInlineImagePos;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return nextObj;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  skipToNextLine() {
 | 
					  skipToNextLine() {
 | 
				
			||||||
    let ch = this.currentChar;
 | 
					    let ch = this.currentChar;
 | 
				
			||||||
    while (ch >= 0) {
 | 
					    while (ch >= 0) {
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							@ -254,6 +254,7 @@
 | 
				
			|||||||
!issue6336.pdf
 | 
					!issue6336.pdf
 | 
				
			||||||
!issue6387.pdf
 | 
					!issue6387.pdf
 | 
				
			||||||
!issue6410.pdf
 | 
					!issue6410.pdf
 | 
				
			||||||
 | 
					!issue11124.pdf
 | 
				
			||||||
!issue8586.pdf
 | 
					!issue8586.pdf
 | 
				
			||||||
!jbig2_symbol_offset.pdf
 | 
					!jbig2_symbol_offset.pdf
 | 
				
			||||||
!gradientfill.pdf
 | 
					!gradientfill.pdf
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										33
									
								
								test/pdfs/issue11124.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								test/pdfs/issue11124.pdf
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,33 @@
 | 
				
			|||||||
 | 
					%PDF-1.3
 | 
				
			||||||
 | 
					%âãÏÓ
 | 
				
			||||||
 | 
					1 0 obj<</Type/Catalog/Pages 3 0 R>>
 | 
				
			||||||
 | 
					endobj
 | 
				
			||||||
 | 
					2 0 obj<</CreationDate(D:20190906183146+02'00')/Producer(PoDoFo - http://podofo.sf.net)>>
 | 
				
			||||||
 | 
					endobj
 | 
				
			||||||
 | 
					3 0 obj<</Type/Pages/Count 1/Kids[ 4 0 R]>>
 | 
				
			||||||
 | 
					endobj
 | 
				
			||||||
 | 
					4 0 obj<</Type/Page/Contents 5 0 R/MediaBox[ 0 0 100 100]/Parent 3 0 R/Resources<</ProcSet[/PDF/Text/ImageB/ImageC/ImageI]>>>>
 | 
				
			||||||
 | 
					endobj
 | 
				
			||||||
 | 
					5 0 obj<</Length 103>>
 | 
				
			||||||
 | 
					stream
 | 
				
			||||||
 | 
					100 0 0 100 0 0 cm
 | 
				
			||||||
 | 
					BI /W 4 /H 4 /CS /RGB /BPC 8
 | 
				
			||||||
 | 
					ID
 | 
				
			||||||
 | 
					00000z0z00zzz00z0zzz0zzzEI aazazaazzzaazazzzazzz
 | 
				
			||||||
 | 
					EI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					endstream
 | 
				
			||||||
 | 
					endobj
 | 
				
			||||||
 | 
					xref
 | 
				
			||||||
 | 
					0 6
 | 
				
			||||||
 | 
					0000000000 65535 f 
 | 
				
			||||||
 | 
					0000000015 00000 n 
 | 
				
			||||||
 | 
					0000000059 00000 n 
 | 
				
			||||||
 | 
					0000000156 00000 n 
 | 
				
			||||||
 | 
					0000000207 00000 n 
 | 
				
			||||||
 | 
					0000000341 00000 n 
 | 
				
			||||||
 | 
					trailer
 | 
				
			||||||
 | 
					<</ID[<D047079C2B662F2617BF6BC31251DAB1><D047079C2B662F2617BF6BC31251DAB1>]/Info 2 0 R/Root 1 0 R/Size 6>>
 | 
				
			||||||
 | 
					startxref
 | 
				
			||||||
 | 
					492
 | 
				
			||||||
 | 
					%%EOF
 | 
				
			||||||
@ -3147,6 +3147,12 @@
 | 
				
			|||||||
       "type": "text",
 | 
					       "type": "text",
 | 
				
			||||||
       "about": "Invisible (and broken) TrueType font used for text-selection."
 | 
					       "about": "Invisible (and broken) TrueType font used for text-selection."
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
 | 
					    {  "id": "issue11124",
 | 
				
			||||||
 | 
					       "file": "pdfs/issue11124.pdf",
 | 
				
			||||||
 | 
					       "md5": "9bde831515dc6b8bb2c7c00c8189aca9",
 | 
				
			||||||
 | 
					       "rounds": 1,
 | 
				
			||||||
 | 
					       "type": "eq"
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
    {  "id": "issue11768",
 | 
					    {  "id": "issue11768",
 | 
				
			||||||
       "file": "pdfs/issue11768_reduced.pdf",
 | 
					       "file": "pdfs/issue11768_reduced.pdf",
 | 
				
			||||||
       "md5": "0cafde97d78bb6883531a325a996a5ef",
 | 
					       "md5": "0cafde97d78bb6883531a325a996a5ef",
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user