Merge pull request #16461 from Snuffleupagus/issue-16454
Improve "EI" detection in inline images (PR 12028 follow-up, issue 16454)
This commit is contained in:
		
						commit
						a6f9505a39
					
				@ -26,6 +26,7 @@ import {
 | 
			
		||||
  MissingDataException,
 | 
			
		||||
  ParserEOFException,
 | 
			
		||||
} from "./core_utils.js";
 | 
			
		||||
import { NullStream, Stream } from "./stream.js";
 | 
			
		||||
import { Ascii85Stream } from "./ascii_85_stream.js";
 | 
			
		||||
import { AsciiHexStream } from "./ascii_hex_stream.js";
 | 
			
		||||
import { CCITTFaxStream } from "./ccitt_stream.js";
 | 
			
		||||
@ -34,7 +35,6 @@ import { Jbig2Stream } from "./jbig2_stream.js";
 | 
			
		||||
import { JpegStream } from "./jpeg_stream.js";
 | 
			
		||||
import { JpxStream } from "./jpx_stream.js";
 | 
			
		||||
import { LZWStream } from "./lzw_stream.js";
 | 
			
		||||
import { NullStream } from "./stream.js";
 | 
			
		||||
import { PredictorStream } from "./predictor_stream.js";
 | 
			
		||||
import { RunLengthStream } from "./run_length_stream.js";
 | 
			
		||||
 | 
			
		||||
@ -190,9 +190,9 @@ class Parser {
 | 
			
		||||
      LF = 0xa,
 | 
			
		||||
      CR = 0xd,
 | 
			
		||||
      NUL = 0x0;
 | 
			
		||||
    const lexer = this.lexer,
 | 
			
		||||
    const { knownCommands } = this.lexer,
 | 
			
		||||
      startPos = stream.pos,
 | 
			
		||||
      n = 10;
 | 
			
		||||
      n = 15;
 | 
			
		||||
    let state = 0,
 | 
			
		||||
      ch,
 | 
			
		||||
      maybeEIPos;
 | 
			
		||||
@ -209,7 +209,12 @@ class Parser {
 | 
			
		||||
          maybeEIPos = stream.pos;
 | 
			
		||||
          // Let's check that the next `n` bytes are ASCII... just to be sure.
 | 
			
		||||
          const followingBytes = stream.peekBytes(n);
 | 
			
		||||
          for (let i = 0, ii = followingBytes.length; i < ii; i++) {
 | 
			
		||||
 | 
			
		||||
          const ii = followingBytes.length;
 | 
			
		||||
          if (ii === 0) {
 | 
			
		||||
            break; // The end of the stream was reached, nothing to check.
 | 
			
		||||
          }
 | 
			
		||||
          for (let i = 0; i < ii; i++) {
 | 
			
		||||
            ch = followingBytes[i];
 | 
			
		||||
            if (ch === NUL && followingBytes[i + 1] !== NUL) {
 | 
			
		||||
              // NUL bytes are not supposed to occur *outside* of inline
 | 
			
		||||
@ -235,19 +240,47 @@ class Parser {
 | 
			
		||||
          if (state !== 2) {
 | 
			
		||||
            continue;
 | 
			
		||||
          }
 | 
			
		||||
          // Check that the "EI" sequence isn't part of the image data, since
 | 
			
		||||
          // that would cause the image to be truncated (fixes issue11124.pdf).
 | 
			
		||||
          if (lexer.knownCommands) {
 | 
			
		||||
            const nextObj = lexer.peekObj();
 | 
			
		||||
            if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
 | 
			
		||||
              // Not a valid command, i.e. the inline image data *itself*
 | 
			
		||||
              // contains an "EI" sequence. Resetting the state.
 | 
			
		||||
              state = 0;
 | 
			
		||||
            }
 | 
			
		||||
          } else {
 | 
			
		||||
          if (!knownCommands) {
 | 
			
		||||
            warn(
 | 
			
		||||
              "findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
 | 
			
		||||
            );
 | 
			
		||||
            continue;
 | 
			
		||||
          }
 | 
			
		||||
          // Check that the "EI" sequence isn't part of the image data, since
 | 
			
		||||
          // that would cause the image to be truncated (fixes issue11124.pdf).
 | 
			
		||||
          const tmpLexer = new Lexer(
 | 
			
		||||
            new Stream(followingBytes.slice()),
 | 
			
		||||
            knownCommands
 | 
			
		||||
          );
 | 
			
		||||
          // Reduce the number of (potential) warning messages.
 | 
			
		||||
          tmpLexer._hexStringWarn = () => {};
 | 
			
		||||
          let numArgs = 0;
 | 
			
		||||
 | 
			
		||||
          while (true) {
 | 
			
		||||
            const nextObj = tmpLexer.getObj();
 | 
			
		||||
 | 
			
		||||
            if (nextObj === EOF) {
 | 
			
		||||
              state = 0; // No valid command found, resetting the state.
 | 
			
		||||
              break;
 | 
			
		||||
            }
 | 
			
		||||
            if (nextObj instanceof Cmd) {
 | 
			
		||||
              const knownCommand = knownCommands[nextObj.cmd];
 | 
			
		||||
              if (!knownCommand) {
 | 
			
		||||
                // Not a valid command, i.e. the inline image data *itself*
 | 
			
		||||
                // contains an "EI" sequence. Resetting the state.
 | 
			
		||||
                state = 0;
 | 
			
		||||
                break;
 | 
			
		||||
              } else if (
 | 
			
		||||
                knownCommand.variableArgs
 | 
			
		||||
                  ? numArgs <= knownCommand.numArgs
 | 
			
		||||
                  : numArgs === knownCommand.numArgs
 | 
			
		||||
              ) {
 | 
			
		||||
                break; // Valid command found.
 | 
			
		||||
              }
 | 
			
		||||
              numArgs = 0;
 | 
			
		||||
              continue;
 | 
			
		||||
            }
 | 
			
		||||
            numArgs++;
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if (state === 2) {
 | 
			
		||||
@ -1284,28 +1317,6 @@ class Lexer {
 | 
			
		||||
    return Cmd.get(str);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  peekObj() {
 | 
			
		||||
    const streamPos = this.stream.pos,
 | 
			
		||||
      currentChar = this.currentChar,
 | 
			
		||||
      beginInlineImagePos = this.beginInlineImagePos;
 | 
			
		||||
 | 
			
		||||
    let nextObj;
 | 
			
		||||
    try {
 | 
			
		||||
      nextObj = this.getObj();
 | 
			
		||||
    } catch (ex) {
 | 
			
		||||
      if (ex instanceof MissingDataException) {
 | 
			
		||||
        throw ex;
 | 
			
		||||
      }
 | 
			
		||||
      warn(`peekObj: ${ex}`);
 | 
			
		||||
    }
 | 
			
		||||
    // Ensure that we reset *all* relevant `Lexer`-instance state.
 | 
			
		||||
    this.stream.pos = streamPos;
 | 
			
		||||
    this.currentChar = currentChar;
 | 
			
		||||
    this.beginInlineImagePos = beginInlineImagePos;
 | 
			
		||||
 | 
			
		||||
    return nextObj;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  skipToNextLine() {
 | 
			
		||||
    let ch = this.currentChar;
 | 
			
		||||
    while (ch >= 0) {
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1
									
								
								test/pdfs/issue16454.pdf.link
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/issue16454.pdf.link
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1 @@
 | 
			
		||||
https://github.com/mozilla/pdf.js/files/11537582/Pages.62.73.from.0560-22_WSP.Plan_July.2022_Version.1.pdf
 | 
			
		||||
@ -5972,6 +5972,14 @@
 | 
			
		||||
       "link": true,
 | 
			
		||||
       "type": "eq"
 | 
			
		||||
    },
 | 
			
		||||
    {  "id": "issue16454",
 | 
			
		||||
       "file": "pdfs/issue16454.pdf",
 | 
			
		||||
       "md5": "82fe0c54a96667472ce999be7a789199",
 | 
			
		||||
       "rounds": 1,
 | 
			
		||||
       "lastPage": 1,
 | 
			
		||||
       "link": true,
 | 
			
		||||
       "type": "eq"
 | 
			
		||||
    },
 | 
			
		||||
    {  "id": "decodeACSuccessive",
 | 
			
		||||
       "file": "pdfs/decodeACSuccessive.pdf",
 | 
			
		||||
       "md5": "7749c032624fe27ab8e8d7d5e9a4a93f",
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user