Merge pull request #10635 from timvandermeij/lexer-parser

Convert `src/core/parser.js` to ES6 syntax and write more unit tests for the lexer and the parser
2019-03-19 23:17:34 +01:00 · 2019-03-19 23:17:34 +01:00 · 33bfbef6ba
commit 33bfbef6ba
parent ee3cfb7986 4a4b197b9d
2 changed files with 1238 additions and 1167 deletions
--- a/src/core/parser.js
+++ b/src/core/parser.js
@ -12,6 +12,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/* eslint no-var: error */

 import {
  Ascii85Stream, AsciiHexStream, FlateStream, LZWStream, NullStream,
@ -34,7 +35,7 @@ const MAX_LENGTH_TO_CACHE = 1000;
 const MAX_ADLER32_LENGTH = 5552;

 function computeAdler32(bytes) {
-  let bytesLength = bytes.length;
+  const bytesLength = bytes.length;
  if (typeof PDFJSDev === 'undefined' ||
      PDFJSDev.test('!PRODUCTION || TESTING')) {
    assert(bytesLength < MAX_ADLER32_LENGTH,
@ -49,22 +50,23 @@ function computeAdler32(bytes) {
  return ((b % 65521) << 16) | (a % 65521);
 }

-var Parser = (function ParserClosure() {
-  function Parser(lexer, allowStreams, xref, recoveryMode) {
+class Parser {
+  constructor(lexer, allowStreams, xref, recoveryMode = false) {
    this.lexer = lexer;
    this.allowStreams = allowStreams;
    this.xref = xref;
-    this.recoveryMode = recoveryMode || false;
+    this.recoveryMode = recoveryMode;
+
    this.imageCache = Object.create(null);
    this.refill();
  }

-  Parser.prototype = {
-    refill: function Parser_refill() {
+  refill() {
    this.buf1 = this.lexer.getObj();
    this.buf2 = this.lexer.getObj();
-    },
-    shift: function Parser_shift() {
+  }
+
+  shift() {
    if (isCmd(this.buf2, 'ID')) {
      this.buf1 = this.buf2;
      this.buf2 = null;
@ -72,8 +74,9 @@ var Parser = (function ParserClosure() {
      this.buf1 = this.buf2;
      this.buf2 = this.lexer.getObj();
    }
-    },
-    tryShift: function Parser_tryShift() {
+  }
+
+  tryShift() {
    try {
      this.shift();
      return true;
@ -85,9 +88,10 @@ var Parser = (function ParserClosure() {
      // state and call this.shift() twice to reset the buffers.
      return false;
    }
-    },
-    getObj: function Parser_getObj(cipherTransform) {
-      var buf1 = this.buf1;
+  }
+
+  getObj(cipherTransform) {
+    const buf1 = this.buf1;
    this.shift();

    if (buf1 instanceof Cmd) {
@ -95,7 +99,7 @@ var Parser = (function ParserClosure() {
        case 'BI': // inline image
          return this.makeInlineImage(cipherTransform);
        case '[': // array
-            var array = [];
+          const array = [];
          while (!isCmd(this.buf1, ']') && !isEOF(this.buf1)) {
            array.push(this.getObj(cipherTransform));
          }
@ -108,7 +112,7 @@ var Parser = (function ParserClosure() {
          this.shift();
          return array;
        case '<<': // dictionary or stream
-            var dict = new Dict(this.xref);
+          const dict = new Dict(this.xref);
          while (!isCmd(this.buf1, '>>') && !isEOF(this.buf1)) {
            if (!isName(this.buf1)) {
              info('Malformed dictionary: key must be a name object');
@ -116,7 +120,7 @@ var Parser = (function ParserClosure() {
              continue;
            }

-              var key = this.buf1.name;
+            const key = this.buf1.name;
            this.shift();
            if (isEOF(this.buf1)) {
              break;
@ -144,9 +148,9 @@ var Parser = (function ParserClosure() {
    }

    if (Number.isInteger(buf1)) { // indirect reference or integer
-        var num = buf1;
+      const num = buf1;
      if (Number.isInteger(this.buf1) && isCmd(this.buf2, 'R')) {
-          var ref = new Ref(num, this.buf1);
+        const ref = new Ref(num, this.buf1);
        this.shift();
        this.shift();
        return ref;
@ -155,7 +159,7 @@ var Parser = (function ParserClosure() {
    }

    if (isString(buf1)) { // string
-        var str = buf1;
+      let str = buf1;
      if (cipherTransform) {
        str = cipherTransform.decryptString(str);
      }
@ -164,7 +168,8 @@ var Parser = (function ParserClosure() {

    // simple object
    return buf1;
-    },
+  }
+
  /**
   * Find the end of the stream by searching for the /EI\s/.
   * @returns {number} The inline stream length.
@ -183,7 +188,7 @@ var Parser = (function ParserClosure() {
        if (ch === SPACE || ch === LF || ch === CR) {
          maybeEIPos = stream.pos;
          // Let's check that the next `n` bytes are ASCII... just to be sure.
-            let followingBytes = stream.peekBytes(n);
+          const followingBytes = stream.peekBytes(n);
          for (let i = 0, ii = followingBytes.length; i < ii; i++) {
            ch = followingBytes[i];
            if (ch === NUL && followingBytes[i + 1] !== NUL) {
@ -235,14 +240,14 @@ var Parser = (function ParserClosure() {
      endOffset--;
    }
    return ((stream.pos - endOffset) - startPos);
-    },
+  }
+
  /**
   * Find the EOI (end-of-image) marker 0xFFD9 of the stream.
   * @returns {number} The inline stream length.
   */
-    findDCTDecodeInlineStreamEnd:
-        function Parser_findDCTDecodeInlineStreamEnd(stream) {
-      var startPos = stream.pos, foundEOI = false, b, markerLength, length;
+  findDCTDecodeInlineStreamEnd(stream) {
+    let startPos = stream.pos, foundEOI = false, b, markerLength, length;
    while ((b = stream.getByte()) !== -1) {
      if (b !== 0xFF) { // Not a valid marker.
        continue;
@ -331,14 +336,15 @@ var Parser = (function ParserClosure() {
    }
    this.inlineStreamSkipEI(stream);
    return length;
-    },
+  }
+
  /**
   * Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream.
   * @returns {number} The inline stream length.
   */
  findASCII85DecodeInlineStreamEnd(stream) {
-      var TILDE = 0x7E, GT = 0x3E;
-      var startPos = stream.pos, ch, length;
+    const TILDE = 0x7E, GT = 0x3E;
+    let startPos = stream.pos, ch, length;
    while ((ch = stream.getByte()) !== -1) {
      if (ch === TILDE) {
        ch = stream.peekByte();
@ -363,15 +369,15 @@ var Parser = (function ParserClosure() {
    }
    this.inlineStreamSkipEI(stream);
    return length;
-    },
+  }
+
  /**
   * Find the EOD (end-of-data) marker '>' (i.e. GT) of the stream.
   * @returns {number} The inline stream length.
   */
-    findASCIIHexDecodeInlineStreamEnd:
-        function Parser_findASCIIHexDecodeInlineStreamEnd(stream) {
-      var GT = 0x3E;
-      var startPos = stream.pos, ch, length;
+  findASCIIHexDecodeInlineStreamEnd(stream) {
+    const GT = 0x3E;
+    let startPos = stream.pos, ch, length;
    while ((ch = stream.getByte()) !== -1) {
      if (ch === GT) {
        break;
@ -386,13 +392,14 @@ var Parser = (function ParserClosure() {
    }
    this.inlineStreamSkipEI(stream);
    return length;
-    },
+  }
+
  /**
   * Skip over the /EI/ for streams where we search for an EOD marker.
   */
-    inlineStreamSkipEI: function Parser_inlineStreamSkipEI(stream) {
-      var E = 0x45, I = 0x49;
-      var state = 0, ch;
+  inlineStreamSkipEI(stream) {
+    const E = 0x45, I = 0x49;
+    let state = 0, ch;
    while ((ch = stream.getByte()) !== -1) {
      if (state === 0) {
        state = (ch === E) ? 1 : 0;
@ -402,18 +409,20 @@ var Parser = (function ParserClosure() {
        break;
      }
    }
-    },
-    makeInlineImage: function Parser_makeInlineImage(cipherTransform) {
-      var lexer = this.lexer;
-      var stream = lexer.stream;
+  }
+
+  makeInlineImage(cipherTransform) {
+    const lexer = this.lexer;
+    const stream = lexer.stream;

    // Parse dictionary.
-      let dict = new Dict(this.xref), dictLength;
+    const dict = new Dict(this.xref);
+    let dictLength;
    while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) {
      if (!isName(this.buf1)) {
        throw new FormatError('Dictionary key must be a name object');
      }
-        var key = this.buf1.name;
+      const key = this.buf1.name;
      this.shift();
      if (isEOF(this.buf1)) {
        break;
@ -425,18 +434,20 @@ var Parser = (function ParserClosure() {
    }

    // Extract the name of the first (i.e. the current) image filter.
-      var filter = dict.get('Filter', 'F'), filterName;
+    const filter = dict.get('Filter', 'F');
+    let filterName;
    if (isName(filter)) {
      filterName = filter.name;
    } else if (Array.isArray(filter)) {
-        var filterZero = this.xref.fetchIfRef(filter[0]);
+      const filterZero = this.xref.fetchIfRef(filter[0]);
      if (isName(filterZero)) {
        filterName = filterZero.name;
      }
    }

    // Parse image stream.
-      let startPos = stream.pos, length;
+    const startPos = stream.pos;
+    let length;
    if (filterName === 'DCTDecode' || filterName === 'DCT') {
      length = this.findDCTDecodeInlineStreamEnd(stream);
    } else if (filterName === 'ASCII85Decode' || filterName === 'A85') {
@ -446,26 +457,26 @@ var Parser = (function ParserClosure() {
    } else {
      length = this.findDefaultInlineStreamEnd(stream);
    }
-      var imageStream = stream.makeSubStream(startPos, length, dict);
+    let imageStream = stream.makeSubStream(startPos, length, dict);

    // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
    // adler32 checksum.
    let cacheKey;
    if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
-        var imageBytes = imageStream.getBytes();
+      const imageBytes = imageStream.getBytes();
      imageStream.reset();

      const initialStreamPos = stream.pos;
      // Set the stream position to the beginning of the dictionary data...
      stream.pos = lexer.beginInlineImagePos;
      // ... and fetch the bytes of the *entire* dictionary.
-        let dictBytes = stream.getBytes(dictLength);
+      const dictBytes = stream.getBytes(dictLength);
      // Finally, don't forget to reset the stream position.
      stream.pos = initialStreamPos;

      cacheKey = computeAdler32(imageBytes) + '_' + computeAdler32(dictBytes);

-        let cacheEntry = this.imageCache[cacheKey];
+      const cacheEntry = this.imageCache[cacheKey];
      if (cacheEntry !== undefined) {
        this.buf2 = Cmd.get('EI');
        this.shift();
@ -482,7 +493,7 @@ var Parser = (function ParserClosure() {
    imageStream = this.filter(imageStream, dict, length);
    imageStream.dict = dict;
    if (cacheKey !== undefined) {
-        imageStream.cacheKey = 'inline_' + length + '_' + cacheKey;
+      imageStream.cacheKey = `inline_${length}_${cacheKey}`;
      this.imageCache[cacheKey] = imageStream;
    }

@ -490,7 +501,7 @@ var Parser = (function ParserClosure() {
    this.shift();

    return imageStream;
-    },
+  }

  _findStreamLength(startPos, signature) {
    const { stream, } = this.lexer;
@ -521,28 +532,28 @@ var Parser = (function ParserClosure() {
      stream.pos += scanLength;
    }
    return -1;
-    },
+  }

-    makeStream: function Parser_makeStream(dict, cipherTransform) {
-      var lexer = this.lexer;
-      var stream = lexer.stream;
+  makeStream(dict, cipherTransform) {
+    const lexer = this.lexer;
+    let stream = lexer.stream;

-      // get stream start position
+    // Get the stream's start position.
    lexer.skipToNextLine();
    const startPos = stream.pos - 1;

-      // get length
-      var length = dict.get('Length');
+    // Get the length.
+    let length = dict.get('Length');
    if (!Number.isInteger(length)) {
-        info('Bad ' + length + ' attribute in stream');
+      info(`Bad length "${length}" in stream`);
      length = 0;
    }

-      // skip over the stream data
+    // Skip over the stream data.
    stream.pos = startPos + length;
    lexer.nextChar();

-      // Shift '>>' and check whether the new object marks the end of the stream
+    // Shift '>>' and check whether the new object marks the end of the stream.
    if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
      this.shift(); // 'stream'
    } else {
@ -561,7 +572,7 @@ var Parser = (function ParserClosure() {
          const end = ENDSTREAM_SIGNATURE.length - i;
          const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);

-            let maybeLength = this._findStreamLength(startPos,
+          const maybeLength = this._findStreamLength(startPos,
                                                     TRUNCATED_SIGNATURE);
          if (maybeLength >= 0) {
            // Ensure that the byte immediately following the truncated
@ -596,10 +607,12 @@ var Parser = (function ParserClosure() {
    stream = this.filter(stream, dict, length);
    stream.dict = dict;
    return stream;
-    },
-    filter: function Parser_filter(stream, dict, length) {
-      var filter = dict.get('Filter', 'F');
-      var params = dict.get('DecodeParms', 'DP');
+  }
+
+  filter(stream, dict, length) {
+    let filter = dict.get('Filter', 'F');
+    let params = dict.get('DecodeParms', 'DP');
+
    if (isName(filter)) {
      if (Array.isArray(params)) {
        warn('/DecodeParms should not contain an Array, ' +
@ -608,14 +621,14 @@ var Parser = (function ParserClosure() {
      return this.makeFilter(stream, filter.name, length, params);
    }

-      var maybeLength = length;
+    let maybeLength = length;
    if (Array.isArray(filter)) {
-        var filterArray = filter;
-        var paramsArray = params;
-        for (var i = 0, ii = filterArray.length; i < ii; ++i) {
+      let filterArray = filter;
+      let paramsArray = params;
+      for (let i = 0, ii = filterArray.length; i < ii; ++i) {
        filter = this.xref.fetchIfRef(filterArray[i]);
        if (!isName(filter)) {
-            throw new FormatError('Bad filter name: ' + filter);
+          throw new FormatError(`Bad filter name "${filter}"`);
        }

        params = null;
@ -623,22 +636,24 @@ var Parser = (function ParserClosure() {
          params = this.xref.fetchIfRef(paramsArray[i]);
        }
        stream = this.makeFilter(stream, filter.name, maybeLength, params);
-          // after the first stream the length variable is invalid
+        // After the first stream the `length` variable is invalid.
        maybeLength = null;
      }
    }
    return stream;
-    },
-    makeFilter: function Parser_makeFilter(stream, name, maybeLength, params) {
+  }
+
+  makeFilter(stream, name, maybeLength, params) {
    // Since the 'Length' entry in the stream dictionary can be completely
    // wrong, e.g. zero for non-empty streams, only skip parsing the stream
    // when we can be absolutely certain that it actually is empty.
    if (maybeLength === 0) {
-        warn('Empty "' + name + '" stream.');
+      warn(`Empty "${name}" stream.`);
      return new NullStream();
    }
+
    try {
-        var xrefStreamStats = this.xref.stats.streamTypes;
+      const xrefStreamStats = this.xref.stats.streamTypes;
      if (name === 'FlateDecode' || name === 'Fl') {
        xrefStreamStats[StreamType.FLATE] = true;
        if (params) {
@ -649,7 +664,7 @@ var Parser = (function ParserClosure() {
      }
      if (name === 'LZWDecode' || name === 'LZW') {
        xrefStreamStats[StreamType.LZW] = true;
-          var earlyChange = 1;
+        let earlyChange = 1;
        if (params) {
          if (params.has('EarlyChange')) {
            earlyChange = params.get('EarlyChange');
@ -688,48 +703,21 @@ var Parser = (function ParserClosure() {
        xrefStreamStats[StreamType.JBIG] = true;
        return new Jbig2Stream(stream, maybeLength, stream.dict, params);
      }
-        warn('filter "' + name + '" not supported yet');
+      warn(`Filter "${name}" is not supported.`);
      return stream;
    } catch (ex) {
      if (ex instanceof MissingDataException) {
        throw ex;
      }
-        warn('Invalid stream: \"' + ex + '\"');
+      warn(`Invalid stream: "${ex}"`);
      return new NullStream();
    }
-    },
-  };
-
-  return Parser;
-})();
-
-var Lexer = (function LexerClosure() {
-  function Lexer(stream, knownCommands) {
-    this.stream = stream;
-    this.nextChar();
-
-    // While lexing, we build up many strings one char at a time. Using += for
-    // this can result in lots of garbage strings. It's better to build an
-    // array of single-char strings and then join() them together at the end.
-    // And reusing a single array (i.e. |this.strBuf|) over and over for this
-    // purpose uses less memory than using a new array for each string.
-    this.strBuf = [];
-
-    // The PDFs might have "glued" commands with other commands, operands or
-    // literals, e.g. "q1". The knownCommands is a dictionary of the valid
-    // commands and their prefixes. The prefixes are built the following way:
-    // if there a command that is a prefix of the other valid command or
-    // literal (e.g. 'f' and 'false') the following prefixes must be included,
-    // 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
-    // other commands or literals as a prefix. The knowCommands is optional.
-    this.knownCommands = knownCommands;
-
-    this.beginInlineImagePos = -1;
+  }
 }

 // A '1' in this array means the character is white space. A '1' or
 // '2' means the character ends a name or command.
-  var specialChars = [
+const specialChars = [
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
@ -759,18 +747,43 @@ var Lexer = (function LexerClosure() {
  return -1;
 }

-  Lexer.prototype = {
-    nextChar: function Lexer_nextChar() {
+class Lexer {
+  constructor(stream, knownCommands) {
+    this.stream = stream;
+    this.nextChar();
+
+    // While lexing, we build up many strings one char at a time. Using += for
+    // this can result in lots of garbage strings. It's better to build an
+    // array of single-char strings and then join() them together at the end.
+    // And reusing a single array (i.e. |this.strBuf|) over and over for this
+    // purpose uses less memory than using a new array for each string.
+    this.strBuf = [];
+
+    // The PDFs might have "glued" commands with other commands, operands or
+    // literals, e.g. "q1". The knownCommands is a dictionary of the valid
+    // commands and their prefixes. The prefixes are built the following way:
+    // if there is a command that is a prefix of another valid command or
+    // literal (e.g. 'f' and 'false') the following prefixes must be included,
+    // 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
+    // other commands or literals as a prefix. The knownCommands is optional.
+    this.knownCommands = knownCommands;
+
+    this.beginInlineImagePos = -1;
+  }
+
+  nextChar() {
    return (this.currentChar = this.stream.getByte());
-    },
-    peekChar: function Lexer_peekChar() {
+  }
+
+  peekChar() {
    return this.stream.peekByte();
-    },
-    getNumber: function Lexer_getNumber() {
-      var ch = this.currentChar;
-      var eNotation = false;
-      var divideBy = 0; // different from 0 if it's a floating point value
-      var sign = 0;
+  }
+
+  getNumber() {
+    let ch = this.currentChar;
+    let eNotation = false;
+    let divideBy = 0; // Different from 0 if it's a floating point value.
+    let sign = 0;

    if (ch === 0x2D) { // '-'
      sign = -1;
@ -806,17 +819,17 @@ var Lexer = (function LexerClosure() {
    }

    sign = sign || 1;
-      var baseValue = ch - 0x30; // '0'
-      var powerValue = 0;
-      var powerValueSign = 1;
+    let baseValue = ch - 0x30; // '0'
+    let powerValue = 0;
+    let powerValueSign = 1;

    while ((ch = this.nextChar()) >= 0) {
      if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
-          var currentDigit = ch - 0x30; // '0'
-          if (eNotation) { // We are after an 'e' or 'E'
+        const currentDigit = ch - 0x30; // '0'
+        if (eNotation) { // We are after an 'e' or 'E'.
          powerValue = powerValue * 10 + currentDigit;
        } else {
-            if (divideBy !== 0) { // We are after a point
+          if (divideBy !== 0) { // We are after a point.
            divideBy *= 10;
          }
          baseValue = baseValue * 10 + currentDigit;
@ -825,27 +838,27 @@ var Lexer = (function LexerClosure() {
        if (divideBy === 0) {
          divideBy = 1;
        } else {
-            // A number can have only one '.'
+          // A number can have only one dot.
          break;
        }
      } else if (ch === 0x2D) { // '-'
-          // ignore minus signs in the middle of numbers to match
-          // Adobe's behavior
-          warn('Badly formatted number');
+        // Ignore minus signs in the middle of numbers to match
+        // Adobe's behavior.
+        warn('Badly formatted number: minus sign in the middle');
      } else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
        // 'E' can be either a scientific notation or the beginning of a new
-          // operator
+        // operator.
        ch = this.peekChar();
        if (ch === 0x2B || ch === 0x2D) { // '+', '-'
          powerValueSign = (ch === 0x2D) ? -1 : 1;
-            this.nextChar(); // Consume the sign character
+          this.nextChar(); // Consume the sign character.
        } else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
-            // The 'E' must be the beginning of a new operator
+          // The 'E' must be the beginning of a new operator.
          break;
        }
        eNotation = true;
      } else {
-          // the last character doesn't belong to us
+        // The last character doesn't belong to us.
        break;
      }
    }
@ -857,16 +870,17 @@ var Lexer = (function LexerClosure() {
      baseValue *= Math.pow(10, powerValueSign * powerValue);
    }
    return sign * baseValue;
-    },
-    getString: function Lexer_getString() {
-      var numParen = 1;
-      var done = false;
-      var strBuf = this.strBuf;
+  }
+
+  getString() {
+    let numParen = 1;
+    let done = false;
+    const strBuf = this.strBuf;
    strBuf.length = 0;

-      var ch = this.nextChar();
+    let ch = this.nextChar();
    while (true) {
-        var charBuffered = false;
+      let charBuffered = false;
      switch (ch | 0) {
        case -1:
          warn('Unterminated string');
@ -913,7 +927,7 @@ var Lexer = (function LexerClosure() {
              break;
            case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
            case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
-                var x = ch & 0x0F;
+              let x = ch & 0x0F;
              ch = this.nextChar();
              charBuffered = true;
              if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
@ -950,11 +964,13 @@ var Lexer = (function LexerClosure() {
      }
    }
    return strBuf.join('');
-    },
-    getName: function Lexer_getName() {
-      var ch, previousCh;
-      var strBuf = this.strBuf;
+  }
+
+  getName() {
+    let ch, previousCh;
+    const strBuf = this.strBuf;
    strBuf.length = 0;
+
    while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
      if (ch === 0x23) { // '#'
        ch = this.nextChar();
@ -964,14 +980,14 @@ var Lexer = (function LexerClosure() {
          strBuf.push('#');
          break;
        }
-          var x = toHexDigit(ch);
+        const x = toHexDigit(ch);
        if (x !== -1) {
          previousCh = ch;
          ch = this.nextChar();
-            var x2 = toHexDigit(ch);
+          const x2 = toHexDigit(ch);
          if (x2 === -1) {
-              warn('Lexer_getName: Illegal digit (' +
-                   String.fromCharCode(ch) + ') in hexadecimal number.');
+            warn(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` +
+                 'in hexadecimal number.');
            strBuf.push('#', String.fromCharCode(previousCh));
            if (specialChars[ch]) {
              break;
@ -988,17 +1004,18 @@ var Lexer = (function LexerClosure() {
      }
    }
    if (strBuf.length > 127) {
-        warn('name token is longer than allowed by the spec: ' + strBuf.length);
+      warn(`Name token is longer than allowed by the spec: ${strBuf.length}`);
    }
    return Name.get(strBuf.join(''));
-    },
-    getHexString: function Lexer_getHexString() {
-      var strBuf = this.strBuf;
+  }
+
+  getHexString() {
+    const strBuf = this.strBuf;
    strBuf.length = 0;
-      var ch = this.currentChar;
-      var isFirstHex = true;
-      var firstDigit;
-      var secondDigit;
+    let ch = this.currentChar;
+    let isFirstHex = true;
+    let firstDigit, secondDigit;
+
    while (true) {
      if (ch < 0) {
        warn('Unterminated hex string');
@ -1013,14 +1030,14 @@ var Lexer = (function LexerClosure() {
        if (isFirstHex) {
          firstDigit = toHexDigit(ch);
          if (firstDigit === -1) {
-              warn('Ignoring invalid character "' + ch + '" in hex string');
+            warn(`Ignoring invalid character "${ch}" in hex string`);
            ch = this.nextChar();
            continue;
          }
        } else {
          secondDigit = toHexDigit(ch);
          if (secondDigit === -1) {
-              warn('Ignoring invalid character "' + ch + '" in hex string');
+            warn(`Ignoring invalid character "${ch}" in hex string`);
            ch = this.nextChar();
            continue;
          }
@ -1031,11 +1048,12 @@ var Lexer = (function LexerClosure() {
      }
    }
    return strBuf.join('');
-    },
-    getObj: function Lexer_getObj() {
-      // skip whitespace and comments
-      var comment = false;
-      var ch = this.currentChar;
+  }
+
+  getObj() {
+    // Skip whitespace and comments.
+    let comment = false;
+    let ch = this.currentChar;
    while (true) {
      if (ch < 0) {
        return EOF;
@ -1052,7 +1070,7 @@ var Lexer = (function LexerClosure() {
      ch = this.nextChar();
    }

-      // start reading token
+    // Start reading a token.
    switch (ch | 0) {
      case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
      case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
@ -1101,14 +1119,14 @@ var Lexer = (function LexerClosure() {
        throw new FormatError(`Illegal character: ${ch}`);
    }

-      // command
-      var str = String.fromCharCode(ch);
-      var knownCommands = this.knownCommands;
-      var knownCommandFound = knownCommands && knownCommands[str] !== undefined;
+    // Start reading a command.
+    let str = String.fromCharCode(ch);
+    const knownCommands = this.knownCommands;
+    let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
    while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
-        // stop if known command is found and next character does not make
-        // the str a command
-        var possibleCommand = str + String.fromCharCode(ch);
+      // Stop if a known command is found and next character does not make
+      // the string a command.
+      const possibleCommand = str + String.fromCharCode(ch);
      if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
        break;
      }
@ -1135,9 +1153,10 @@ var Lexer = (function LexerClosure() {
    }

    return Cmd.get(str);
-    },
-    skipToNextLine: function Lexer_skipToNextLine() {
-      var ch = this.currentChar;
+  }
+
+  skipToNextLine() {
+    let ch = this.currentChar;
    while (ch >= 0) {
      if (ch === 0x0D) { // CR
        ch = this.nextChar();
@ -1151,61 +1170,64 @@ var Lexer = (function LexerClosure() {
      }
      ch = this.nextChar();
    }
-    },
-  };
+  }
+}

-  return Lexer;
-})();
-
-var Linearization = {
-  create: function LinearizationCreate(stream) {
-    function getInt(name, allowZeroValue) {
-      var obj = linDict.get(name);
+class Linearization {
+  static create(stream) {
+    function getInt(linDict, name, allowZeroValue = false) {
+      const obj = linDict.get(name);
      if (Number.isInteger(obj) && (allowZeroValue ? obj >= 0 : obj > 0)) {
        return obj;
      }
-      throw new Error('The "' + name + '" parameter in the linearization ' +
+      throw new Error(`The "${name}" parameter in the linearization ` +
                      'dictionary is invalid.');
    }
-    function getHints() {
-      var hints = linDict.get('H'), hintsLength, item;
+
+    function getHints(linDict) {
+      const hints = linDict.get('H');
+      let hintsLength;
+
      if (Array.isArray(hints) &&
          ((hintsLength = hints.length) === 2 || hintsLength === 4)) {
-        for (var index = 0; index < hintsLength; index++) {
-          if (!(Number.isInteger(item = hints[index]) && item > 0)) {
-            throw new Error('Hint (' + index +
-                            ') in the linearization dictionary is invalid.');
+        for (let index = 0; index < hintsLength; index++) {
+          const hint = hints[index];
+          if (!(Number.isInteger(hint) && hint > 0)) {
+            throw new Error(`Hint (${index}) in the linearization dictionary ` +
+                            'is invalid.');
          }
        }
        return hints;
      }
      throw new Error('Hint array in the linearization dictionary is invalid.');
    }
-    var parser = new Parser(new Lexer(stream), false, null);
-    var obj1 = parser.getObj();
-    var obj2 = parser.getObj();
-    var obj3 = parser.getObj();
-    var linDict = parser.getObj();
-    var obj, length;
+
+    const parser = new Parser(new Lexer(stream), false, null);
+    const obj1 = parser.getObj();
+    const obj2 = parser.getObj();
+    const obj3 = parser.getObj();
+    const linDict = parser.getObj();
+    let obj, length;
    if (!(Number.isInteger(obj1) && Number.isInteger(obj2) &&
          isCmd(obj3, 'obj') && isDict(linDict) &&
          isNum(obj = linDict.get('Linearized')) && obj > 0)) {
      return null; // No valid linearization dictionary found.
-    } else if ((length = getInt('L')) !== stream.length) {
+    } else if ((length = getInt(linDict, 'L')) !== stream.length) {
      throw new Error('The "L" parameter in the linearization dictionary ' +
                      'does not equal the stream length.');
    }
    return {
      length,
-      hints: getHints(),
-      objectNumberFirst: getInt('O'),
-      endFirst: getInt('E'),
-      numPages: getInt('N'),
-      mainXRefEntriesOffset: getInt('T'),
-      pageFirst: (linDict.has('P') ? getInt('P', true) : 0),
-    };
-  },
+      hints: getHints(linDict),
+      objectNumberFirst: getInt(linDict, 'O'),
+      endFirst: getInt(linDict, 'E'),
+      numPages: getInt(linDict, 'N'),
+      mainXRefEntriesOffset: getInt(linDict, 'T'),
+      pageFirst: (linDict.has('P') ?
+                  getInt(linDict, 'P', /* allowZeroValue = */ true) : 0),
    };
+  }
+}

 export {
  Lexer,
--- a/test/unit/parser_spec.js
+++ b/test/unit/parser_spec.js
@ -12,74 +12,129 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/* eslint no-var: error */

-import { Lexer, Linearization } from '../../src/core/parser';
+import { Lexer, Linearization, Parser } from '../../src/core/parser';
 import { FormatError } from '../../src/shared/util';
 import { Name } from '../../src/core/primitives';
 import { StringStream } from '../../src/core/stream';

 describe('parser', function() {
-  describe('Lexer', function() {
-    it('should stop parsing numbers at the end of stream', function() {
-      var input = new StringStream('11.234');
-      var lexer = new Lexer(input);
-      var result = lexer.getNumber();
+  describe('Parser', function() {
+    describe('inlineStreamSkipEI', function() {
+      it('should skip over the EI marker if it is found', function() {
+        const string = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
+                       '/F /A85 ID abc123~> EI Q';
+        const input = new StringStream(string);
+        const lexer = new Lexer(input);
+        const parser = new Parser(lexer, /* allowStreams = */ true,
+                                  /* xref = */ null);
+        parser.inlineStreamSkipEI(input);
+        expect(input.pos).toEqual(string.indexOf('Q')); // Final 'Q', not 'q'.
+        expect(input.peekByte()).toEqual(0x51); // 'Q'
+      });

-      expect(result).toEqual(11.234);
+      it('should skip to the end of stream if the EI marker is not found',
+          function() {
+        const string = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
+                       '/F /A85 ID abc123~> Q';
+        const input = new StringStream(string);
+        const lexer = new Lexer(input);
+        const parser = new Parser(lexer, /* allowStreams = */ true,
+                                  /* xref = */ null);
+        parser.inlineStreamSkipEI(input);
+        expect(input.pos).toEqual(string.length); // Whole input was consumed.
+        expect(input.peekByte()).toEqual(-1); // Nothing left to read.
+      });
+    });
+  });
+
+  describe('Lexer', function() {
+    describe('nextChar', function() {
+      it('should return and set -1 when the end of the stream is reached',
+          function() {
+        const input = new StringStream('');
+        const lexer = new Lexer(input);
+        expect(lexer.nextChar()).toEqual(-1); // Returned value...
+        expect(lexer.currentChar).toEqual(-1); // ...and stored value.
+      });
+
+      it('should return and set the character after the current position',
+          function() {
+        const input = new StringStream('123');
+        const lexer = new Lexer(input);
+        expect(lexer.nextChar()).toEqual(0x32); // '2' is returned...
+        expect(lexer.currentChar).toEqual(0x32); // ...and stored.
+      });
+    });
+
+    describe('peekChar', function() {
+      it('should only return -1 when the end of the stream is reached',
+          function() {
+        const input = new StringStream('');
+        const lexer = new Lexer(input);
+        expect(lexer.peekChar()).toEqual(-1);
+        expect(lexer.currentChar).toEqual(-1);
+      });
+
+      it('should only return the character after the current position',
+          function() {
+        const input = new StringStream('123');
+        const lexer = new Lexer(input);
+        expect(lexer.peekChar()).toEqual(0x32); // Peeks at '2'...
+        expect(lexer.currentChar).toEqual(0x31); // ...but stays on '1'.
+      });
+    });
+
+    describe('getNumber', function() {
+      it('should stop parsing numbers at the end of stream', function() {
+        const input = new StringStream('11.234');
+        const lexer = new Lexer(input);
+        expect(lexer.getNumber()).toEqual(11.234);
      });

      it('should parse PostScript numbers', function() {
-      var numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.', '0.0',
-                    '123', '-98', '43445', '0', '+17'];
-      for (var i = 0, ii = numbers.length; i < ii; i++) {
-        var num = numbers[i];
-        var input = new StringStream(num);
-        var lexer = new Lexer(input);
-        var result = lexer.getNumber();
-
-        expect(result).toEqual(parseFloat(num));
+        const numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.',
+                         '0.0', '123', '-98', '43445', '0', '+17'];
+        for (const number of numbers) {
+          const input = new StringStream(number);
+          const lexer = new Lexer(input);
+          expect(lexer.getNumber()).toEqual(parseFloat(number));
        }
      });

      it('should ignore double negative before number', function() {
-      var input = new StringStream('--205.88');
-      var lexer = new Lexer(input);
-      var result = lexer.getNumber();
-
-      expect(result).toEqual(-205.88);
+        const input = new StringStream('--205.88');
+        const lexer = new Lexer(input);
+        expect(lexer.getNumber()).toEqual(-205.88);
      });

      it('should ignore minus signs in the middle of number', function() {
-      var input = new StringStream('205--.88');
-      var lexer = new Lexer(input);
-      var result = lexer.getNumber();
-
-      expect(result).toEqual(205.88);
+        const input = new StringStream('205--.88');
+        const lexer = new Lexer(input);
+        expect(lexer.getNumber()).toEqual(205.88);
      });

      it('should ignore line-breaks between operator and digit in number',
          function() {
-      let minusInput = new StringStream('-\r\n205.88');
-      let minusLexer = new Lexer(minusInput);
-
+        const minusInput = new StringStream('-\r\n205.88');
+        const minusLexer = new Lexer(minusInput);
        expect(minusLexer.getNumber()).toEqual(-205.88);

-      let plusInput = new StringStream('+\r\n205.88');
-      let plusLexer = new Lexer(plusInput);
-
+        const plusInput = new StringStream('+\r\n205.88');
+        const plusLexer = new Lexer(plusInput);
        expect(plusLexer.getNumber()).toEqual(205.88);
      });

      it('should treat a single decimal point as zero', function() {
-      let input = new StringStream('.');
-      let lexer = new Lexer(input);
-
+        const input = new StringStream('.');
+        const lexer = new Lexer(input);
        expect(lexer.getNumber()).toEqual(0);

-      let numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
-      for (let number of numbers) {
-        let input = new StringStream(number);
-        let lexer = new Lexer(input);
+        const numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
+        for (const number of numbers) {
+          const input = new StringStream(number);
+          const lexer = new Lexer(input);

          expect(function() {
            return lexer.getNumber();
@ -88,68 +143,62 @@ describe('parser', function() {
      });

      it('should handle glued numbers and operators', function() {
-      var input = new StringStream('123ET');
-      var lexer = new Lexer(input);
-      var value = lexer.getNumber();
-
-      expect(value).toEqual(123);
+        const input = new StringStream('123ET');
+        const lexer = new Lexer(input);
+        expect(lexer.getNumber()).toEqual(123);
        // The lexer must not have consumed the 'E'
        expect(lexer.currentChar).toEqual(0x45); // 'E'
      });
-
-    it('should stop parsing strings at the end of stream', function() {
-      var input = new StringStream('(1$4)');
-      input.getByte = function(super_getByte) {
-        // simulating end of file using null (see issue 2766)
-        var ch = super_getByte.call(input);
-        return (ch === 0x24 /* '$' */ ? -1 : ch);
-      }.bind(input, input.getByte);
-      var lexer = new Lexer(input);
-      var result = lexer.getString();
-
-      expect(result).toEqual('1');
    });

-    it('should not throw exception on bad input', function() {
-      // '8 0 2 15 5 2 2 2 4 3 2 4'
-      // should be parsed as
-      // '80 21 55 22 24 32'
-      var input = new StringStream('<7 0 2 15 5 2 2 2 4 3 2 4>');
-      var lexer = new Lexer(input);
-      var result = lexer.getHexString();
-
-      expect(result).toEqual('p!U"$2');
+    describe('getString', function() {
+      it('should stop parsing strings at the end of stream', function() {
+        const input = new StringStream('(1$4)');
+        input.getByte = function(super_getByte) {
+          // Simulate end of file by returning -1 for '$' (see issue 2766).
+          const ch = super_getByte.call(input);
+          return (ch === 0x24 /* '$' */ ? -1 : ch);
+        }.bind(input, input.getByte);
+        const lexer = new Lexer(input);
+        expect(lexer.getString()).toEqual('1');
      });

      it('should ignore escaped CR and LF', function() {
-      // '(\101\<CR><LF>\102)'
-      // should be parsed as
-      // "AB"
-      var input = new StringStream('(\\101\\\r\n\\102\\\r\\103\\\n\\104)');
-      var lexer = new Lexer(input);
-      var result = lexer.getString();
-
-      expect(result).toEqual('ABCD');
+        // '(\101\<CR><LF>\102\<CR>\103\<LF>\104)' should be parsed as 'ABCD'.
+        const input = new StringStream('(\\101\\\r\n\\102\\\r\\103\\\n\\104)');
+        const lexer = new Lexer(input);
+        expect(lexer.getString()).toEqual('ABCD');
+      });
    });

-    it('should handle Names with invalid usage of NUMBER SIGN (#)', function() {
-      var inputNames = ['/# 680 0 R', '/#AQwerty', '/#A<</B'];
-      var expectedNames = ['#', '#AQwerty', '#A'];
+    describe('getHexString', function() {
+      it('should not throw exception on bad input', function() {
+        // Whitespace is skipped, so the hex pairs are '70 21 55 22 24 32'.
+        const input = new StringStream('<7 0 2 15 5 2 2 2 4 3 2 4>');
+        const lexer = new Lexer(input);
+        expect(lexer.getHexString()).toEqual('p!U"$2');
+      });
+    });

-      for (var i = 0, ii = inputNames.length; i < ii; i++) {
-        var input = new StringStream(inputNames[i]);
-        var lexer = new Lexer(input);
-        var result = lexer.getName();
+    describe('getName', function() {
+      it('should handle Names with invalid usage of NUMBER SIGN (#)',
+          function() {
+        const inputNames = ['/# 680 0 R', '/#AQwerty', '/#A<</B'];
+        const expectedNames = ['#', '#AQwerty', '#A']; // Same order as above.

-        expect(result).toEqual(Name.get(expectedNames[i]));
+        for (let i = 0, ii = inputNames.length; i < ii; i++) {
+          const input = new StringStream(inputNames[i]);
+          const lexer = new Lexer(input);
+          expect(lexer.getName()).toEqual(Name.get(expectedNames[i]));
        }
      });
    });
+  });

  describe('Linearization', function() {
    it('should not find a linearization dictionary', function() {
      // Not an actual linearization dictionary.
-      var stream1 = new StringStream(
+      const stream1 = new StringStream(
        '3 0 obj\n' +
        '<<\n' +
        '/Length 4622\n' +
@ -160,7 +209,7 @@ describe('parser', function() {
      expect(Linearization.create(stream1)).toEqual(null);

      // Linearization dictionary with invalid version number.
-      var stream2 = new StringStream(
+      const stream2 = new StringStream(
        '1 0 obj\n' +
        '<<\n' +
        '/Linearized 0\n' +
@ -171,7 +220,7 @@ describe('parser', function() {
    });

    it('should accept a valid linearization dictionary', function() {
-      var stream = new StringStream(
+      const stream = new StringStream(
        '131 0 obj\n' +
        '<<\n' +
        '/Linearized 1\n' +
@ -184,7 +233,7 @@ describe('parser', function() {
        '>>\n' +
        'endobj'
      );
-      var expectedLinearizationDict = {
+      const expectedLinearizationDict = {
        length: 90,
        hints: [1388, 863],
        objectNumberFirst: 133,
@ -199,7 +248,7 @@ describe('parser', function() {
    it('should reject a linearization dictionary with invalid ' +
       'integer parameters', function() {
      // The /L parameter should be equal to the stream length.
-      var stream1 = new StringStream(
+      const stream1 = new StringStream(
        '1 0 obj\n' +
        '<<\n' +
        '/Linearized 1\n' +
@ -218,7 +267,7 @@ describe('parser', function() {
                           'dictionary does not equal the stream length.'));

      // The /E parameter should not be zero.
-      var stream2 = new StringStream(
+      const stream2 = new StringStream(
        '1 0 obj\n' +
        '<<\n' +
        '/Linearized 1\n' +
@ -237,7 +286,7 @@ describe('parser', function() {
                           'dictionary is invalid.'));

      // The /O parameter should be an integer.
-      var stream3 = new StringStream(
+      const stream3 = new StringStream(
        '1 0 obj\n' +
        '<<\n' +
        '/Linearized 1\n' +
@ -259,7 +308,7 @@ describe('parser', function() {
    it('should reject a linearization dictionary with invalid hint parameters',
       function() {
      // The /H parameter should be an array.
-      var stream1 = new StringStream(
+      const stream1 = new StringStream(
        '1 0 obj\n' +
        '<<\n' +
        '/Linearized 1\n' +
@ -278,7 +327,7 @@ describe('parser', function() {
                           'is invalid.'));

      // The hint array should contain two, or four, elements.
-      var stream2 = new StringStream(
+      const stream2 = new StringStream(
        '1 0 obj\n' +
        '<<\n' +
        '/Linearized 1\n' +
@ -297,7 +346,7 @@ describe('parser', function() {
                           'is invalid.'));

      // The hint array should not contain zero.
-      var stream3 = new StringStream(
+      const stream3 = new StringStream(
        '1 0 obj\n' +
        '<<\n' +
        '/Linearized 1\n' +