Convert the Lexer class in src/core/parser.js to ES6 syntax

2019-03-10 14:27:26 +01:00 · 2019-03-10 14:27:26 +01:00 · 8d4d7dbf58
commit 8d4d7dbf58
parent 7d0ecee771
1 changed files with 409 additions and 405 deletions
--- a/src/core/parser.js
+++ b/src/core/parser.js
@ -714,33 +714,9 @@ class Parser {
  }
 }
 var Lexer = (function LexerClosure() {
  function Lexer(stream, knownCommands) {
    this.stream = stream;
    this.nextChar();
    // While lexing, we build up many strings one char at a time. Using += for
    // this can result in lots of garbage strings. It's better to build an
    // array of single-char strings and then join() them together at the end.
    // And reusing a single array (i.e. |this.strBuf|) over and over for this
    // purpose uses less memory than using a new array for each string.
    this.strBuf = [];
    // The PDFs might have "glued" commands with other commands, operands or
    // literals, e.g. "q1". The knownCommands is a dictionary of the valid
    // commands and their prefixes. The prefixes are built the following way:
    // if there is a command that is a prefix of another valid command or
    // literal (e.g. 'f' and 'false') the following prefixes must be included,
    // 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
    // other commands or literals as a prefix. The knownCommands is optional.
    this.knownCommands = knownCommands;
    this.beginInlineImagePos = -1;
  }
 // A '1' in this array means the character is white space. A '1' or
 // '2' means the character ends a name or command.
-  var specialChars = [
+const specialChars = [
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
@ -770,18 +746,43 @@ var Lexer = (function LexerClosure() {
  return -1;
 }
-  Lexer.prototype = {
+class Lexer {
-    nextChar: function Lexer_nextChar() {
+  constructor(stream, knownCommands) {
    this.stream = stream;
    this.nextChar();
    // While lexing, we build up many strings one char at a time. Using += for
    // this can result in lots of garbage strings. It's better to build an
    // array of single-char strings and then join() them together at the end.
    // And reusing a single array (i.e. |this.strBuf|) over and over for this
    // purpose uses less memory than using a new array for each string.
    this.strBuf = [];
    // The PDFs might have "glued" commands with other commands, operands or
    // literals, e.g. "q1". The knownCommands is a dictionary of the valid
    // commands and their prefixes. The prefixes are built the following way:
    // if there is a command that is a prefix of another valid command or
    // literal (e.g. 'f' and 'false') the following prefixes must be included,
    // 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
    // other commands or literals as a prefix. The knownCommands is optional.
    this.knownCommands = knownCommands;
    this.beginInlineImagePos = -1;
  }
  nextChar() {
    return (this.currentChar = this.stream.getByte());
-    },
+  }
-    peekChar: function Lexer_peekChar() {
+
  peekChar() {
    return this.stream.peekByte();
-    },
+  }
-    getNumber: function Lexer_getNumber() {
+
-      var ch = this.currentChar;
+  getNumber() {
-      var eNotation = false;
+    let ch = this.currentChar;
-      var divideBy = 0; // different from 0 if it's a floating point value
+    let eNotation = false;
-      var sign = 0;
+    let divideBy = 0; // Different from 0 if it's a floating point value.
    let sign = 0;
    if (ch === 0x2D) { // '-'
      sign = -1;
@ -817,17 +818,17 @@ var Lexer = (function LexerClosure() {
    }
    sign = sign || 1;
-      var baseValue = ch - 0x30; // '0'
+    let baseValue = ch - 0x30; // '0'
-      var powerValue = 0;
+    let powerValue = 0;
-      var powerValueSign = 1;
+    let powerValueSign = 1;
    while ((ch = this.nextChar()) >= 0) {
      if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
-          var currentDigit = ch - 0x30; // '0'
+        const currentDigit = ch - 0x30; // '0'
-          if (eNotation) { // We are after an 'e' or 'E'
+        if (eNotation) { // We are after an 'e' or 'E'.
          powerValue = powerValue * 10 + currentDigit;
        } else {
-            if (divideBy !== 0) { // We are after a point
+          if (divideBy !== 0) { // We are after a point.
            divideBy *= 10;
          }
          baseValue = baseValue * 10 + currentDigit;
@ -836,27 +837,27 @@ var Lexer = (function LexerClosure() {
        if (divideBy === 0) {
          divideBy = 1;
        } else {
-            // A number can have only one '.'
+          // A number can have only one dot.
          break;
        }
      } else if (ch === 0x2D) { // '-'
-          // ignore minus signs in the middle of numbers to match
+        // Ignore minus signs in the middle of numbers to match
-          // Adobe's behavior
+        // Adobe's behavior.
-          warn('Badly formatted number');
+        warn('Badly formatted number: minus sign in the middle');
      } else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
        // 'E' can be either a scientific notation or the beginning of a new
-          // operator
+        // operator.
        ch = this.peekChar();
        if (ch === 0x2B || ch === 0x2D) { // '+', '-'
          powerValueSign = (ch === 0x2D) ? -1 : 1;
-            this.nextChar(); // Consume the sign character
+          this.nextChar(); // Consume the sign character.
        } else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
-            // The 'E' must be the beginning of a new operator
+          // The 'E' must be the beginning of a new operator.
          break;
        }
        eNotation = true;
      } else {
-          // the last character doesn't belong to us
+        // The last character doesn't belong to us.
        break;
      }
    }
@ -868,16 +869,17 @@ var Lexer = (function LexerClosure() {
      baseValue *= Math.pow(10, powerValueSign * powerValue);
    }
    return sign * baseValue;
-    },
+  }
-    getString: function Lexer_getString() {
+
-      var numParen = 1;
+  getString() {
-      var done = false;
+    let numParen = 1;
-      var strBuf = this.strBuf;
+    let done = false;
    const strBuf = this.strBuf;
    strBuf.length = 0;
-      var ch = this.nextChar();
+    let ch = this.nextChar();
    while (true) {
-        var charBuffered = false;
+      let charBuffered = false;
      switch (ch | 0) {
        case -1:
          warn('Unterminated string');
@ -924,7 +926,7 @@ var Lexer = (function LexerClosure() {
              break;
            case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
            case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
-                var x = ch & 0x0F;
+              let x = ch & 0x0F;
              ch = this.nextChar();
              charBuffered = true;
              if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
@ -961,11 +963,13 @@ var Lexer = (function LexerClosure() {
      }
    }
    return strBuf.join('');
-    },
+  }
-    getName: function Lexer_getName() {
+
-      var ch, previousCh;
+  getName() {
-      var strBuf = this.strBuf;
+    let ch, previousCh;
    const strBuf = this.strBuf;
    strBuf.length = 0;
    while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
      if (ch === 0x23) { // '#'
        ch = this.nextChar();
@ -975,14 +979,14 @@ var Lexer = (function LexerClosure() {
          strBuf.push('#');
          break;
        }
-          var x = toHexDigit(ch);
+        const x = toHexDigit(ch);
        if (x !== -1) {
          previousCh = ch;
          ch = this.nextChar();
-            var x2 = toHexDigit(ch);
+          const x2 = toHexDigit(ch);
          if (x2 === -1) {
-              warn('Lexer_getName: Illegal digit (' +
+            warn(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` +
-                   String.fromCharCode(ch) + ') in hexadecimal number.');
+                 'in hexadecimal number.');
            strBuf.push('#', String.fromCharCode(previousCh));
            if (specialChars[ch]) {
              break;
@ -999,17 +1003,18 @@ var Lexer = (function LexerClosure() {
      }
    }
    if (strBuf.length > 127) {
-        warn('name token is longer than allowed by the spec: ' + strBuf.length);
+      warn(`Name token is longer than allowed by the spec: ${strBuf.length}`);
    }
    return Name.get(strBuf.join(''));
-    },
+  }
-    getHexString: function Lexer_getHexString() {
+
-      var strBuf = this.strBuf;
+  getHexString() {
    const strBuf = this.strBuf;
    strBuf.length = 0;
-      var ch = this.currentChar;
+    let ch = this.currentChar;
-      var isFirstHex = true;
+    let isFirstHex = true;
-      var firstDigit;
+    let firstDigit, secondDigit;
-      var secondDigit;
+
    while (true) {
      if (ch < 0) {
        warn('Unterminated hex string');
@ -1024,14 +1029,14 @@ var Lexer = (function LexerClosure() {
        if (isFirstHex) {
          firstDigit = toHexDigit(ch);
          if (firstDigit === -1) {
-              warn('Ignoring invalid character "' + ch + '" in hex string');
+            warn(`Ignoring invalid character "${ch}" in hex string`);
            ch = this.nextChar();
            continue;
          }
        } else {
          secondDigit = toHexDigit(ch);
          if (secondDigit === -1) {
-              warn('Ignoring invalid character "' + ch + '" in hex string');
+            warn(`Ignoring invalid character "${ch}" in hex string`);
            ch = this.nextChar();
            continue;
          }
@ -1042,11 +1047,12 @@ var Lexer = (function LexerClosure() {
      }
    }
    return strBuf.join('');
-    },
+  }
-    getObj: function Lexer_getObj() {
+
-      // skip whitespace and comments
+  getObj() {
-      var comment = false;
+    // Skip whitespace and comments.
-      var ch = this.currentChar;
+    let comment = false;
    let ch = this.currentChar;
    while (true) {
      if (ch < 0) {
        return EOF;
@ -1063,7 +1069,7 @@ var Lexer = (function LexerClosure() {
      ch = this.nextChar();
    }
-      // start reading token
+    // Start reading a token.
    switch (ch | 0) {
      case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
      case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
@ -1112,14 +1118,14 @@ var Lexer = (function LexerClosure() {
        throw new FormatError(`Illegal character: ${ch}`);
    }
-      // command
+    // Start reading a command.
-      var str = String.fromCharCode(ch);
+    let str = String.fromCharCode(ch);
-      var knownCommands = this.knownCommands;
+    const knownCommands = this.knownCommands;
-      var knownCommandFound = knownCommands && knownCommands[str] !== undefined;
+    let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
    while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
-        // stop if known command is found and next character does not make
+      // Stop if a known command is found and next character does not make
-        // the str a command
+      // the string a command.
-        var possibleCommand = str + String.fromCharCode(ch);
+      const possibleCommand = str + String.fromCharCode(ch);
      if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
        break;
      }
@ -1146,9 +1152,10 @@ var Lexer = (function LexerClosure() {
    }
    return Cmd.get(str);
-    },
+  }
-    skipToNextLine: function Lexer_skipToNextLine() {
+
-      var ch = this.currentChar;
+  skipToNextLine() {
    let ch = this.currentChar;
    while (ch >= 0) {
      if (ch === 0x0D) { // CR
        ch = this.nextChar();
@ -1162,11 +1169,8 @@ var Lexer = (function LexerClosure() {
      }
      ch = this.nextChar();
    }
-    },
+  }
-  };
+}
  return Lexer;
 })();
 var Linearization = {
  create: function LinearizationCreate(stream) {