Convert the Lexer class in src/core/parser.js to ES6 syntax

This commit is contained in:
Tim van der Meij 2019-03-10 14:27:26 +01:00
parent 7d0ecee771
commit 8d4d7dbf58
No known key found for this signature in database
GPG Key ID: 8C3FD2925A5F2762

View File

@ -714,33 +714,9 @@ class Parser {
} }
} }
var Lexer = (function LexerClosure() {
function Lexer(stream, knownCommands) {
this.stream = stream;
this.nextChar();
// While lexing, we build up many strings one char at a time. Using += for
// this can result in lots of garbage strings. It's better to build an
// array of single-char strings and then join() them together at the end.
// And reusing a single array (i.e. |this.strBuf|) over and over for this
// purpose uses less memory than using a new array for each string.
this.strBuf = [];
// The PDFs might have "glued" commands with other commands, operands or
// literals, e.g. "q1". The knownCommands is a dictionary of the valid
// commands and their prefixes. The prefixes are built the following way:
// if there is a command that is a prefix of another valid command or
// literal (e.g. 'f' and 'false'), the following prefixes must be included:
// 'fa', 'fal', 'fals'. The prefixes are not needed if the command has no
// other commands or literals as a prefix. The knownCommands parameter is optional.
this.knownCommands = knownCommands;
this.beginInlineImagePos = -1;
}
// A '1' in this array means the character is white space. A '1' or // A '1' in this array means the character is white space. A '1' or
// '2' means the character ends a name or command. // '2' means the character ends a name or command.
var specialChars = [ const specialChars = [
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
@ -770,18 +746,43 @@ var Lexer = (function LexerClosure() {
return -1; return -1;
} }
Lexer.prototype = { class Lexer {
nextChar: function Lexer_nextChar() { constructor(stream, knownCommands) {
this.stream = stream;
this.nextChar();
// While lexing, we build up many strings one char at a time. Using += for
// this can result in lots of garbage strings. It's better to build an
// array of single-char strings and then join() them together at the end.
// And reusing a single array (i.e. |this.strBuf|) over and over for this
// purpose uses less memory than using a new array for each string.
this.strBuf = [];
// The PDFs might have "glued" commands with other commands, operands or
// literals, e.g. "q1". The knownCommands is a dictionary of the valid
// commands and their prefixes. The prefixes are built the following way:
// if there is a command that is a prefix of another valid command or
// literal (e.g. 'f' and 'false'), the following prefixes must be included:
// 'fa', 'fal', 'fals'. The prefixes are not needed if the command has no
// other commands or literals as a prefix. The knownCommands parameter is optional.
this.knownCommands = knownCommands;
this.beginInlineImagePos = -1;
}
nextChar() {
return (this.currentChar = this.stream.getByte()); return (this.currentChar = this.stream.getByte());
}, }
peekChar: function Lexer_peekChar() {
peekChar() {
return this.stream.peekByte(); return this.stream.peekByte();
}, }
getNumber: function Lexer_getNumber() {
var ch = this.currentChar; getNumber() {
var eNotation = false; let ch = this.currentChar;
var divideBy = 0; // different from 0 if it's a floating point value let eNotation = false;
var sign = 0; let divideBy = 0; // Different from 0 if it's a floating point value.
let sign = 0;
if (ch === 0x2D) { // '-' if (ch === 0x2D) { // '-'
sign = -1; sign = -1;
@ -817,17 +818,17 @@ var Lexer = (function LexerClosure() {
} }
sign = sign || 1; sign = sign || 1;
var baseValue = ch - 0x30; // '0' let baseValue = ch - 0x30; // '0'
var powerValue = 0; let powerValue = 0;
var powerValueSign = 1; let powerValueSign = 1;
while ((ch = this.nextChar()) >= 0) { while ((ch = this.nextChar()) >= 0) {
if (0x30 <= ch && ch <= 0x39) { // '0' - '9' if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
var currentDigit = ch - 0x30; // '0' const currentDigit = ch - 0x30; // '0'
if (eNotation) { // We are after an 'e' or 'E' if (eNotation) { // We are after an 'e' or 'E'.
powerValue = powerValue * 10 + currentDigit; powerValue = powerValue * 10 + currentDigit;
} else { } else {
if (divideBy !== 0) { // We are after a point if (divideBy !== 0) { // We are after a point.
divideBy *= 10; divideBy *= 10;
} }
baseValue = baseValue * 10 + currentDigit; baseValue = baseValue * 10 + currentDigit;
@ -836,27 +837,27 @@ var Lexer = (function LexerClosure() {
if (divideBy === 0) { if (divideBy === 0) {
divideBy = 1; divideBy = 1;
} else { } else {
// A number can have only one '.' // A number can have only one dot.
break; break;
} }
} else if (ch === 0x2D) { // '-' } else if (ch === 0x2D) { // '-'
// ignore minus signs in the middle of numbers to match // Ignore minus signs in the middle of numbers to match
// Adobe's behavior // Adobe's behavior.
warn('Badly formatted number'); warn('Badly formatted number: minus sign in the middle');
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e' } else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
// 'E' can be either a scientific notation or the beginning of a new // 'E' can be either a scientific notation or the beginning of a new
// operator // operator.
ch = this.peekChar(); ch = this.peekChar();
if (ch === 0x2B || ch === 0x2D) { // '+', '-' if (ch === 0x2B || ch === 0x2D) { // '+', '-'
powerValueSign = (ch === 0x2D) ? -1 : 1; powerValueSign = (ch === 0x2D) ? -1 : 1;
this.nextChar(); // Consume the sign character this.nextChar(); // Consume the sign character.
} else if (ch < 0x30 || ch > 0x39) { // '0' - '9' } else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
// The 'E' must be the beginning of a new operator // The 'E' must be the beginning of a new operator.
break; break;
} }
eNotation = true; eNotation = true;
} else { } else {
// the last character doesn't belong to us // The last character doesn't belong to us.
break; break;
} }
} }
@ -868,16 +869,17 @@ var Lexer = (function LexerClosure() {
baseValue *= Math.pow(10, powerValueSign * powerValue); baseValue *= Math.pow(10, powerValueSign * powerValue);
} }
return sign * baseValue; return sign * baseValue;
}, }
getString: function Lexer_getString() {
var numParen = 1; getString() {
var done = false; let numParen = 1;
var strBuf = this.strBuf; let done = false;
const strBuf = this.strBuf;
strBuf.length = 0; strBuf.length = 0;
var ch = this.nextChar(); let ch = this.nextChar();
while (true) { while (true) {
var charBuffered = false; let charBuffered = false;
switch (ch | 0) { switch (ch | 0) {
case -1: case -1:
warn('Unterminated string'); warn('Unterminated string');
@ -924,7 +926,7 @@ var Lexer = (function LexerClosure() {
break; break;
case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3' case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7' case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
var x = ch & 0x0F; let x = ch & 0x0F;
ch = this.nextChar(); ch = this.nextChar();
charBuffered = true; charBuffered = true;
if (ch >= 0x30 && ch <= 0x37) { // '0'-'7' if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
@ -961,11 +963,13 @@ var Lexer = (function LexerClosure() {
} }
} }
return strBuf.join(''); return strBuf.join('');
}, }
getName: function Lexer_getName() {
var ch, previousCh; getName() {
var strBuf = this.strBuf; let ch, previousCh;
const strBuf = this.strBuf;
strBuf.length = 0; strBuf.length = 0;
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) { while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
if (ch === 0x23) { // '#' if (ch === 0x23) { // '#'
ch = this.nextChar(); ch = this.nextChar();
@ -975,14 +979,14 @@ var Lexer = (function LexerClosure() {
strBuf.push('#'); strBuf.push('#');
break; break;
} }
var x = toHexDigit(ch); const x = toHexDigit(ch);
if (x !== -1) { if (x !== -1) {
previousCh = ch; previousCh = ch;
ch = this.nextChar(); ch = this.nextChar();
var x2 = toHexDigit(ch); const x2 = toHexDigit(ch);
if (x2 === -1) { if (x2 === -1) {
warn('Lexer_getName: Illegal digit (' + warn(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` +
String.fromCharCode(ch) + ') in hexadecimal number.'); 'in hexadecimal number.');
strBuf.push('#', String.fromCharCode(previousCh)); strBuf.push('#', String.fromCharCode(previousCh));
if (specialChars[ch]) { if (specialChars[ch]) {
break; break;
@ -999,17 +1003,18 @@ var Lexer = (function LexerClosure() {
} }
} }
if (strBuf.length > 127) { if (strBuf.length > 127) {
warn('name token is longer than allowed by the spec: ' + strBuf.length); warn(`Name token is longer than allowed by the spec: ${strBuf.length}`);
} }
return Name.get(strBuf.join('')); return Name.get(strBuf.join(''));
}, }
getHexString: function Lexer_getHexString() {
var strBuf = this.strBuf; getHexString() {
const strBuf = this.strBuf;
strBuf.length = 0; strBuf.length = 0;
var ch = this.currentChar; let ch = this.currentChar;
var isFirstHex = true; let isFirstHex = true;
var firstDigit; let firstDigit, secondDigit;
var secondDigit;
while (true) { while (true) {
if (ch < 0) { if (ch < 0) {
warn('Unterminated hex string'); warn('Unterminated hex string');
@ -1024,14 +1029,14 @@ var Lexer = (function LexerClosure() {
if (isFirstHex) { if (isFirstHex) {
firstDigit = toHexDigit(ch); firstDigit = toHexDigit(ch);
if (firstDigit === -1) { if (firstDigit === -1) {
warn('Ignoring invalid character "' + ch + '" in hex string'); warn(`Ignoring invalid character "${ch}" in hex string`);
ch = this.nextChar(); ch = this.nextChar();
continue; continue;
} }
} else { } else {
secondDigit = toHexDigit(ch); secondDigit = toHexDigit(ch);
if (secondDigit === -1) { if (secondDigit === -1) {
warn('Ignoring invalid character "' + ch + '" in hex string'); warn(`Ignoring invalid character "${ch}" in hex string`);
ch = this.nextChar(); ch = this.nextChar();
continue; continue;
} }
@ -1042,11 +1047,12 @@ var Lexer = (function LexerClosure() {
} }
} }
return strBuf.join(''); return strBuf.join('');
}, }
getObj: function Lexer_getObj() {
// skip whitespace and comments getObj() {
var comment = false; // Skip whitespace and comments.
var ch = this.currentChar; let comment = false;
let ch = this.currentChar;
while (true) { while (true) {
if (ch < 0) { if (ch < 0) {
return EOF; return EOF;
@ -1063,7 +1069,7 @@ var Lexer = (function LexerClosure() {
ch = this.nextChar(); ch = this.nextChar();
} }
// start reading token // Start reading a token.
switch (ch | 0) { switch (ch | 0) {
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4' case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9' case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
@ -1112,14 +1118,14 @@ var Lexer = (function LexerClosure() {
throw new FormatError(`Illegal character: ${ch}`); throw new FormatError(`Illegal character: ${ch}`);
} }
// command // Start reading a command.
var str = String.fromCharCode(ch); let str = String.fromCharCode(ch);
var knownCommands = this.knownCommands; const knownCommands = this.knownCommands;
var knownCommandFound = knownCommands && knownCommands[str] !== undefined; let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) { while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
// stop if known command is found and next character does not make // Stop if a known command is found and next character does not make
// the str a command // the string a command.
var possibleCommand = str + String.fromCharCode(ch); const possibleCommand = str + String.fromCharCode(ch);
if (knownCommandFound && knownCommands[possibleCommand] === undefined) { if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
break; break;
} }
@ -1146,9 +1152,10 @@ var Lexer = (function LexerClosure() {
} }
return Cmd.get(str); return Cmd.get(str);
}, }
skipToNextLine: function Lexer_skipToNextLine() {
var ch = this.currentChar; skipToNextLine() {
let ch = this.currentChar;
while (ch >= 0) { while (ch >= 0) {
if (ch === 0x0D) { // CR if (ch === 0x0D) { // CR
ch = this.nextChar(); ch = this.nextChar();
@ -1162,11 +1169,8 @@ var Lexer = (function LexerClosure() {
} }
ch = this.nextChar(); ch = this.nextChar();
} }
}, }
}; }
return Lexer;
})();
var Linearization = { var Linearization = {
create: function LinearizationCreate(stream) { create: function LinearizationCreate(stream) {