Convert the `Lexer` class in `src/core/parser.js` to ES6 syntax
This commit is contained in:
parent
7d0ecee771
commit
8d4d7dbf58
@ -714,33 +714,9 @@ class Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var Lexer = (function LexerClosure() {
|
|
||||||
function Lexer(stream, knownCommands) {
|
|
||||||
this.stream = stream;
|
|
||||||
this.nextChar();
|
|
||||||
|
|
||||||
// While lexing, we build up many strings one char at a time. Using += for
|
|
||||||
// this can result in lots of garbage strings. It's better to build an
|
|
||||||
// array of single-char strings and then join() them together at the end.
|
|
||||||
// And reusing a single array (i.e. |this.strBuf|) over and over for this
|
|
||||||
// purpose uses less memory than using a new array for each string.
|
|
||||||
this.strBuf = [];
|
|
||||||
|
|
||||||
// The PDFs might have "glued" commands with other commands, operands or
|
|
||||||
// literals, e.g. "q1". The knownCommands is a dictionary of the valid
|
|
||||||
// commands and their prefixes. The prefixes are built the following way:
|
|
||||||
// if there is a command that is a prefix of another valid command or
|
|
||||||
// literal (e.g. 'f' and 'false') the following prefixes must be included,
|
|
||||||
// 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
|
|
||||||
// other commands or literals as a prefix. The knownCommands is optional.
|
|
||||||
this.knownCommands = knownCommands;
|
|
||||||
|
|
||||||
this.beginInlineImagePos = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// A '1' in this array means the character is white space. A '1' or
|
// A '1' in this array means the character is white space. A '1' or
|
||||||
// '2' means the character ends a name or command.
|
// '2' means the character ends a name or command.
|
||||||
var specialChars = [
|
const specialChars = [
|
||||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
|
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
|
||||||
1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
|
1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
|
||||||
@ -770,18 +746,43 @@ var Lexer = (function LexerClosure() {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Lexer.prototype = {
|
class Lexer {
|
||||||
nextChar: function Lexer_nextChar() {
|
constructor(stream, knownCommands) {
|
||||||
|
this.stream = stream;
|
||||||
|
this.nextChar();
|
||||||
|
|
||||||
|
// While lexing, we build up many strings one char at a time. Using += for
|
||||||
|
// this can result in lots of garbage strings. It's better to build an
|
||||||
|
// array of single-char strings and then join() them together at the end.
|
||||||
|
// And reusing a single array (i.e. |this.strBuf|) over and over for this
|
||||||
|
// purpose uses less memory than using a new array for each string.
|
||||||
|
this.strBuf = [];
|
||||||
|
|
||||||
|
// The PDFs might have "glued" commands with other commands, operands or
|
||||||
|
// literals, e.g. "q1". The knownCommands is a dictionary of the valid
|
||||||
|
// commands and their prefixes. The prefixes are built the following way:
|
||||||
|
// if there is a command that is a prefix of another valid command or
|
||||||
|
// literal (e.g. 'f' and 'false') the following prefixes must be included,
|
||||||
|
// 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
|
||||||
|
// other commands or literals as a prefix. The knownCommands is optional.
|
||||||
|
this.knownCommands = knownCommands;
|
||||||
|
|
||||||
|
this.beginInlineImagePos = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
nextChar() {
|
||||||
return (this.currentChar = this.stream.getByte());
|
return (this.currentChar = this.stream.getByte());
|
||||||
},
|
}
|
||||||
peekChar: function Lexer_peekChar() {
|
|
||||||
|
peekChar() {
|
||||||
return this.stream.peekByte();
|
return this.stream.peekByte();
|
||||||
},
|
}
|
||||||
getNumber: function Lexer_getNumber() {
|
|
||||||
var ch = this.currentChar;
|
getNumber() {
|
||||||
var eNotation = false;
|
let ch = this.currentChar;
|
||||||
var divideBy = 0; // different from 0 if it's a floating point value
|
let eNotation = false;
|
||||||
var sign = 0;
|
let divideBy = 0; // Different from 0 if it's a floating point value.
|
||||||
|
let sign = 0;
|
||||||
|
|
||||||
if (ch === 0x2D) { // '-'
|
if (ch === 0x2D) { // '-'
|
||||||
sign = -1;
|
sign = -1;
|
||||||
@ -817,17 +818,17 @@ var Lexer = (function LexerClosure() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sign = sign || 1;
|
sign = sign || 1;
|
||||||
var baseValue = ch - 0x30; // '0'
|
let baseValue = ch - 0x30; // '0'
|
||||||
var powerValue = 0;
|
let powerValue = 0;
|
||||||
var powerValueSign = 1;
|
let powerValueSign = 1;
|
||||||
|
|
||||||
while ((ch = this.nextChar()) >= 0) {
|
while ((ch = this.nextChar()) >= 0) {
|
||||||
if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
|
if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
|
||||||
var currentDigit = ch - 0x30; // '0'
|
const currentDigit = ch - 0x30; // '0'
|
||||||
if (eNotation) { // We are after an 'e' or 'E'
|
if (eNotation) { // We are after an 'e' or 'E'.
|
||||||
powerValue = powerValue * 10 + currentDigit;
|
powerValue = powerValue * 10 + currentDigit;
|
||||||
} else {
|
} else {
|
||||||
if (divideBy !== 0) { // We are after a point
|
if (divideBy !== 0) { // We are after a point.
|
||||||
divideBy *= 10;
|
divideBy *= 10;
|
||||||
}
|
}
|
||||||
baseValue = baseValue * 10 + currentDigit;
|
baseValue = baseValue * 10 + currentDigit;
|
||||||
@ -836,27 +837,27 @@ var Lexer = (function LexerClosure() {
|
|||||||
if (divideBy === 0) {
|
if (divideBy === 0) {
|
||||||
divideBy = 1;
|
divideBy = 1;
|
||||||
} else {
|
} else {
|
||||||
// A number can have only one '.'
|
// A number can have only one dot.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (ch === 0x2D) { // '-'
|
} else if (ch === 0x2D) { // '-'
|
||||||
// ignore minus signs in the middle of numbers to match
|
// Ignore minus signs in the middle of numbers to match
|
||||||
// Adobe's behavior
|
// Adobe's behavior.
|
||||||
warn('Badly formatted number');
|
warn('Badly formatted number: minus sign in the middle');
|
||||||
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
|
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
|
||||||
// 'E' can be either a scientific notation or the beginning of a new
|
// 'E' can be either a scientific notation or the beginning of a new
|
||||||
// operator
|
// operator.
|
||||||
ch = this.peekChar();
|
ch = this.peekChar();
|
||||||
if (ch === 0x2B || ch === 0x2D) { // '+', '-'
|
if (ch === 0x2B || ch === 0x2D) { // '+', '-'
|
||||||
powerValueSign = (ch === 0x2D) ? -1 : 1;
|
powerValueSign = (ch === 0x2D) ? -1 : 1;
|
||||||
this.nextChar(); // Consume the sign character
|
this.nextChar(); // Consume the sign character.
|
||||||
} else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
} else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
||||||
// The 'E' must be the beginning of a new operator
|
// The 'E' must be the beginning of a new operator.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
eNotation = true;
|
eNotation = true;
|
||||||
} else {
|
} else {
|
||||||
// the last character doesn't belong to us
|
// The last character doesn't belong to us.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -868,16 +869,17 @@ var Lexer = (function LexerClosure() {
|
|||||||
baseValue *= Math.pow(10, powerValueSign * powerValue);
|
baseValue *= Math.pow(10, powerValueSign * powerValue);
|
||||||
}
|
}
|
||||||
return sign * baseValue;
|
return sign * baseValue;
|
||||||
},
|
}
|
||||||
getString: function Lexer_getString() {
|
|
||||||
var numParen = 1;
|
getString() {
|
||||||
var done = false;
|
let numParen = 1;
|
||||||
var strBuf = this.strBuf;
|
let done = false;
|
||||||
|
const strBuf = this.strBuf;
|
||||||
strBuf.length = 0;
|
strBuf.length = 0;
|
||||||
|
|
||||||
var ch = this.nextChar();
|
let ch = this.nextChar();
|
||||||
while (true) {
|
while (true) {
|
||||||
var charBuffered = false;
|
let charBuffered = false;
|
||||||
switch (ch | 0) {
|
switch (ch | 0) {
|
||||||
case -1:
|
case -1:
|
||||||
warn('Unterminated string');
|
warn('Unterminated string');
|
||||||
@ -924,7 +926,7 @@ var Lexer = (function LexerClosure() {
|
|||||||
break;
|
break;
|
||||||
case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
|
case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
|
||||||
case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
|
case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
|
||||||
var x = ch & 0x0F;
|
let x = ch & 0x0F;
|
||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
charBuffered = true;
|
charBuffered = true;
|
||||||
if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
|
if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
|
||||||
@ -961,11 +963,13 @@ var Lexer = (function LexerClosure() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return strBuf.join('');
|
return strBuf.join('');
|
||||||
},
|
}
|
||||||
getName: function Lexer_getName() {
|
|
||||||
var ch, previousCh;
|
getName() {
|
||||||
var strBuf = this.strBuf;
|
let ch, previousCh;
|
||||||
|
const strBuf = this.strBuf;
|
||||||
strBuf.length = 0;
|
strBuf.length = 0;
|
||||||
|
|
||||||
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
|
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
|
||||||
if (ch === 0x23) { // '#'
|
if (ch === 0x23) { // '#'
|
||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
@ -975,14 +979,14 @@ var Lexer = (function LexerClosure() {
|
|||||||
strBuf.push('#');
|
strBuf.push('#');
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
var x = toHexDigit(ch);
|
const x = toHexDigit(ch);
|
||||||
if (x !== -1) {
|
if (x !== -1) {
|
||||||
previousCh = ch;
|
previousCh = ch;
|
||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
var x2 = toHexDigit(ch);
|
const x2 = toHexDigit(ch);
|
||||||
if (x2 === -1) {
|
if (x2 === -1) {
|
||||||
warn('Lexer_getName: Illegal digit (' +
|
warn(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` +
|
||||||
String.fromCharCode(ch) + ') in hexadecimal number.');
|
'in hexadecimal number.');
|
||||||
strBuf.push('#', String.fromCharCode(previousCh));
|
strBuf.push('#', String.fromCharCode(previousCh));
|
||||||
if (specialChars[ch]) {
|
if (specialChars[ch]) {
|
||||||
break;
|
break;
|
||||||
@ -999,17 +1003,18 @@ var Lexer = (function LexerClosure() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (strBuf.length > 127) {
|
if (strBuf.length > 127) {
|
||||||
warn('name token is longer than allowed by the spec: ' + strBuf.length);
|
warn(`Name token is longer than allowed by the spec: ${strBuf.length}`);
|
||||||
}
|
}
|
||||||
return Name.get(strBuf.join(''));
|
return Name.get(strBuf.join(''));
|
||||||
},
|
}
|
||||||
getHexString: function Lexer_getHexString() {
|
|
||||||
var strBuf = this.strBuf;
|
getHexString() {
|
||||||
|
const strBuf = this.strBuf;
|
||||||
strBuf.length = 0;
|
strBuf.length = 0;
|
||||||
var ch = this.currentChar;
|
let ch = this.currentChar;
|
||||||
var isFirstHex = true;
|
let isFirstHex = true;
|
||||||
var firstDigit;
|
let firstDigit, secondDigit;
|
||||||
var secondDigit;
|
|
||||||
while (true) {
|
while (true) {
|
||||||
if (ch < 0) {
|
if (ch < 0) {
|
||||||
warn('Unterminated hex string');
|
warn('Unterminated hex string');
|
||||||
@ -1024,14 +1029,14 @@ var Lexer = (function LexerClosure() {
|
|||||||
if (isFirstHex) {
|
if (isFirstHex) {
|
||||||
firstDigit = toHexDigit(ch);
|
firstDigit = toHexDigit(ch);
|
||||||
if (firstDigit === -1) {
|
if (firstDigit === -1) {
|
||||||
warn('Ignoring invalid character "' + ch + '" in hex string');
|
warn(`Ignoring invalid character "${ch}" in hex string`);
|
||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
secondDigit = toHexDigit(ch);
|
secondDigit = toHexDigit(ch);
|
||||||
if (secondDigit === -1) {
|
if (secondDigit === -1) {
|
||||||
warn('Ignoring invalid character "' + ch + '" in hex string');
|
warn(`Ignoring invalid character "${ch}" in hex string`);
|
||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1042,11 +1047,12 @@ var Lexer = (function LexerClosure() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return strBuf.join('');
|
return strBuf.join('');
|
||||||
},
|
}
|
||||||
getObj: function Lexer_getObj() {
|
|
||||||
// skip whitespace and comments
|
getObj() {
|
||||||
var comment = false;
|
// Skip whitespace and comments.
|
||||||
var ch = this.currentChar;
|
let comment = false;
|
||||||
|
let ch = this.currentChar;
|
||||||
while (true) {
|
while (true) {
|
||||||
if (ch < 0) {
|
if (ch < 0) {
|
||||||
return EOF;
|
return EOF;
|
||||||
@ -1063,7 +1069,7 @@ var Lexer = (function LexerClosure() {
|
|||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
}
|
}
|
||||||
|
|
||||||
// start reading token
|
// Start reading a token.
|
||||||
switch (ch | 0) {
|
switch (ch | 0) {
|
||||||
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
|
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
|
||||||
case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
|
case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
|
||||||
@ -1112,14 +1118,14 @@ var Lexer = (function LexerClosure() {
|
|||||||
throw new FormatError(`Illegal character: ${ch}`);
|
throw new FormatError(`Illegal character: ${ch}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// command
|
// Start reading a command.
|
||||||
var str = String.fromCharCode(ch);
|
let str = String.fromCharCode(ch);
|
||||||
var knownCommands = this.knownCommands;
|
const knownCommands = this.knownCommands;
|
||||||
var knownCommandFound = knownCommands && knownCommands[str] !== undefined;
|
let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
|
||||||
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
|
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
|
||||||
// stop if known command is found and next character does not make
|
// Stop if a known command is found and next character does not make
|
||||||
// the str a command
|
// the string a command.
|
||||||
var possibleCommand = str + String.fromCharCode(ch);
|
const possibleCommand = str + String.fromCharCode(ch);
|
||||||
if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
|
if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1146,9 +1152,10 @@ var Lexer = (function LexerClosure() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return Cmd.get(str);
|
return Cmd.get(str);
|
||||||
},
|
}
|
||||||
skipToNextLine: function Lexer_skipToNextLine() {
|
|
||||||
var ch = this.currentChar;
|
skipToNextLine() {
|
||||||
|
let ch = this.currentChar;
|
||||||
while (ch >= 0) {
|
while (ch >= 0) {
|
||||||
if (ch === 0x0D) { // CR
|
if (ch === 0x0D) { // CR
|
||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
@ -1162,11 +1169,8 @@ var Lexer = (function LexerClosure() {
|
|||||||
}
|
}
|
||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
};
|
}
|
||||||
|
|
||||||
return Lexer;
|
|
||||||
})();
|
|
||||||
|
|
||||||
var Linearization = {
|
var Linearization = {
|
||||||
create: function LinearizationCreate(stream) {
|
create: function LinearizationCreate(stream) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user