b5be515375
- The language specification is available at: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.364.2157&rep=rep1&type=pdf#page=1049 - It can be used: * as a scripting language for calculations, validations, ... * in SOM expressions to select nodes: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.364.2157&rep=rep1&type=pdf#page=101
386 lines
8.5 KiB
JavaScript
386 lines
8.5 KiB
JavaScript
/* Copyright 2021 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
/**
 * Reserved words recognised by the lexer, stored in lower case.
 *
 * `Lexer.getIdentifier` lower-cases every identifier before testing it
 * against this set, so keyword matching is case-insensitive. Each entry has
 * a same-named entry in `TOKEN`.
 */
const KEYWORDS = new Set([
  "and",
  "break",
  "continue",
  "do",
  "downto",
  "else",
  "elseif",
  "end",
  "endfor",
  "endfunc",
  "endif",
  "endwhile",
  "eq",
  "exit",
  "for",
  "foreach",
  "func",
  "ge",
  "gt",
  "if",
  "in",
  "infinity",
  "le",
  "lt",
  "nan",
  "ne",
  "not",
  "null",
  "or",
  "return",
  "step",
  "then",
  "this",
  "throw",
  "upto",
  "var",
  "while",
]);
|
|
|
|
/**
 * Numeric ids of every token kind the lexer can produce.
 *
 * The ids are consecutive integers starting at 0, grouped by where the token
 * may appear. A `Token` stores one of these values in its `id` property.
 */
const TOKEN = {
  /* Appears in expression */
  and: 0,
  divide: 1,
  dot: 2,
  dotDot: 3,
  dotHash: 4,
  dotStar: 5,
  eq: 6,
  ge: 7,
  gt: 8,
  le: 9,
  leftBracket: 10,
  leftParen: 11,
  lt: 12,
  minus: 13,
  ne: 14,
  not: 15,
  null: 16,
  number: 17,
  or: 18,
  plus: 19,
  rightBracket: 20,
  rightParen: 21,
  string: 22,
  this: 23,
  times: 24,
  identifier: 25, // in main statements too

  /* Main statements */
  break: 26,
  continue: 27,
  do: 28,
  for: 29,
  foreach: 30,
  func: 31,
  if: 32,
  var: 33,
  while: 34,

  /* Others */
  assign: 35,
  comma: 36,
  downto: 37,
  else: 38,
  elseif: 39,
  end: 40,
  endif: 41,
  endfor: 42,
  endfunc: 43,
  endwhile: 44,
  eof: 45,
  exit: 46,
  in: 47,
  infinity: 48,
  nan: 49,
  return: 50,
  step: 51,
  then: 52,
  throw: 53,
  upto: 54,
};
|
|
|
|
// Body of a Unicode escape, matched right after the backslash: `u` or `U`
// followed by 4 to 8 hexadecimal digits (captured in group 1).
const hexPattern = /^[uU]([0-9a-fA-F]{4,8})/;
// Remainder of a number whose first digit has already been consumed: more
// digits, an optional fractional part and an optional exponent. Every part is
// optional, so this pattern also matches the empty string.
const numberPattern = /^[0-9]*(?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?/;
// Remainder of a number written with a leading dot (e.g. `.25e3`), matched
// after the dot and the first digit. It also matches the empty string.
const dotNumberPattern = /^[0-9]*(?:[Ee][+-]?[0-9]+)?/;
// One or more consecutive line-break characters.
const eolPattern = /[\r\n]+/;
// An identifier: a Unicode letter, `_`, `$` or `!`, followed by any number of
// Unicode letters, Unicode numbers, `_` or `$`.
const identifierPattern = new RegExp("^[\\p{L}_$!][\\p{L}\\p{N}_$]*", "u");
|
|
|
|
/**
 * A lexical token: a token kind plus an optional payload.
 */
class Token {
  /**
   * @param {number} id - Token kind; the lexer passes one of the `TOKEN`
   *   values.
   * @param {*} [value=null] - Payload of the token. The lexer sets it to the
   *   text of an identifier, the decoded content of a string or the numeric
   *   value of a number; it stays `null` otherwise.
   */
  constructor(id, value = null) {
    this.id = id;
    this.value = value;
  }
}
|
|
|
|
/**
 * Shared `Token` instances, keyed by token name, for every token kind that
 * carries no per-occurrence value.
 *
 * The lexer returns these objects directly instead of allocating a new
 * `Token` each time. The map is created with a `null` prototype, so looking
 * up a name only ever finds the entries added here.
 */
const Singletons = (function () {
  const obj = Object.create(null);
  // These kinds get no generic shared instance: identifiers, strings and
  // numbers need a fresh Token per occurrence, while `nan` and `infinity`
  // are defined explicitly below.
  const nonSingleton = new Set([
    "identifier",
    "string",
    "number",
    "nan",
    "infinity",
  ]);
  for (const [name, id] of Object.entries(TOKEN)) {
    if (!nonSingleton.has(name)) {
      obj[name] = new Token(id);
    }
  }
  // The `nan` and `infinity` keywords are exposed as number tokens holding
  // the corresponding JavaScript values.
  obj.nan = new Token(TOKEN.number, NaN);
  obj.infinity = new Token(TOKEN.number, Infinity);

  return obj;
})();
|
|
|
|
/**
 * Lexer for FormCalc source text.
 *
 * Call `next()` repeatedly to obtain the tokens one by one; once the input is
 * exhausted it keeps returning `Singletons.eof`.
 */
class Lexer {
  /**
   * @param {string} data - The source text to tokenize.
   */
  constructor(data) {
    this.data = data;
    // Index of the next character to read.
    this.pos = 0;
    this.len = data.length;
    // Scratch buffer reused by getString() to assemble strings that contain
    // escape sequences.
    this.strBuf = [];
  }

  /**
   * Moves the position just after the next run of line-break characters, or
   * to the end of the input when there is none.
   */
  skipUntilEOL() {
    const match = this.data.slice(this.pos).match(eolPattern);
    if (match) {
      this.pos += match.index + match[0].length;
    } else {
      // No eol so consume all the chars.
      this.pos = this.len;
    }
  }

  /**
   * Reads an identifier or a keyword. The first character has already been
   * consumed by next(), hence the initial step back.
   *
   * @returns {Token} The shared keyword token when the lower-cased text is a
   *   keyword, a new identifier token holding the original text otherwise.
   * @throws {Error} When the text at the current position is not a valid
   *   identifier.
   */
  getIdentifier() {
    this.pos--;
    const match = this.data.slice(this.pos).match(identifierPattern);
    if (!match) {
      throw new Error(
        `Invalid token in FormCalc expression at position ${this.pos}.`
      );
    }

    const identifier = this.data.slice(this.pos, this.pos + match[0].length);
    this.pos += match[0].length;

    // Keywords are matched case-insensitively.
    const lower = identifier.toLowerCase();
    if (!KEYWORDS.has(lower)) {
      return new Token(TOKEN.identifier, identifier);
    }

    return Singletons[lower];
  }

  /**
   * Reads a string literal; the opening quote has already been consumed.
   *
   * A doubled quote (`""`) stands for a single quote character. A backslash
   * followed by `u`/`U` and hexadecimal digits is a Unicode escape: with
   * exactly 8 digits all of them form the code point, otherwise only the
   * first 4 digits are used. Any other backslash is kept as is.
   *
   * @returns {Token} A string token holding the decoded content.
   */
  getString() {
    const str = this.strBuf;
    const data = this.data;
    let start = this.pos;
    let isTerminated = false;
    while (this.pos < this.len) {
      const char = data.charCodeAt(this.pos++);
      if (char === 0x22 /* = " */) {
        if (data.charCodeAt(this.pos) === 0x22 /* = " */) {
          // Escaped quote: keep the first quote and skip the second one.
          str.push(data.slice(start, this.pos++));
          start = this.pos;
          continue;
        }
        // End of string
        isTerminated = true;
        break;
      }

      if (char === 0x5c /* = \ */) {
        const match = data.substring(this.pos, this.pos + 10).match(hexPattern);
        if (!match) {
          continue;
        }

        // Flush the text which precedes the backslash.
        str.push(data.slice(start, this.pos - 1));
        const code = match[1];
        if (code.length === 8) {
          const codePoint = parseInt(code, 16);
          // String.fromCharCode only keeps the low 16 bits, which corrupts
          // every code point above U+FFFF, so use fromCodePoint. Values
          // above U+10FFFF are not valid code points (fromCodePoint would
          // throw); they keep the former truncating behaviour.
          str.push(
            codePoint <= 0x10ffff
              ? String.fromCodePoint(codePoint)
              : String.fromCharCode(codePoint)
          );
          // Skip the 'u' and the 8 digits.
          start = this.pos += 9;
        } else {
          // 4 to 7 digits: only the first 4 belong to the escape.
          str.push(String.fromCharCode(parseInt(code.slice(0, 4), 16)));
          // Skip the 'u' and the 4 digits.
          start = this.pos += 5;
        }
      }
    }

    // Only a terminated string has a closing quote to leave out; for an
    // unterminated one the last character is part of the content.
    const lastChunk = data.slice(start, isTerminated ? this.pos - 1 : this.pos);
    if (str.length === 0) {
      return new Token(TOKEN.string, lastChunk);
    }

    str.push(lastChunk);
    const string = str.join("");
    // Empty the shared buffer for the next string.
    str.length = 0;

    return new Token(TOKEN.string, string);
  }

  /**
   * Reads a number whose first digit has already been consumed.
   *
   * @param {number} first - Char code of the digit already consumed.
   * @returns {Token} A number token.
   */
  getNumber(first) {
    const match = this.data.substring(this.pos).match(numberPattern);
    if (!match || !match[0]) {
      // Nothing follows the first digit: the number is that single digit.
      return new Token(TOKEN.number, first - 0x30 /* = 0 */);
    }
    const number = parseFloat(
      this.data.substring(this.pos - 1, this.pos + match[0].length)
    );

    this.pos += match[0].length;
    return new Token(TOKEN.number, number);
  }

  /**
   * Picks between a two-character operator ending with `=` and its
   * one-character form.
   *
   * @param {Token} alt1 - Returned (after consuming the `=`) when the next
   *   character is `=`.
   * @param {Token} alt2 - Returned otherwise.
   * @returns {Token}
   */
  getCompOperator(alt1, alt2) {
    if (this.data.charCodeAt(this.pos) === 0x3d /* = = */) {
      this.pos++;
      return alt1;
    }
    return alt2;
  }

  /**
   * Reads an operator starting with `<`, which has already been consumed.
   *
   * @returns {Token} `le` for `<=`, `ne` for `<>` and `lt` otherwise.
   */
  getLower() {
    const char = this.data.charCodeAt(this.pos);
    if (char === 0x3d /* = = */) {
      this.pos++;
      return Singletons.le;
    }

    if (char === 0x3e /* = > */) {
      this.pos++;
      return Singletons.ne;
    }

    return Singletons.lt;
  }

  /**
   * Handles a `/` which has already been consumed.
   *
   * @returns {boolean} `false` when it starts a `//` comment (which is then
   *   skipped up to the end of the line), `true` when it is a division.
   */
  getSlash() {
    if (this.data.charCodeAt(this.pos) === 0x2f /* = / */) {
      this.skipUntilEOL();
      return false;
    }
    return true;
  }

  /**
   * Reads a token starting with `.`, which has already been consumed.
   *
   * @returns {Token} `dotDot` for `..`, `dotStar` for `.*`, `dotHash` for
   *   `.#`, a number token when a digit follows (e.g. `.25`) and `dot`
   *   otherwise.
   */
  getDot() {
    const char = this.data.charCodeAt(this.pos);
    if (char === 0x2e /* = . */) {
      this.pos++;
      return Singletons.dotDot;
    }

    if (char === 0x2a /* = * */) {
      this.pos++;
      return Singletons.dotStar;
    }

    if (char === 0x23 /* = # */) {
      this.pos++;
      return Singletons.dotHash;
    }

    if (0x30 /* = 0 */ <= char && char <= 0x39 /* = 9 */) {
      this.pos++;
      const match = this.data.substring(this.pos).match(dotNumberPattern);
      if (!match) {
        return new Token(TOKEN.number, (char - 0x30) /* = 0 */ / 10);
      }
      const end = this.pos + match[0].length;
      // Start two characters back in order to include the dot and the first
      // digit.
      const number = parseFloat(this.data.substring(this.pos - 2, end));
      this.pos = end;
      return new Token(TOKEN.number, number);
    }

    return Singletons.dot;
  }

  /**
   * Returns the next token. Whitespace, `;` comments and `//` comments are
   * skipped.
   *
   * @returns {Token} The next token, or `Singletons.eof` once the end of the
   *   input has been reached.
   * @throws {Error} When an unexpected character is found (see
   *   getIdentifier).
   */
  next() {
    while (this.pos < this.len) {
      const char = this.data.charCodeAt(this.pos++);
      switch (char) {
        case 0x09 /* = \t */:
        case 0x0a /* = \n */:
        case 0x0b /* = \v */:
        case 0x0c /* = \f */:
        case 0x0d /* = \r */:
        case 0x20 /* = space */:
          break;
        case 0x22 /* = " */:
          return this.getString();
        case 0x26 /* = & */:
          return Singletons.and;
        case 0x28 /* = ( */:
          return Singletons.leftParen;
        case 0x29 /* = ) */:
          return Singletons.rightParen;
        case 0x2a /* = * */:
          return Singletons.times;
        case 0x2b /* = + */:
          return Singletons.plus;
        case 0x2c /* = , */:
          return Singletons.comma;
        case 0x2d /* = - */:
          return Singletons.minus;
        case 0x2e /* = . */:
          return this.getDot();
        case 0x2f /* = / */:
          if (this.getSlash()) {
            return Singletons.divide;
          }
          // It was a comment.
          break;
        case 0x30 /* = 0 */:
        case 0x31 /* = 1 */:
        case 0x32 /* = 2 */:
        case 0x33 /* = 3 */:
        case 0x34 /* = 4 */:
        case 0x35 /* = 5 */:
        case 0x36 /* = 6 */:
        case 0x37 /* = 7 */:
        case 0x38 /* = 8 */:
        case 0x39 /* = 9 */:
          return this.getNumber(char);
        case 0x3b /* = ; */:
          // A semicolon starts a comment running to the end of the line.
          this.skipUntilEOL();
          break;
        case 0x3c /* = < */:
          return this.getLower();
        case 0x3d /* = = */:
          return this.getCompOperator(Singletons.eq, Singletons.assign);
        case 0x3e /* = > */:
          return this.getCompOperator(Singletons.ge, Singletons.gt);
        case 0x5b /* = [ */:
          return Singletons.leftBracket;
        case 0x5d /* = ] */:
          return Singletons.rightBracket;
        case 0x7c /* = | */:
          return Singletons.or;
        default:
          return this.getIdentifier();
      }
    }
    return Singletons.eof;
  }
}
|
|
|
|
export { Lexer, Token, TOKEN };
|