Merge pull request #4236 from lovasoa/master
Lexer_getNumber faster number parsing
This commit is contained in:
commit
2451968bac
1
AUTHORS
1
AUTHORS
@ -15,6 +15,7 @@ Jonas Jenwald <jonas.jenwald@gmail.com>
|
|||||||
Julian Viereck
|
Julian Viereck
|
||||||
Justin D'Arcangelo <justindarc@gmail.com>
|
Justin D'Arcangelo <justindarc@gmail.com>
|
||||||
Kalervo Kujala
|
Kalervo Kujala
|
||||||
|
Ophir Lojkine <@lovasoa>
|
||||||
Rob Wu <gwnRob@gmail.com>
|
Rob Wu <gwnRob@gmail.com>
|
||||||
Shaon Barman <shaon.barman@gmail.com>
|
Shaon Barman <shaon.barman@gmail.com>
|
||||||
Tim van der Meij <info@timvandermeij.nl>
|
Tim van der Meij <info@timvandermeij.nl>
|
||||||
|
@ -393,47 +393,85 @@ var Lexer = (function LexerClosure() {
|
|||||||
nextChar: function Lexer_nextChar() {
|
nextChar: function Lexer_nextChar() {
|
||||||
return (this.currentChar = this.stream.getByte());
|
return (this.currentChar = this.stream.getByte());
|
||||||
},
|
},
|
||||||
|
peekChar: function Lexer_peekChar() {
|
||||||
|
return this.stream.peekBytes(1)[0];
|
||||||
|
},
|
||||||
getNumber: function Lexer_getNumber() {
|
getNumber: function Lexer_getNumber() {
|
||||||
var floating = false;
|
|
||||||
var ch = this.currentChar;
|
var ch = this.currentChar;
|
||||||
var allDigits = ch >= 0x30 && ch <= 0x39;
|
var eNotation = false;
|
||||||
var strBuf = this.strBuf;
|
var divideBy = 0; // different from 0 if it's a floating point value
|
||||||
strBuf.length = 0;
|
|
||||||
strBuf.push(String.fromCharCode(ch));
|
var sign = 1;
|
||||||
|
|
||||||
|
|
||||||
|
if (ch === 0x2D) { // '-'
|
||||||
|
sign = -1;
|
||||||
|
ch = this.nextChar();
|
||||||
|
} else if (ch === 0x2B) { // '+'
|
||||||
|
ch = this.nextChar();
|
||||||
|
}
|
||||||
|
if (ch === 0x2E) { // '.'
|
||||||
|
divideBy = 10;
|
||||||
|
ch = this.nextChar();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
||||||
|
error('Invalid number: ' + String.fromCharCode(ch));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
var baseValue = ch - 0x30; // '0'
|
||||||
|
var powerValue = 0;
|
||||||
|
var powerValueSign = 1;
|
||||||
|
|
||||||
while ((ch = this.nextChar()) >= 0) {
|
while ((ch = this.nextChar()) >= 0) {
|
||||||
if (ch >= 0x30 && ch <= 0x39) { // '0'-'9'
|
if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
|
||||||
strBuf.push(String.fromCharCode(ch));
|
var currentDigit = ch - 0x30; // '0'
|
||||||
} else if (ch === 0x2E && !floating) { // '.'
|
if (eNotation) { // We are after an 'e' or 'E'
|
||||||
strBuf.push('.');
|
powerValue = powerValue * 10 + currentDigit;
|
||||||
floating = true;
|
} else {
|
||||||
allDigits = false;
|
if (divideBy !== 0) { // We are after a point
|
||||||
|
divideBy *= 10;
|
||||||
|
}
|
||||||
|
baseValue = baseValue * 10 + currentDigit;
|
||||||
|
}
|
||||||
|
} else if (ch === 0x2E) { // '.'
|
||||||
|
if (divideBy === 0) {
|
||||||
|
divideBy = 1;
|
||||||
|
} else {
|
||||||
|
// A number can have only one '.'
|
||||||
|
break;
|
||||||
|
}
|
||||||
} else if (ch === 0x2D) { // '-'
|
} else if (ch === 0x2D) { // '-'
|
||||||
// ignore minus signs in the middle of numbers to match
|
// ignore minus signs in the middle of numbers to match
|
||||||
// Adobe's behavior
|
// Adobe's behavior
|
||||||
warn('Badly formated number');
|
warn('Badly formated number');
|
||||||
allDigits = false;
|
|
||||||
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
|
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
|
||||||
floating = true;
|
// 'E' can be either a scientific notation or the beginning of a new
|
||||||
allDigits = false;
|
// operator
|
||||||
|
var hasE = true;
|
||||||
|
ch = this.peekChar();
|
||||||
|
if (ch === 0x2B || ch === 0x2D) { // '+', '-'
|
||||||
|
powerValueSign = (ch === 0x2D) ? -1 : 1;
|
||||||
|
this.nextChar(); // Consume the sign character
|
||||||
|
} else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
||||||
|
// The 'E' must be the beginning of a new operator
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
eNotation = true;
|
||||||
} else {
|
} else {
|
||||||
// the last character doesn't belong to us
|
// the last character doesn't belong to us
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var value;
|
|
||||||
if (allDigits) {
|
if (divideBy !== 0) {
|
||||||
value = 0;
|
baseValue /= divideBy;
|
||||||
var charCodeOfZero = 48; // '0'
|
|
||||||
for (var i = 0, ii = strBuf.length; i < ii; i++) {
|
|
||||||
value = value * 10 + (strBuf[i].charCodeAt(0) - charCodeOfZero);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
value = parseFloat(strBuf.join(''));
|
|
||||||
if (isNaN(value)) {
|
|
||||||
error('Invalid floating point number: ' + value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return value;
|
if (eNotation) {
|
||||||
|
baseValue *= Math.pow(10, powerValueSign * powerValue);
|
||||||
|
}
|
||||||
|
return sign * baseValue;
|
||||||
},
|
},
|
||||||
getString: function Lexer_getString() {
|
getString: function Lexer_getString() {
|
||||||
var numParen = 1;
|
var numParen = 1;
|
||||||
|
@ -14,6 +14,30 @@ describe('parser', function() {
|
|||||||
expect(result).toEqual(11.234);
|
expect(result).toEqual(11.234);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should parse PostScript numbers', function() {
|
||||||
|
var numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.', '0.0',
|
||||||
|
'123', '-98', '43445', '0', '+17'];
|
||||||
|
for (var i=0, ii=numbers.length; i<ii; i++) {
|
||||||
|
var num = numbers[i];
|
||||||
|
var input = new StringStream(num);
|
||||||
|
var lexer = new Lexer(input);
|
||||||
|
var result = lexer.getNumber();
|
||||||
|
|
||||||
|
expect(result).toEqual(parseFloat(num));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
it('should handle glued numbers and operators', function() {
|
||||||
|
var input = new StringStream('123ET');
|
||||||
|
var lexer = new Lexer(input);
|
||||||
|
var value = lexer.getNumber();
|
||||||
|
|
||||||
|
expect(value).toEqual(123);
|
||||||
|
// The lexer must not have consumed the 'E'
|
||||||
|
expect(lexer.currentChar).toEqual(0x45); // 'E'
|
||||||
|
});
|
||||||
|
|
||||||
it('should stop parsing strings at the end of stream', function() {
|
it('should stop parsing strings at the end of stream', function() {
|
||||||
var input = new StringStream('(1$4)');
|
var input = new StringStream('(1$4)');
|
||||||
input.getByte = function(super_getByte) {
|
input.getByte = function(super_getByte) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user