Rewrite Lexer_getNumber.
It now computes numbers using only basic arithmetic operations, instead of first building a string and then calling parseFloat. The new function doesn't behave exactly the same as the old one. In particular, the old behaviour was that when a number was immediately followed by an 'E', the 'E' was consumed. Now it is not consumed unless it starts a valid exponent, which allows for "glued" numbers and operators (e.g. "123ET"). Also, the new function is faster and consumes less memory.
This commit is contained in:
parent
591bd91482
commit
4a66eccedc
@ -393,47 +393,85 @@ var Lexer = (function LexerClosure() {
|
|||||||
nextChar: function Lexer_nextChar() {
|
nextChar: function Lexer_nextChar() {
|
||||||
return (this.currentChar = this.stream.getByte());
|
return (this.currentChar = this.stream.getByte());
|
||||||
},
|
},
|
||||||
|
peekChar: function Lexer_peekChar() {
|
||||||
|
return this.stream.peekBytes(1)[0];
|
||||||
|
},
|
||||||
getNumber: function Lexer_getNumber() {
|
getNumber: function Lexer_getNumber() {
|
||||||
var floating = false;
|
|
||||||
var ch = this.currentChar;
|
var ch = this.currentChar;
|
||||||
var allDigits = ch >= 0x30 && ch <= 0x39;
|
var eNotation = false;
|
||||||
var strBuf = this.strBuf;
|
var divideBy = 0; // different from 0 if it's a floating point value
|
||||||
strBuf.length = 0;
|
|
||||||
strBuf.push(String.fromCharCode(ch));
|
var sign = 1;
|
||||||
|
|
||||||
|
|
||||||
|
if (ch === 0x2D) { // '-'
|
||||||
|
sign = -1;
|
||||||
|
ch = this.nextChar();
|
||||||
|
} else if (ch === 0x2B) { // '+'
|
||||||
|
ch = this.nextChar();
|
||||||
|
}
|
||||||
|
if (ch === 0x2E) { // '.'
|
||||||
|
divideBy = 10;
|
||||||
|
ch = this.nextChar();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
||||||
|
error('Invalid number: ' + String.fromCharCode(ch));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
var baseValue = ch - 0x30; // '0'
|
||||||
|
var powerValue = 0;
|
||||||
|
var powerValueSign = 1;
|
||||||
|
|
||||||
while ((ch = this.nextChar()) >= 0) {
|
while ((ch = this.nextChar()) >= 0) {
|
||||||
if (ch >= 0x30 && ch <= 0x39) { // '0'-'9'
|
if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
|
||||||
strBuf.push(String.fromCharCode(ch));
|
var currentDigit = ch - 0x30; // '0'
|
||||||
} else if (ch === 0x2E && !floating) { // '.'
|
if (eNotation) { // We are after an 'e' or 'E'
|
||||||
strBuf.push('.');
|
powerValue = powerValue * 10 + currentDigit;
|
||||||
floating = true;
|
} else {
|
||||||
allDigits = false;
|
if (divideBy !== 0) { // We are after a point
|
||||||
|
divideBy *= 10;
|
||||||
|
}
|
||||||
|
baseValue = baseValue * 10 + currentDigit;
|
||||||
|
}
|
||||||
|
} else if (ch === 0x2E) { // '.'
|
||||||
|
if (divideBy === 0) {
|
||||||
|
divideBy = 1;
|
||||||
|
} else {
|
||||||
|
// A number can have only one '.'
|
||||||
|
break;
|
||||||
|
}
|
||||||
} else if (ch === 0x2D) { // '-'
|
} else if (ch === 0x2D) { // '-'
|
||||||
// ignore minus signs in the middle of numbers to match
|
// ignore minus signs in the middle of numbers to match
|
||||||
// Adobe's behavior
|
// Adobe's behavior
|
||||||
warn('Badly formated number');
|
warn('Badly formated number');
|
||||||
allDigits = false;
|
|
||||||
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
|
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
|
||||||
floating = true;
|
// 'E' can be either a scientific notation or the beginning of a new
|
||||||
allDigits = false;
|
// operator
|
||||||
|
var hasE = true;
|
||||||
|
ch = this.peekChar();
|
||||||
|
if (ch === 0x2B || ch === 0x2D) { // '+', '-'
|
||||||
|
powerValueSign = (ch === 0x2D) ? -1 : 1;
|
||||||
|
this.nextChar(); // Consume the sign character
|
||||||
|
} else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
||||||
|
// The 'E' must be the beginning of a new operator
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
eNotation = true;
|
||||||
} else {
|
} else {
|
||||||
// the last character doesn't belong to us
|
// the last character doesn't belong to us
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var value;
|
|
||||||
if (allDigits) {
|
if (divideBy !== 0) {
|
||||||
value = 0;
|
baseValue /= divideBy;
|
||||||
var charCodeOfZero = 48; // '0'
|
|
||||||
for (var i = 0, ii = strBuf.length; i < ii; i++) {
|
|
||||||
value = value * 10 + (strBuf[i].charCodeAt(0) - charCodeOfZero);
|
|
||||||
}
|
}
|
||||||
} else {
|
if (eNotation) {
|
||||||
value = parseFloat(strBuf.join(''));
|
baseValue *= Math.pow(10, powerValueSign * powerValue);
|
||||||
if (isNaN(value)) {
|
|
||||||
error('Invalid floating point number: ' + value);
|
|
||||||
}
|
}
|
||||||
}
|
return sign * baseValue;
|
||||||
return value;
|
|
||||||
},
|
},
|
||||||
getString: function Lexer_getString() {
|
getString: function Lexer_getString() {
|
||||||
var numParen = 1;
|
var numParen = 1;
|
||||||
|
@ -14,6 +14,30 @@ describe('parser', function() {
|
|||||||
expect(result).toEqual(11.234);
|
expect(result).toEqual(11.234);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should parse PostScript numbers', function() {
|
||||||
|
var numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.', '0.0',
|
||||||
|
'123', '-98', '43445', '0', '+17'];
|
||||||
|
for (var i=0, ii=numbers.length; i<ii; i++) {
|
||||||
|
var num = numbers[i];
|
||||||
|
var input = new StringStream(num);
|
||||||
|
var lexer = new Lexer(input);
|
||||||
|
var result = lexer.getNumber();
|
||||||
|
|
||||||
|
expect(result).toEqual(parseFloat(num));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
it('should handle glued numbers and operators', function() {
|
||||||
|
var input = new StringStream('123ET');
|
||||||
|
var lexer = new Lexer(input);
|
||||||
|
var value = lexer.getNumber();
|
||||||
|
|
||||||
|
expect(value).toEqual(123);
|
||||||
|
// The lexer must not have consumed the 'E'
|
||||||
|
expect(lexer.currentChar).toEqual(0x45); // 'E'
|
||||||
|
});
|
||||||
|
|
||||||
it('should stop parsing strings at the end of stream', function() {
|
it('should stop parsing strings at the end of stream', function() {
|
||||||
var input = new StringStream('(1$4)');
|
var input = new StringStream('(1$4)');
|
||||||
input.getByte = function(super_getByte) {
|
input.getByte = function(super_getByte) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user