Merge pull request #10635 from timvandermeij/lexer-parser
Convert `src/core/parser.js` to ES6 syntax and write more unit tests for the lexer and the parser
This commit is contained in:
commit
33bfbef6ba
2098
src/core/parser.js
2098
src/core/parser.js
File diff suppressed because it is too large
Load Diff
@ -12,144 +12,193 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
/* eslint no-var: error */
|
||||||
|
|
||||||
import { Lexer, Linearization } from '../../src/core/parser';
|
import { Lexer, Linearization, Parser } from '../../src/core/parser';
|
||||||
import { FormatError } from '../../src/shared/util';
|
import { FormatError } from '../../src/shared/util';
|
||||||
import { Name } from '../../src/core/primitives';
|
import { Name } from '../../src/core/primitives';
|
||||||
import { StringStream } from '../../src/core/stream';
|
import { StringStream } from '../../src/core/stream';
|
||||||
|
|
||||||
describe('parser', function() {
|
describe('parser', function() {
|
||||||
|
describe('Parser', function() {
|
||||||
|
describe('inlineStreamSkipEI', function() {
|
||||||
|
it('should skip over the EI marker if it is found', function() {
|
||||||
|
const string = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
|
||||||
|
'/F /A85 ID abc123~> EI Q';
|
||||||
|
const input = new StringStream(string);
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
const parser = new Parser(lexer, /* allowStreams = */ true,
|
||||||
|
/* xref = */ null);
|
||||||
|
parser.inlineStreamSkipEI(input);
|
||||||
|
expect(input.pos).toEqual(string.indexOf('Q'));
|
||||||
|
expect(input.peekByte()).toEqual(0x51); // 'Q'
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should skip to the end of stream if the EI marker is not found',
|
||||||
|
function() {
|
||||||
|
const string = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
|
||||||
|
'/F /A85 ID abc123~> Q';
|
||||||
|
const input = new StringStream(string);
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
const parser = new Parser(lexer, /* allowStreams = */ true,
|
||||||
|
/* xref = */ null);
|
||||||
|
parser.inlineStreamSkipEI(input);
|
||||||
|
expect(input.pos).toEqual(string.length);
|
||||||
|
expect(input.peekByte()).toEqual(-1);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('Lexer', function() {
|
describe('Lexer', function() {
|
||||||
it('should stop parsing numbers at the end of stream', function() {
|
describe('nextChar', function() {
|
||||||
var input = new StringStream('11.234');
|
it('should return and set -1 when the end of the stream is reached',
|
||||||
var lexer = new Lexer(input);
|
function() {
|
||||||
var result = lexer.getNumber();
|
const input = new StringStream('');
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.nextChar()).toEqual(-1);
|
||||||
|
expect(lexer.currentChar).toEqual(-1);
|
||||||
|
});
|
||||||
|
|
||||||
expect(result).toEqual(11.234);
|
it('should return and set the character after the current position',
|
||||||
|
function() {
|
||||||
|
const input = new StringStream('123');
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.nextChar()).toEqual(0x32); // '2'
|
||||||
|
expect(lexer.currentChar).toEqual(0x32); // '2'
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should parse PostScript numbers', function() {
|
describe('peekChar', function() {
|
||||||
var numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.', '0.0',
|
it('should only return -1 when the end of the stream is reached',
|
||||||
'123', '-98', '43445', '0', '+17'];
|
function() {
|
||||||
for (var i = 0, ii = numbers.length; i < ii; i++) {
|
const input = new StringStream('');
|
||||||
var num = numbers[i];
|
const lexer = new Lexer(input);
|
||||||
var input = new StringStream(num);
|
expect(lexer.peekChar()).toEqual(-1);
|
||||||
var lexer = new Lexer(input);
|
expect(lexer.currentChar).toEqual(-1);
|
||||||
var result = lexer.getNumber();
|
});
|
||||||
|
|
||||||
expect(result).toEqual(parseFloat(num));
|
it('should only return the character after the current position',
|
||||||
}
|
function() {
|
||||||
|
const input = new StringStream('123');
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.peekChar()).toEqual(0x32); // '2'
|
||||||
|
expect(lexer.currentChar).toEqual(0x31); // '1'
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should ignore double negative before number', function() {
|
describe('getNumber', function() {
|
||||||
var input = new StringStream('--205.88');
|
it('should stop parsing numbers at the end of stream', function() {
|
||||||
var lexer = new Lexer(input);
|
const input = new StringStream('11.234');
|
||||||
var result = lexer.getNumber();
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.getNumber()).toEqual(11.234);
|
||||||
|
});
|
||||||
|
|
||||||
expect(result).toEqual(-205.88);
|
it('should parse PostScript numbers', function() {
|
||||||
|
const numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.',
|
||||||
|
'0.0', '123', '-98', '43445', '0', '+17'];
|
||||||
|
for (const number of numbers) {
|
||||||
|
const input = new StringStream(number);
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.getNumber()).toEqual(parseFloat(number));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should ignore double negative before number', function() {
|
||||||
|
const input = new StringStream('--205.88');
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.getNumber()).toEqual(-205.88);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should ignore minus signs in the middle of number', function() {
|
||||||
|
const input = new StringStream('205--.88');
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.getNumber()).toEqual(205.88);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should ignore line-breaks between operator and digit in number',
|
||||||
|
function() {
|
||||||
|
const minusInput = new StringStream('-\r\n205.88');
|
||||||
|
const minusLexer = new Lexer(minusInput);
|
||||||
|
expect(minusLexer.getNumber()).toEqual(-205.88);
|
||||||
|
|
||||||
|
const plusInput = new StringStream('+\r\n205.88');
|
||||||
|
const plusLexer = new Lexer(plusInput);
|
||||||
|
expect(plusLexer.getNumber()).toEqual(205.88);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should treat a single decimal point as zero', function() {
|
||||||
|
const input = new StringStream('.');
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.getNumber()).toEqual(0);
|
||||||
|
|
||||||
|
const numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
|
||||||
|
for (const number of numbers) {
|
||||||
|
const input = new StringStream(number);
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
|
||||||
|
expect(function() {
|
||||||
|
return lexer.getNumber();
|
||||||
|
}).toThrowError(FormatError, /^Invalid number:\s/);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle glued numbers and operators', function() {
|
||||||
|
const input = new StringStream('123ET');
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.getNumber()).toEqual(123);
|
||||||
|
// The lexer must not have consumed the 'E'
|
||||||
|
expect(lexer.currentChar).toEqual(0x45); // 'E'
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should ignore minus signs in the middle of number', function() {
|
describe('getString', function() {
|
||||||
var input = new StringStream('205--.88');
|
it('should stop parsing strings at the end of stream', function() {
|
||||||
var lexer = new Lexer(input);
|
const input = new StringStream('(1$4)');
|
||||||
var result = lexer.getNumber();
|
input.getByte = function(super_getByte) {
|
||||||
|
// Simulating end of file using null (see issue 2766).
|
||||||
|
const ch = super_getByte.call(input);
|
||||||
|
return (ch === 0x24 /* '$' */ ? -1 : ch);
|
||||||
|
}.bind(input, input.getByte);
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.getString()).toEqual('1');
|
||||||
|
});
|
||||||
|
|
||||||
expect(result).toEqual(205.88);
|
it('should ignore escaped CR and LF', function() {
|
||||||
|
// '(\101\<CR><LF>\102)' should be parsed as 'AB'.
|
||||||
|
const input = new StringStream('(\\101\\\r\n\\102\\\r\\103\\\n\\104)');
|
||||||
|
const lexer = new Lexer(input);
|
||||||
|
expect(lexer.getString()).toEqual('ABCD');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should ignore line-breaks between operator and digit in number',
|
describe('getHexString', function() {
|
||||||
function() {
|
it('should not throw exception on bad input', function() {
|
||||||
let minusInput = new StringStream('-\r\n205.88');
|
// '7 0 2 15 5 2 2 2 4 3 2 4' should be parsed as '70 21 55 22 24 32'.
|
||||||
let minusLexer = new Lexer(minusInput);
|
const input = new StringStream('<7 0 2 15 5 2 2 2 4 3 2 4>');
|
||||||
|
const lexer = new Lexer(input);
|
||||||
expect(minusLexer.getNumber()).toEqual(-205.88);
|
expect(lexer.getHexString()).toEqual('p!U"$2');
|
||||||
|
});
|
||||||
let plusInput = new StringStream('+\r\n205.88');
|
|
||||||
let plusLexer = new Lexer(plusInput);
|
|
||||||
|
|
||||||
expect(plusLexer.getNumber()).toEqual(205.88);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should treat a single decimal point as zero', function() {
|
describe('getName', function() {
|
||||||
let input = new StringStream('.');
|
it('should handle Names with invalid usage of NUMBER SIGN (#)',
|
||||||
let lexer = new Lexer(input);
|
function() {
|
||||||
|
const inputNames = ['/# 680 0 R', '/#AQwerty', '/#A<</B'];
|
||||||
|
const expectedNames = ['#', '#AQwerty', '#A'];
|
||||||
|
|
||||||
expect(lexer.getNumber()).toEqual(0);
|
for (let i = 0, ii = inputNames.length; i < ii; i++) {
|
||||||
|
const input = new StringStream(inputNames[i]);
|
||||||
let numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
|
const lexer = new Lexer(input);
|
||||||
for (let number of numbers) {
|
expect(lexer.getName()).toEqual(Name.get(expectedNames[i]));
|
||||||
let input = new StringStream(number);
|
}
|
||||||
let lexer = new Lexer(input);
|
});
|
||||||
|
|
||||||
expect(function() {
|
|
||||||
return lexer.getNumber();
|
|
||||||
}).toThrowError(FormatError, /^Invalid number:\s/);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should handle glued numbers and operators', function() {
|
|
||||||
var input = new StringStream('123ET');
|
|
||||||
var lexer = new Lexer(input);
|
|
||||||
var value = lexer.getNumber();
|
|
||||||
|
|
||||||
expect(value).toEqual(123);
|
|
||||||
// The lexer must not have consumed the 'E'
|
|
||||||
expect(lexer.currentChar).toEqual(0x45); // 'E'
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should stop parsing strings at the end of stream', function() {
|
|
||||||
var input = new StringStream('(1$4)');
|
|
||||||
input.getByte = function(super_getByte) {
|
|
||||||
// simulating end of file using null (see issue 2766)
|
|
||||||
var ch = super_getByte.call(input);
|
|
||||||
return (ch === 0x24 /* '$' */ ? -1 : ch);
|
|
||||||
}.bind(input, input.getByte);
|
|
||||||
var lexer = new Lexer(input);
|
|
||||||
var result = lexer.getString();
|
|
||||||
|
|
||||||
expect(result).toEqual('1');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should not throw exception on bad input', function() {
|
|
||||||
// '8 0 2 15 5 2 2 2 4 3 2 4'
|
|
||||||
// should be parsed as
|
|
||||||
// '80 21 55 22 24 32'
|
|
||||||
var input = new StringStream('<7 0 2 15 5 2 2 2 4 3 2 4>');
|
|
||||||
var lexer = new Lexer(input);
|
|
||||||
var result = lexer.getHexString();
|
|
||||||
|
|
||||||
expect(result).toEqual('p!U"$2');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should ignore escaped CR and LF', function() {
|
|
||||||
// '(\101\<CR><LF>\102)'
|
|
||||||
// should be parsed as
|
|
||||||
// "AB"
|
|
||||||
var input = new StringStream('(\\101\\\r\n\\102\\\r\\103\\\n\\104)');
|
|
||||||
var lexer = new Lexer(input);
|
|
||||||
var result = lexer.getString();
|
|
||||||
|
|
||||||
expect(result).toEqual('ABCD');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should handle Names with invalid usage of NUMBER SIGN (#)', function() {
|
|
||||||
var inputNames = ['/# 680 0 R', '/#AQwerty', '/#A<</B'];
|
|
||||||
var expectedNames = ['#', '#AQwerty', '#A'];
|
|
||||||
|
|
||||||
for (var i = 0, ii = inputNames.length; i < ii; i++) {
|
|
||||||
var input = new StringStream(inputNames[i]);
|
|
||||||
var lexer = new Lexer(input);
|
|
||||||
var result = lexer.getName();
|
|
||||||
|
|
||||||
expect(result).toEqual(Name.get(expectedNames[i]));
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('Linearization', function() {
|
describe('Linearization', function() {
|
||||||
it('should not find a linearization dictionary', function () {
|
it('should not find a linearization dictionary', function() {
|
||||||
// Not an actual linearization dictionary.
|
// Not an actual linearization dictionary.
|
||||||
var stream1 = new StringStream(
|
const stream1 = new StringStream(
|
||||||
'3 0 obj\n' +
|
'3 0 obj\n' +
|
||||||
'<<\n' +
|
'<<\n' +
|
||||||
'/Length 4622\n' +
|
'/Length 4622\n' +
|
||||||
@ -160,7 +209,7 @@ describe('parser', function() {
|
|||||||
expect(Linearization.create(stream1)).toEqual(null);
|
expect(Linearization.create(stream1)).toEqual(null);
|
||||||
|
|
||||||
// Linearization dictionary with invalid version number.
|
// Linearization dictionary with invalid version number.
|
||||||
var stream2 = new StringStream(
|
const stream2 = new StringStream(
|
||||||
'1 0 obj\n' +
|
'1 0 obj\n' +
|
||||||
'<<\n' +
|
'<<\n' +
|
||||||
'/Linearized 0\n' +
|
'/Linearized 0\n' +
|
||||||
@ -170,8 +219,8 @@ describe('parser', function() {
|
|||||||
expect(Linearization.create(stream2)).toEqual(null);
|
expect(Linearization.create(stream2)).toEqual(null);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should accept a valid linearization dictionary', function () {
|
it('should accept a valid linearization dictionary', function() {
|
||||||
var stream = new StringStream(
|
const stream = new StringStream(
|
||||||
'131 0 obj\n' +
|
'131 0 obj\n' +
|
||||||
'<<\n' +
|
'<<\n' +
|
||||||
'/Linearized 1\n' +
|
'/Linearized 1\n' +
|
||||||
@ -184,7 +233,7 @@ describe('parser', function() {
|
|||||||
'>>\n' +
|
'>>\n' +
|
||||||
'endobj'
|
'endobj'
|
||||||
);
|
);
|
||||||
var expectedLinearizationDict = {
|
const expectedLinearizationDict = {
|
||||||
length: 90,
|
length: 90,
|
||||||
hints: [1388, 863],
|
hints: [1388, 863],
|
||||||
objectNumberFirst: 133,
|
objectNumberFirst: 133,
|
||||||
@ -197,9 +246,9 @@ describe('parser', function() {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should reject a linearization dictionary with invalid ' +
|
it('should reject a linearization dictionary with invalid ' +
|
||||||
'integer parameters', function () {
|
'integer parameters', function() {
|
||||||
// The /L parameter should be equal to the stream length.
|
// The /L parameter should be equal to the stream length.
|
||||||
var stream1 = new StringStream(
|
const stream1 = new StringStream(
|
||||||
'1 0 obj\n' +
|
'1 0 obj\n' +
|
||||||
'<<\n' +
|
'<<\n' +
|
||||||
'/Linearized 1\n' +
|
'/Linearized 1\n' +
|
||||||
@ -212,13 +261,13 @@ describe('parser', function() {
|
|||||||
'>>\n' +
|
'>>\n' +
|
||||||
'endobj'
|
'endobj'
|
||||||
);
|
);
|
||||||
expect(function () {
|
expect(function() {
|
||||||
return Linearization.create(stream1);
|
return Linearization.create(stream1);
|
||||||
}).toThrow(new Error('The "L" parameter in the linearization ' +
|
}).toThrow(new Error('The "L" parameter in the linearization ' +
|
||||||
'dictionary does not equal the stream length.'));
|
'dictionary does not equal the stream length.'));
|
||||||
|
|
||||||
// The /E parameter should not be zero.
|
// The /E parameter should not be zero.
|
||||||
var stream2 = new StringStream(
|
const stream2 = new StringStream(
|
||||||
'1 0 obj\n' +
|
'1 0 obj\n' +
|
||||||
'<<\n' +
|
'<<\n' +
|
||||||
'/Linearized 1\n' +
|
'/Linearized 1\n' +
|
||||||
@ -231,13 +280,13 @@ describe('parser', function() {
|
|||||||
'>>\n' +
|
'>>\n' +
|
||||||
'endobj'
|
'endobj'
|
||||||
);
|
);
|
||||||
expect(function () {
|
expect(function() {
|
||||||
return Linearization.create(stream2);
|
return Linearization.create(stream2);
|
||||||
}).toThrow(new Error('The "E" parameter in the linearization ' +
|
}).toThrow(new Error('The "E" parameter in the linearization ' +
|
||||||
'dictionary is invalid.'));
|
'dictionary is invalid.'));
|
||||||
|
|
||||||
// The /O parameter should be an integer.
|
// The /O parameter should be an integer.
|
||||||
var stream3 = new StringStream(
|
const stream3 = new StringStream(
|
||||||
'1 0 obj\n' +
|
'1 0 obj\n' +
|
||||||
'<<\n' +
|
'<<\n' +
|
||||||
'/Linearized 1\n' +
|
'/Linearized 1\n' +
|
||||||
@ -250,16 +299,16 @@ describe('parser', function() {
|
|||||||
'>>\n' +
|
'>>\n' +
|
||||||
'endobj'
|
'endobj'
|
||||||
);
|
);
|
||||||
expect(function () {
|
expect(function() {
|
||||||
return Linearization.create(stream3);
|
return Linearization.create(stream3);
|
||||||
}).toThrow(new Error('The "O" parameter in the linearization ' +
|
}).toThrow(new Error('The "O" parameter in the linearization ' +
|
||||||
'dictionary is invalid.'));
|
'dictionary is invalid.'));
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should reject a linearization dictionary with invalid hint parameters',
|
it('should reject a linearization dictionary with invalid hint parameters',
|
||||||
function () {
|
function() {
|
||||||
// The /H parameter should be an array.
|
// The /H parameter should be an array.
|
||||||
var stream1 = new StringStream(
|
const stream1 = new StringStream(
|
||||||
'1 0 obj\n' +
|
'1 0 obj\n' +
|
||||||
'<<\n' +
|
'<<\n' +
|
||||||
'/Linearized 1\n' +
|
'/Linearized 1\n' +
|
||||||
@ -272,13 +321,13 @@ describe('parser', function() {
|
|||||||
'>>\n' +
|
'>>\n' +
|
||||||
'endobj'
|
'endobj'
|
||||||
);
|
);
|
||||||
expect(function () {
|
expect(function() {
|
||||||
return Linearization.create(stream1);
|
return Linearization.create(stream1);
|
||||||
}).toThrow(new Error('Hint array in the linearization dictionary ' +
|
}).toThrow(new Error('Hint array in the linearization dictionary ' +
|
||||||
'is invalid.'));
|
'is invalid.'));
|
||||||
|
|
||||||
// The hint array should contain two, or four, elements.
|
// The hint array should contain two, or four, elements.
|
||||||
var stream2 = new StringStream(
|
const stream2 = new StringStream(
|
||||||
'1 0 obj\n' +
|
'1 0 obj\n' +
|
||||||
'<<\n' +
|
'<<\n' +
|
||||||
'/Linearized 1\n' +
|
'/Linearized 1\n' +
|
||||||
@ -291,13 +340,13 @@ describe('parser', function() {
|
|||||||
'>>\n' +
|
'>>\n' +
|
||||||
'endobj'
|
'endobj'
|
||||||
);
|
);
|
||||||
expect(function () {
|
expect(function() {
|
||||||
return Linearization.create(stream2);
|
return Linearization.create(stream2);
|
||||||
}).toThrow(new Error('Hint array in the linearization dictionary ' +
|
}).toThrow(new Error('Hint array in the linearization dictionary ' +
|
||||||
'is invalid.'));
|
'is invalid.'));
|
||||||
|
|
||||||
// The hint array should not contain zero.
|
// The hint array should not contain zero.
|
||||||
var stream3 = new StringStream(
|
const stream3 = new StringStream(
|
||||||
'1 0 obj\n' +
|
'1 0 obj\n' +
|
||||||
'<<\n' +
|
'<<\n' +
|
||||||
'/Linearized 1\n' +
|
'/Linearized 1\n' +
|
||||||
@ -310,7 +359,7 @@ describe('parser', function() {
|
|||||||
'>>\n' +
|
'>>\n' +
|
||||||
'endobj'
|
'endobj'
|
||||||
);
|
);
|
||||||
expect(function () {
|
expect(function() {
|
||||||
return Linearization.create(stream3);
|
return Linearization.create(stream3);
|
||||||
}).toThrow(new Error('Hint (2) in the linearization dictionary ' +
|
}).toThrow(new Error('Hint (2) in the linearization dictionary ' +
|
||||||
'is invalid.'));
|
'is invalid.'));
|
||||||
|
Loading…
Reference in New Issue
Block a user