pdf.js/test/unit/parser_spec.js
Tim van der Meij 4a4b197b9d
Write more unit tests for the lexer and the parser
Moreover, group the lexer unit tests per method. This matches what we
do for other classes and makes it easier to see which methods are
untested or insufficiently tested.

The parser itself is not unit tested yet, so this patch provides a start
for doing so. The `inlineStreamSkipEI` method is used in other end
marker detection methods, so it's important that its functionality is
correct for proper parsing.
2019-03-17 13:36:23 +01:00

369 lines
12 KiB
JavaScript

/* Copyright 2017 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* eslint no-var: error */
import { Lexer, Linearization, Parser } from '../../src/core/parser';
import { FormatError } from '../../src/shared/util';
import { Name } from '../../src/core/primitives';
import { StringStream } from '../../src/core/stream';
describe('parser', function() {
describe('Parser', function() {
describe('inlineStreamSkipEI', function() {
  /**
   * Creates a stream for `data`, wraps it in a lexer and a parser, and runs
   * `inlineStreamSkipEI` on that same stream.
   * @param {string} data - The content stream text to scan.
   * @returns {StringStream} The stream, so that its position can be checked.
   */
  function skipInlineStream(data) {
    const stream = new StringStream(data);
    const parser = new Parser(new Lexer(stream), /* allowStreams = */ true,
                              /* xref = */ null);
    parser.inlineStreamSkipEI(stream);
    return stream;
  }

  it('should skip over the EI marker if it is found', function() {
    const data = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
                 '/F /A85 ID abc123~> EI Q';
    const stream = skipInlineStream(data);

    // The stream must be left on the 'Q' operator that follows the marker.
    expect(stream.pos).toEqual(data.indexOf('Q'));
    expect(stream.peekByte()).toEqual(0x51); // 'Q'
  });

  it('should skip to the end of stream if the EI marker is not found',
     function() {
    // Same data as above, but without an `EI` marker before the 'Q'.
    const data = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
                 '/F /A85 ID abc123~> Q';
    const stream = skipInlineStream(data);

    // Everything is consumed, so peeking reports -1.
    expect(stream.pos).toEqual(data.length);
    expect(stream.peekByte()).toEqual(-1);
  });
});
});
describe('Lexer', function() {
describe('nextChar', function() {
  it('should return and set -1 when the end of the stream is reached',
     function() {
    // An empty stream has nothing to advance to.
    const lexer = new Lexer(new StringStream(''));
    const returned = lexer.nextChar();

    expect(returned).toEqual(-1);
    expect(lexer.currentChar).toEqual(-1);
  });

  it('should return and set the character after the current position',
     function() {
    const lexer = new Lexer(new StringStream('123'));
    const returned = lexer.nextChar();

    // The return value and the stored current character must agree.
    expect(returned).toEqual(0x32); // '2'
    expect(lexer.currentChar).toEqual(0x32); // '2'
  });
});
describe('peekChar', function() {
  it('should only return -1 when the end of the stream is reached',
     function() {
    const lexer = new Lexer(new StringStream(''));
    const peeked = lexer.peekChar();

    expect(peeked).toEqual(-1);
    expect(lexer.currentChar).toEqual(-1);
  });

  it('should only return the character after the current position',
     function() {
    const lexer = new Lexer(new StringStream('123'));
    const peeked = lexer.peekChar();

    // Peeking reports the next character but leaves the current one alone.
    expect(peeked).toEqual(0x32); // '2'
    expect(lexer.currentChar).toEqual(0x31); // '1'
  });
});
describe('getNumber', function() {
  // Creates a lexer that reads directly from the given text.
  const lexerFor = (text) => new Lexer(new StringStream(text));

  it('should stop parsing numbers at the end of stream', function() {
    expect(lexerFor('11.234').getNumber()).toEqual(11.234);
  });

  it('should parse PostScript numbers', function() {
    const samples = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.',
                     '0.0', '123', '-98', '43445', '0', '+17'];
    // Each sample must produce the same value as `parseFloat`.
    samples.forEach((sample) => {
      expect(lexerFor(sample).getNumber()).toEqual(parseFloat(sample));
    });
  });

  it('should ignore double negative before number', function() {
    expect(lexerFor('--205.88').getNumber()).toEqual(-205.88);
  });

  it('should ignore minus signs in the middle of number', function() {
    expect(lexerFor('205--.88').getNumber()).toEqual(205.88);
  });

  it('should ignore line-breaks between operator and digit in number',
     function() {
    expect(lexerFor('-\r\n205.88').getNumber()).toEqual(-205.88);
    expect(lexerFor('+\r\n205.88').getNumber()).toEqual(205.88);
  });

  it('should treat a single decimal point as zero', function() {
    expect(lexerFor('.').getNumber()).toEqual(0);

    // These digit-less inputs, in contrast, must be rejected.
    const invalidSamples = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
    for (const sample of invalidSamples) {
      // Build the lexer outside the closure so that only `getNumber` itself
      // is checked for the exception.
      const invalidLexer = lexerFor(sample);
      expect(() => invalidLexer.getNumber())
        .toThrowError(FormatError, /^Invalid number:\s/);
    }
  });

  it('should handle glued numbers and operators', function() {
    const lexer = lexerFor('123ET');
    expect(lexer.getNumber()).toEqual(123);
    // The lexer must not have consumed the 'E'
    expect(lexer.currentChar).toEqual(0x45); // 'E'
  });
});
describe('getString', function() {
  it('should stop parsing strings at the end of stream', function() {
    const input = new StringStream('(1$4)');
    // Simulate a premature end of file (see issue 2766): wrap the stream's
    // `getByte` so that the '$' byte is reported as -1, the value the lexer
    // uses for "end of stream", while every other byte passes through.
    // The override must be installed before the lexer is created.
    const originalGetByte = input.getByte;
    input.getByte = function() {
      const ch = originalGetByte.call(input);
      return (ch === 0x24 /* '$' */ ? -1 : ch);
    };
    const lexer = new Lexer(input);
    // Only the character in front of the simulated end of file is kept.
    expect(lexer.getString()).toEqual('1');
  });

  it('should ignore escaped CR and LF', function() {
    // A backslash followed by CR LF, by a lone CR, or by a lone LF must be
    // dropped: '(\101\<CR><LF>\102\<CR>\103\<LF>\104)' should be parsed
    // as 'ABCD'.
    const input = new StringStream('(\\101\\\r\n\\102\\\r\\103\\\n\\104)');
    const lexer = new Lexer(input);
    expect(lexer.getString()).toEqual('ABCD');
  });
});
describe('getHexString', function() {
  it('should not throw exception on bad input', function() {
    // The digits are separated by spaces and one "digit" is given as '15'.
    // '7 0 2 15 5 2 2 2 4 3 2 4' should be parsed as '70 21 55 22 24 32',
    // i.e. the characters 'p', '!', 'U', '"', '$' and '2'.
    const lexer = new Lexer(new StringStream('<7 0 2 15 5 2 2 2 4 3 2 4>'));
    const decoded = lexer.getHexString();

    expect(decoded).toEqual('p!U"$2');
  });
});
describe('getName', function() {
  it('should handle Names with invalid usage of NUMBER SIGN (#)',
     function() {
    // Each entry pairs the raw input with the name text that is expected
    // to be extracted from it.
    const cases = [
      ['/# 680 0 R', '#'],
      ['/#AQwerty', '#AQwerty'],
      ['/#A<</B', '#A'],
    ];

    for (const [source, expectedName] of cases) {
      const lexer = new Lexer(new StringStream(source));
      expect(lexer.getName()).toEqual(Name.get(expectedName));
    }
  });
});
});
describe('Linearization', function() {
it('should not find a linearization dictionary', function() {
  const inputs = [
    // Not an actual linearization dictionary.
    '3 0 obj\n' +
    '<<\n' +
    '/Length 4622\n' +
    '/Filter /FlateDecode\n' +
    '>>\n' +
    'endobj',

    // Linearization dictionary with invalid version number.
    '1 0 obj\n' +
    '<<\n' +
    '/Linearized 0\n' +
    '>>\n' +
    'endobj',
  ];

  // In both cases `create` must report "no dictionary" by returning null.
  for (const data of inputs) {
    const result = Linearization.create(new StringStream(data));
    expect(result).toEqual(null);
  }
});
it('should accept a valid linearization dictionary', function() {
  // The /L value (90) equals the length of this data.
  const data =
    '131 0 obj\n' +
    '<<\n' +
    '/Linearized 1\n' +
    '/O 133\n' +
    '/H [ 1388 863 ]\n' +
    '/L 90\n' +
    '/E 43573\n' +
    '/N 18\n' +
    '/T 193883\n' +
    '>>\n' +
    'endobj';

  // Every dictionary value above must show up under its property name;
  // `pageFirst` has no counterpart in the data and is expected to be 0.
  const expected = {
    length: 90,
    hints: [1388, 863],
    objectNumberFirst: 133,
    endFirst: 43573,
    numPages: 18,
    mainXRefEntriesOffset: 193883,
    pageFirst: 0,
  };

  const linearization = Linearization.create(new StringStream(data));
  expect(linearization).toEqual(expected);
});
it('should reject a linearization dictionary with invalid ' +
   'integer parameters', function() {
  // Each case pairs a dictionary with the error message it must trigger.
  const cases = [
    {
      // The /L parameter should be equal to the stream length.
      data: '1 0 obj\n' +
            '<<\n' +
            '/Linearized 1\n' +
            '/O 133\n' +
            '/H [ 1388 863 ]\n' +
            '/L 196622\n' +
            '/E 43573\n' +
            '/N 18\n' +
            '/T 193883\n' +
            '>>\n' +
            'endobj',
      message: 'The "L" parameter in the linearization ' +
               'dictionary does not equal the stream length.',
    },
    {
      // The /E parameter should not be zero.
      data: '1 0 obj\n' +
            '<<\n' +
            '/Linearized 1\n' +
            '/O 133\n' +
            '/H [ 1388 863 ]\n' +
            '/L 84\n' +
            '/E 0\n' +
            '/N 18\n' +
            '/T 193883\n' +
            '>>\n' +
            'endobj',
      message: 'The "E" parameter in the linearization ' +
               'dictionary is invalid.',
    },
    {
      // The /O parameter should be an integer.
      data: '1 0 obj\n' +
            '<<\n' +
            '/Linearized 1\n' +
            '/O /abc\n' +
            '/H [ 1388 863 ]\n' +
            '/L 89\n' +
            '/E 43573\n' +
            '/N 18\n' +
            '/T 193883\n' +
            '>>\n' +
            'endobj',
      message: 'The "O" parameter in the linearization ' +
               'dictionary is invalid.',
    },
  ];

  for (const { data, message, } of cases) {
    const stream = new StringStream(data);
    expect(() => Linearization.create(stream)).toThrow(new Error(message));
  }
});
it('should reject a linearization dictionary with invalid hint parameters',
   function() {
  // Each case pairs a dictionary with the error message it must trigger.
  // In all of them /L matches the length of the data.
  const cases = [
    {
      // The /H parameter should be an array.
      data: '1 0 obj\n' +
            '<<\n' +
            '/Linearized 1\n' +
            '/O 133\n' +
            '/H 1388\n' +
            '/L 80\n' +
            '/E 43573\n' +
            '/N 18\n' +
            '/T 193883\n' +
            '>>\n' +
            'endobj',
      message: 'Hint array in the linearization dictionary ' +
               'is invalid.',
    },
    {
      // The hint array should contain two, or four, elements.
      data: '1 0 obj\n' +
            '<<\n' +
            '/Linearized 1\n' +
            '/O 133\n' +
            '/H [ 1388 ]\n' +
            '/L 84\n' +
            '/E 43573\n' +
            '/N 18\n' +
            '/T 193883\n' +
            '>>\n' +
            'endobj',
      message: 'Hint array in the linearization dictionary ' +
               'is invalid.',
    },
    {
      // The hint array should not contain zero; the offending entry is
      // the one at index 2.
      data: '1 0 obj\n' +
            '<<\n' +
            '/Linearized 1\n' +
            '/O 133\n' +
            '/H [ 1388 863 0 234]\n' +
            '/L 93\n' +
            '/E 43573\n' +
            '/N 18\n' +
            '/T 193883\n' +
            '>>\n' +
            'endobj',
      message: 'Hint (2) in the linearization dictionary ' +
               'is invalid.',
    },
  ];

  for (const { data, message, } of cases) {
    const stream = new StringStream(data);
    expect(() => Linearization.create(stream)).toThrow(new Error(message));
  }
});
});
});