Merge pull request #10635 from timvandermeij/lexer-parser

Convert `src/core/parser.js` to ES6 syntax and write more unit tests for the lexer and the parser
This commit is contained in:
Tim van der Meij 2019-03-19 23:17:34 +01:00 committed by GitHub
commit 33bfbef6ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 1238 additions and 1167 deletions

View File

@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* eslint no-var: error */
import {
Ascii85Stream, AsciiHexStream, FlateStream, LZWStream, NullStream,
@ -34,7 +35,7 @@ const MAX_LENGTH_TO_CACHE = 1000;
const MAX_ADLER32_LENGTH = 5552;
function computeAdler32(bytes) {
let bytesLength = bytes.length;
const bytesLength = bytes.length;
if (typeof PDFJSDev === 'undefined' ||
PDFJSDev.test('!PRODUCTION || TESTING')) {
assert(bytesLength < MAX_ADLER32_LENGTH,
@ -49,22 +50,23 @@ function computeAdler32(bytes) {
return ((b % 65521) << 16) | (a % 65521);
}
var Parser = (function ParserClosure() {
function Parser(lexer, allowStreams, xref, recoveryMode) {
class Parser {
constructor(lexer, allowStreams, xref, recoveryMode = false) {
this.lexer = lexer;
this.allowStreams = allowStreams;
this.xref = xref;
this.recoveryMode = recoveryMode || false;
this.recoveryMode = recoveryMode;
this.imageCache = Object.create(null);
this.refill();
}
Parser.prototype = {
refill: function Parser_refill() {
refill() {
this.buf1 = this.lexer.getObj();
this.buf2 = this.lexer.getObj();
},
shift: function Parser_shift() {
}
shift() {
if (isCmd(this.buf2, 'ID')) {
this.buf1 = this.buf2;
this.buf2 = null;
@ -72,8 +74,9 @@ var Parser = (function ParserClosure() {
this.buf1 = this.buf2;
this.buf2 = this.lexer.getObj();
}
},
tryShift: function Parser_tryShift() {
}
tryShift() {
try {
this.shift();
return true;
@ -85,9 +88,10 @@ var Parser = (function ParserClosure() {
// state and call this.shift() twice to reset the buffers.
return false;
}
},
getObj: function Parser_getObj(cipherTransform) {
var buf1 = this.buf1;
}
getObj(cipherTransform) {
const buf1 = this.buf1;
this.shift();
if (buf1 instanceof Cmd) {
@ -95,7 +99,7 @@ var Parser = (function ParserClosure() {
case 'BI': // inline image
return this.makeInlineImage(cipherTransform);
case '[': // array
var array = [];
const array = [];
while (!isCmd(this.buf1, ']') && !isEOF(this.buf1)) {
array.push(this.getObj(cipherTransform));
}
@ -108,7 +112,7 @@ var Parser = (function ParserClosure() {
this.shift();
return array;
case '<<': // dictionary or stream
var dict = new Dict(this.xref);
const dict = new Dict(this.xref);
while (!isCmd(this.buf1, '>>') && !isEOF(this.buf1)) {
if (!isName(this.buf1)) {
info('Malformed dictionary: key must be a name object');
@ -116,7 +120,7 @@ var Parser = (function ParserClosure() {
continue;
}
var key = this.buf1.name;
const key = this.buf1.name;
this.shift();
if (isEOF(this.buf1)) {
break;
@ -144,9 +148,9 @@ var Parser = (function ParserClosure() {
}
if (Number.isInteger(buf1)) { // indirect reference or integer
var num = buf1;
const num = buf1;
if (Number.isInteger(this.buf1) && isCmd(this.buf2, 'R')) {
var ref = new Ref(num, this.buf1);
const ref = new Ref(num, this.buf1);
this.shift();
this.shift();
return ref;
@ -155,7 +159,7 @@ var Parser = (function ParserClosure() {
}
if (isString(buf1)) { // string
var str = buf1;
let str = buf1;
if (cipherTransform) {
str = cipherTransform.decryptString(str);
}
@ -164,7 +168,8 @@ var Parser = (function ParserClosure() {
// simple object
return buf1;
},
}
/**
* Find the end of the stream by searching for the /EI\s/.
* @returns {number} The inline stream length.
@ -183,7 +188,7 @@ var Parser = (function ParserClosure() {
if (ch === SPACE || ch === LF || ch === CR) {
maybeEIPos = stream.pos;
// Let's check that the next `n` bytes are ASCII... just to be sure.
let followingBytes = stream.peekBytes(n);
const followingBytes = stream.peekBytes(n);
for (let i = 0, ii = followingBytes.length; i < ii; i++) {
ch = followingBytes[i];
if (ch === NUL && followingBytes[i + 1] !== NUL) {
@ -235,14 +240,14 @@ var Parser = (function ParserClosure() {
endOffset--;
}
return ((stream.pos - endOffset) - startPos);
},
}
/**
* Find the EOI (end-of-image) marker 0xFFD9 of the stream.
* @returns {number} The inline stream length.
*/
findDCTDecodeInlineStreamEnd:
function Parser_findDCTDecodeInlineStreamEnd(stream) {
var startPos = stream.pos, foundEOI = false, b, markerLength, length;
findDCTDecodeInlineStreamEnd(stream) {
let startPos = stream.pos, foundEOI = false, b, markerLength, length;
while ((b = stream.getByte()) !== -1) {
if (b !== 0xFF) { // Not a valid marker.
continue;
@ -331,14 +336,15 @@ var Parser = (function ParserClosure() {
}
this.inlineStreamSkipEI(stream);
return length;
},
}
/**
* Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream.
* @returns {number} The inline stream length.
*/
findASCII85DecodeInlineStreamEnd(stream) {
var TILDE = 0x7E, GT = 0x3E;
var startPos = stream.pos, ch, length;
const TILDE = 0x7E, GT = 0x3E;
let startPos = stream.pos, ch, length;
while ((ch = stream.getByte()) !== -1) {
if (ch === TILDE) {
ch = stream.peekByte();
@ -363,15 +369,15 @@ var Parser = (function ParserClosure() {
}
this.inlineStreamSkipEI(stream);
return length;
},
}
/**
* Find the EOD (end-of-data) marker '>' (i.e. GT) of the stream.
* @returns {number} The inline stream length.
*/
findASCIIHexDecodeInlineStreamEnd:
function Parser_findASCIIHexDecodeInlineStreamEnd(stream) {
var GT = 0x3E;
var startPos = stream.pos, ch, length;
findASCIIHexDecodeInlineStreamEnd(stream) {
const GT = 0x3E;
let startPos = stream.pos, ch, length;
while ((ch = stream.getByte()) !== -1) {
if (ch === GT) {
break;
@ -386,13 +392,14 @@ var Parser = (function ParserClosure() {
}
this.inlineStreamSkipEI(stream);
return length;
},
}
/**
* Skip over the /EI/ for streams where we search for an EOD marker.
*/
inlineStreamSkipEI: function Parser_inlineStreamSkipEI(stream) {
var E = 0x45, I = 0x49;
var state = 0, ch;
inlineStreamSkipEI(stream) {
const E = 0x45, I = 0x49;
let state = 0, ch;
while ((ch = stream.getByte()) !== -1) {
if (state === 0) {
state = (ch === E) ? 1 : 0;
@ -402,18 +409,20 @@ var Parser = (function ParserClosure() {
break;
}
}
},
makeInlineImage: function Parser_makeInlineImage(cipherTransform) {
var lexer = this.lexer;
var stream = lexer.stream;
}
makeInlineImage(cipherTransform) {
const lexer = this.lexer;
const stream = lexer.stream;
// Parse dictionary.
let dict = new Dict(this.xref), dictLength;
const dict = new Dict(this.xref);
let dictLength;
while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) {
if (!isName(this.buf1)) {
throw new FormatError('Dictionary key must be a name object');
}
var key = this.buf1.name;
const key = this.buf1.name;
this.shift();
if (isEOF(this.buf1)) {
break;
@ -425,18 +434,20 @@ var Parser = (function ParserClosure() {
}
// Extract the name of the first (i.e. the current) image filter.
var filter = dict.get('Filter', 'F'), filterName;
const filter = dict.get('Filter', 'F');
let filterName;
if (isName(filter)) {
filterName = filter.name;
} else if (Array.isArray(filter)) {
var filterZero = this.xref.fetchIfRef(filter[0]);
const filterZero = this.xref.fetchIfRef(filter[0]);
if (isName(filterZero)) {
filterName = filterZero.name;
}
}
// Parse image stream.
let startPos = stream.pos, length;
const startPos = stream.pos;
let length;
if (filterName === 'DCTDecode' || filterName === 'DCT') {
length = this.findDCTDecodeInlineStreamEnd(stream);
} else if (filterName === 'ASCII85Decode' || filterName === 'A85') {
@ -446,26 +457,26 @@ var Parser = (function ParserClosure() {
} else {
length = this.findDefaultInlineStreamEnd(stream);
}
var imageStream = stream.makeSubStream(startPos, length, dict);
let imageStream = stream.makeSubStream(startPos, length, dict);
// Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
// adler32 checksum.
let cacheKey;
if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
var imageBytes = imageStream.getBytes();
const imageBytes = imageStream.getBytes();
imageStream.reset();
const initialStreamPos = stream.pos;
// Set the stream position to the beginning of the dictionary data...
stream.pos = lexer.beginInlineImagePos;
// ... and fetch the bytes of the *entire* dictionary.
let dictBytes = stream.getBytes(dictLength);
const dictBytes = stream.getBytes(dictLength);
// Finally, don't forget to reset the stream position.
stream.pos = initialStreamPos;
cacheKey = computeAdler32(imageBytes) + '_' + computeAdler32(dictBytes);
let cacheEntry = this.imageCache[cacheKey];
const cacheEntry = this.imageCache[cacheKey];
if (cacheEntry !== undefined) {
this.buf2 = Cmd.get('EI');
this.shift();
@ -482,7 +493,7 @@ var Parser = (function ParserClosure() {
imageStream = this.filter(imageStream, dict, length);
imageStream.dict = dict;
if (cacheKey !== undefined) {
imageStream.cacheKey = 'inline_' + length + '_' + cacheKey;
imageStream.cacheKey = `inline_${length}_${cacheKey}`;
this.imageCache[cacheKey] = imageStream;
}
@ -490,7 +501,7 @@ var Parser = (function ParserClosure() {
this.shift();
return imageStream;
},
}
_findStreamLength(startPos, signature) {
const { stream, } = this.lexer;
@ -521,28 +532,28 @@ var Parser = (function ParserClosure() {
stream.pos += scanLength;
}
return -1;
},
}
makeStream: function Parser_makeStream(dict, cipherTransform) {
var lexer = this.lexer;
var stream = lexer.stream;
makeStream(dict, cipherTransform) {
const lexer = this.lexer;
let stream = lexer.stream;
// get stream start position
// Get the stream's start position.
lexer.skipToNextLine();
const startPos = stream.pos - 1;
// get length
var length = dict.get('Length');
// Get the length.
let length = dict.get('Length');
if (!Number.isInteger(length)) {
info('Bad ' + length + ' attribute in stream');
info(`Bad length "${length}" in stream`);
length = 0;
}
// skip over the stream data
// Skip over the stream data.
stream.pos = startPos + length;
lexer.nextChar();
// Shift '>>' and check whether the new object marks the end of the stream
// Shift '>>' and check whether the new object marks the end of the stream.
if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
this.shift(); // 'stream'
} else {
@ -561,7 +572,7 @@ var Parser = (function ParserClosure() {
const end = ENDSTREAM_SIGNATURE.length - i;
const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
let maybeLength = this._findStreamLength(startPos,
const maybeLength = this._findStreamLength(startPos,
TRUNCATED_SIGNATURE);
if (maybeLength >= 0) {
// Ensure that the byte immediately following the truncated
@ -596,10 +607,12 @@ var Parser = (function ParserClosure() {
stream = this.filter(stream, dict, length);
stream.dict = dict;
return stream;
},
filter: function Parser_filter(stream, dict, length) {
var filter = dict.get('Filter', 'F');
var params = dict.get('DecodeParms', 'DP');
}
filter(stream, dict, length) {
let filter = dict.get('Filter', 'F');
let params = dict.get('DecodeParms', 'DP');
if (isName(filter)) {
if (Array.isArray(params)) {
warn('/DecodeParms should not contain an Array, ' +
@ -608,14 +621,14 @@ var Parser = (function ParserClosure() {
return this.makeFilter(stream, filter.name, length, params);
}
var maybeLength = length;
let maybeLength = length;
if (Array.isArray(filter)) {
var filterArray = filter;
var paramsArray = params;
for (var i = 0, ii = filterArray.length; i < ii; ++i) {
let filterArray = filter;
let paramsArray = params;
for (let i = 0, ii = filterArray.length; i < ii; ++i) {
filter = this.xref.fetchIfRef(filterArray[i]);
if (!isName(filter)) {
throw new FormatError('Bad filter name: ' + filter);
throw new FormatError(`Bad filter name "${filter}"`);
}
params = null;
@ -623,22 +636,24 @@ var Parser = (function ParserClosure() {
params = this.xref.fetchIfRef(paramsArray[i]);
}
stream = this.makeFilter(stream, filter.name, maybeLength, params);
// after the first stream the length variable is invalid
// After the first stream the `length` variable is invalid.
maybeLength = null;
}
}
return stream;
},
makeFilter: function Parser_makeFilter(stream, name, maybeLength, params) {
}
makeFilter(stream, name, maybeLength, params) {
// Since the 'Length' entry in the stream dictionary can be completely
// wrong, e.g. zero for non-empty streams, only skip parsing the stream
// when we can be absolutely certain that it actually is empty.
if (maybeLength === 0) {
warn('Empty "' + name + '" stream.');
warn(`Empty "${name}" stream.`);
return new NullStream();
}
try {
var xrefStreamStats = this.xref.stats.streamTypes;
const xrefStreamStats = this.xref.stats.streamTypes;
if (name === 'FlateDecode' || name === 'Fl') {
xrefStreamStats[StreamType.FLATE] = true;
if (params) {
@ -649,7 +664,7 @@ var Parser = (function ParserClosure() {
}
if (name === 'LZWDecode' || name === 'LZW') {
xrefStreamStats[StreamType.LZW] = true;
var earlyChange = 1;
let earlyChange = 1;
if (params) {
if (params.has('EarlyChange')) {
earlyChange = params.get('EarlyChange');
@ -688,48 +703,21 @@ var Parser = (function ParserClosure() {
xrefStreamStats[StreamType.JBIG] = true;
return new Jbig2Stream(stream, maybeLength, stream.dict, params);
}
warn('filter "' + name + '" not supported yet');
warn(`Filter "${name}" is not supported.`);
return stream;
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn('Invalid stream: \"' + ex + '\"');
warn(`Invalid stream: "${ex}"`);
return new NullStream();
}
},
};
return Parser;
})();
var Lexer = (function LexerClosure() {
function Lexer(stream, knownCommands) {
this.stream = stream;
this.nextChar();
// While lexing, we build up many strings one char at a time. Using += for
// this can result in lots of garbage strings. It's better to build an
// array of single-char strings and then join() them together at the end.
// And reusing a single array (i.e. |this.strBuf|) over and over for this
// purpose uses less memory than using a new array for each string.
this.strBuf = [];
// The PDFs might have "glued" commands with other commands, operands or
// literals, e.g. "q1". The knownCommands is a dictionary of the valid
// commands and their prefixes. The prefixes are built the following way:
// if there a command that is a prefix of the other valid command or
// literal (e.g. 'f' and 'false') the following prefixes must be included,
// 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
// other commands or literals as a prefix. The knowCommands is optional.
this.knownCommands = knownCommands;
this.beginInlineImagePos = -1;
}
}
// A '1' in this array means the character is white space. A '1' or
// '2' means the character ends a name or command.
var specialChars = [
const specialChars = [
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
@ -759,18 +747,43 @@ var Lexer = (function LexerClosure() {
return -1;
}
Lexer.prototype = {
nextChar: function Lexer_nextChar() {
class Lexer {
constructor(stream, knownCommands) {
this.stream = stream;
this.nextChar();
// While lexing, we build up many strings one char at a time. Using += for
// this can result in lots of garbage strings. It's better to build an
// array of single-char strings and then join() them together at the end.
// And reusing a single array (i.e. |this.strBuf|) over and over for this
// purpose uses less memory than using a new array for each string.
this.strBuf = [];
// PDFs might have commands "glued" to other commands, operands or
// literals, e.g. "q1". The knownCommands is a dictionary of the valid
// commands and their prefixes. The prefixes are built the following way:
// if there is a command that is a prefix of another valid command or
// literal (e.g. 'f' and 'false'), the intermediate prefixes must be
// included: 'fa', 'fal', 'fals'. The prefixes are not needed if the command
// is not a prefix of any other command or literal. knownCommands is optional.
this.knownCommands = knownCommands;
this.beginInlineImagePos = -1;
}
nextChar() {
return (this.currentChar = this.stream.getByte());
},
peekChar: function Lexer_peekChar() {
}
peekChar() {
return this.stream.peekByte();
},
getNumber: function Lexer_getNumber() {
var ch = this.currentChar;
var eNotation = false;
var divideBy = 0; // different from 0 if it's a floating point value
var sign = 0;
}
getNumber() {
let ch = this.currentChar;
let eNotation = false;
let divideBy = 0; // Different from 0 if it's a floating point value.
let sign = 0;
if (ch === 0x2D) { // '-'
sign = -1;
@ -806,17 +819,17 @@ var Lexer = (function LexerClosure() {
}
sign = sign || 1;
var baseValue = ch - 0x30; // '0'
var powerValue = 0;
var powerValueSign = 1;
let baseValue = ch - 0x30; // '0'
let powerValue = 0;
let powerValueSign = 1;
while ((ch = this.nextChar()) >= 0) {
if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
var currentDigit = ch - 0x30; // '0'
if (eNotation) { // We are after an 'e' or 'E'
const currentDigit = ch - 0x30; // '0'
if (eNotation) { // We are after an 'e' or 'E'.
powerValue = powerValue * 10 + currentDigit;
} else {
if (divideBy !== 0) { // We are after a point
if (divideBy !== 0) { // We are after a point.
divideBy *= 10;
}
baseValue = baseValue * 10 + currentDigit;
@ -825,27 +838,27 @@ var Lexer = (function LexerClosure() {
if (divideBy === 0) {
divideBy = 1;
} else {
// A number can have only one '.'
// A number can have only one dot.
break;
}
} else if (ch === 0x2D) { // '-'
// ignore minus signs in the middle of numbers to match
// Adobe's behavior
warn('Badly formatted number');
// Ignore minus signs in the middle of numbers to match
// Adobe's behavior.
warn('Badly formatted number: minus sign in the middle');
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
// 'E' can be either a scientific notation or the beginning of a new
// operator
// operator.
ch = this.peekChar();
if (ch === 0x2B || ch === 0x2D) { // '+', '-'
powerValueSign = (ch === 0x2D) ? -1 : 1;
this.nextChar(); // Consume the sign character
this.nextChar(); // Consume the sign character.
} else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
// The 'E' must be the beginning of a new operator
// The 'E' must be the beginning of a new operator.
break;
}
eNotation = true;
} else {
// the last character doesn't belong to us
// The last character doesn't belong to us.
break;
}
}
@ -857,16 +870,17 @@ var Lexer = (function LexerClosure() {
baseValue *= Math.pow(10, powerValueSign * powerValue);
}
return sign * baseValue;
},
getString: function Lexer_getString() {
var numParen = 1;
var done = false;
var strBuf = this.strBuf;
}
getString() {
let numParen = 1;
let done = false;
const strBuf = this.strBuf;
strBuf.length = 0;
var ch = this.nextChar();
let ch = this.nextChar();
while (true) {
var charBuffered = false;
let charBuffered = false;
switch (ch | 0) {
case -1:
warn('Unterminated string');
@ -913,7 +927,7 @@ var Lexer = (function LexerClosure() {
break;
case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
var x = ch & 0x0F;
let x = ch & 0x0F;
ch = this.nextChar();
charBuffered = true;
if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
@ -950,11 +964,13 @@ var Lexer = (function LexerClosure() {
}
}
return strBuf.join('');
},
getName: function Lexer_getName() {
var ch, previousCh;
var strBuf = this.strBuf;
}
getName() {
let ch, previousCh;
const strBuf = this.strBuf;
strBuf.length = 0;
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
if (ch === 0x23) { // '#'
ch = this.nextChar();
@ -964,14 +980,14 @@ var Lexer = (function LexerClosure() {
strBuf.push('#');
break;
}
var x = toHexDigit(ch);
const x = toHexDigit(ch);
if (x !== -1) {
previousCh = ch;
ch = this.nextChar();
var x2 = toHexDigit(ch);
const x2 = toHexDigit(ch);
if (x2 === -1) {
warn('Lexer_getName: Illegal digit (' +
String.fromCharCode(ch) + ') in hexadecimal number.');
warn(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` +
'in hexadecimal number.');
strBuf.push('#', String.fromCharCode(previousCh));
if (specialChars[ch]) {
break;
@ -988,17 +1004,18 @@ var Lexer = (function LexerClosure() {
}
}
if (strBuf.length > 127) {
warn('name token is longer than allowed by the spec: ' + strBuf.length);
warn(`Name token is longer than allowed by the spec: ${strBuf.length}`);
}
return Name.get(strBuf.join(''));
},
getHexString: function Lexer_getHexString() {
var strBuf = this.strBuf;
}
getHexString() {
const strBuf = this.strBuf;
strBuf.length = 0;
var ch = this.currentChar;
var isFirstHex = true;
var firstDigit;
var secondDigit;
let ch = this.currentChar;
let isFirstHex = true;
let firstDigit, secondDigit;
while (true) {
if (ch < 0) {
warn('Unterminated hex string');
@ -1013,14 +1030,14 @@ var Lexer = (function LexerClosure() {
if (isFirstHex) {
firstDigit = toHexDigit(ch);
if (firstDigit === -1) {
warn('Ignoring invalid character "' + ch + '" in hex string');
warn(`Ignoring invalid character "${ch}" in hex string`);
ch = this.nextChar();
continue;
}
} else {
secondDigit = toHexDigit(ch);
if (secondDigit === -1) {
warn('Ignoring invalid character "' + ch + '" in hex string');
warn(`Ignoring invalid character "${ch}" in hex string`);
ch = this.nextChar();
continue;
}
@ -1031,11 +1048,12 @@ var Lexer = (function LexerClosure() {
}
}
return strBuf.join('');
},
getObj: function Lexer_getObj() {
// skip whitespace and comments
var comment = false;
var ch = this.currentChar;
}
getObj() {
// Skip whitespace and comments.
let comment = false;
let ch = this.currentChar;
while (true) {
if (ch < 0) {
return EOF;
@ -1052,7 +1070,7 @@ var Lexer = (function LexerClosure() {
ch = this.nextChar();
}
// start reading token
// Start reading a token.
switch (ch | 0) {
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
@ -1101,14 +1119,14 @@ var Lexer = (function LexerClosure() {
throw new FormatError(`Illegal character: ${ch}`);
}
// command
var str = String.fromCharCode(ch);
var knownCommands = this.knownCommands;
var knownCommandFound = knownCommands && knownCommands[str] !== undefined;
// Start reading a command.
let str = String.fromCharCode(ch);
const knownCommands = this.knownCommands;
let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
// stop if known command is found and next character does not make
// the str a command
var possibleCommand = str + String.fromCharCode(ch);
// Stop if a known command is found and next character does not make
// the string a command.
const possibleCommand = str + String.fromCharCode(ch);
if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
break;
}
@ -1135,9 +1153,10 @@ var Lexer = (function LexerClosure() {
}
return Cmd.get(str);
},
skipToNextLine: function Lexer_skipToNextLine() {
var ch = this.currentChar;
}
skipToNextLine() {
let ch = this.currentChar;
while (ch >= 0) {
if (ch === 0x0D) { // CR
ch = this.nextChar();
@ -1151,61 +1170,64 @@ var Lexer = (function LexerClosure() {
}
ch = this.nextChar();
}
},
};
}
}
return Lexer;
})();
var Linearization = {
create: function LinearizationCreate(stream) {
function getInt(name, allowZeroValue) {
var obj = linDict.get(name);
class Linearization {
static create(stream) {
function getInt(linDict, name, allowZeroValue = false) {
const obj = linDict.get(name);
if (Number.isInteger(obj) && (allowZeroValue ? obj >= 0 : obj > 0)) {
return obj;
}
throw new Error('The "' + name + '" parameter in the linearization ' +
throw new Error(`The "${name}" parameter in the linearization ` +
'dictionary is invalid.');
}
function getHints() {
var hints = linDict.get('H'), hintsLength, item;
function getHints(linDict) {
const hints = linDict.get('H');
let hintsLength;
if (Array.isArray(hints) &&
((hintsLength = hints.length) === 2 || hintsLength === 4)) {
for (var index = 0; index < hintsLength; index++) {
if (!(Number.isInteger(item = hints[index]) && item > 0)) {
throw new Error('Hint (' + index +
') in the linearization dictionary is invalid.');
for (let index = 0; index < hintsLength; index++) {
const hint = hints[index];
if (!(Number.isInteger(hint) && hint > 0)) {
throw new Error(`Hint (${index}) in the linearization dictionary ` +
'is invalid.');
}
}
return hints;
}
throw new Error('Hint array in the linearization dictionary is invalid.');
}
var parser = new Parser(new Lexer(stream), false, null);
var obj1 = parser.getObj();
var obj2 = parser.getObj();
var obj3 = parser.getObj();
var linDict = parser.getObj();
var obj, length;
const parser = new Parser(new Lexer(stream), false, null);
const obj1 = parser.getObj();
const obj2 = parser.getObj();
const obj3 = parser.getObj();
const linDict = parser.getObj();
let obj, length;
if (!(Number.isInteger(obj1) && Number.isInteger(obj2) &&
isCmd(obj3, 'obj') && isDict(linDict) &&
isNum(obj = linDict.get('Linearized')) && obj > 0)) {
return null; // No valid linearization dictionary found.
} else if ((length = getInt('L')) !== stream.length) {
} else if ((length = getInt(linDict, 'L')) !== stream.length) {
throw new Error('The "L" parameter in the linearization dictionary ' +
'does not equal the stream length.');
}
return {
length,
hints: getHints(),
objectNumberFirst: getInt('O'),
endFirst: getInt('E'),
numPages: getInt('N'),
mainXRefEntriesOffset: getInt('T'),
pageFirst: (linDict.has('P') ? getInt('P', true) : 0),
};
},
hints: getHints(linDict),
objectNumberFirst: getInt(linDict, 'O'),
endFirst: getInt(linDict, 'E'),
numPages: getInt(linDict, 'N'),
mainXRefEntriesOffset: getInt(linDict, 'T'),
pageFirst: (linDict.has('P') ?
getInt(linDict, 'P', /* allowZeroValue = */ true) : 0),
};
}
}
export {
Lexer,

View File

@ -12,74 +12,129 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* eslint no-var: error */
import { Lexer, Linearization } from '../../src/core/parser';
import { Lexer, Linearization, Parser } from '../../src/core/parser';
import { FormatError } from '../../src/shared/util';
import { Name } from '../../src/core/primitives';
import { StringStream } from '../../src/core/stream';
describe('parser', function() {
describe('Lexer', function() {
it('should stop parsing numbers at the end of stream', function() {
var input = new StringStream('11.234');
var lexer = new Lexer(input);
var result = lexer.getNumber();
describe('Parser', function() {
describe('inlineStreamSkipEI', function() {
it('should skip over the EI marker if it is found', function() {
const string = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
'/F /A85 ID abc123~> EI Q';
const input = new StringStream(string);
const lexer = new Lexer(input);
const parser = new Parser(lexer, /* allowStreams = */ true,
/* xref = */ null);
parser.inlineStreamSkipEI(input);
expect(input.pos).toEqual(string.indexOf('Q'));
expect(input.peekByte()).toEqual(0x51); // 'Q'
});
expect(result).toEqual(11.234);
it('should skip to the end of stream if the EI marker is not found',
function() {
const string = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
'/F /A85 ID abc123~> Q';
const input = new StringStream(string);
const lexer = new Lexer(input);
const parser = new Parser(lexer, /* allowStreams = */ true,
/* xref = */ null);
parser.inlineStreamSkipEI(input);
expect(input.pos).toEqual(string.length);
expect(input.peekByte()).toEqual(-1);
});
});
});
describe('Lexer', function() {
describe('nextChar', function() {
it('should return and set -1 when the end of the stream is reached',
function() {
const input = new StringStream('');
const lexer = new Lexer(input);
expect(lexer.nextChar()).toEqual(-1);
expect(lexer.currentChar).toEqual(-1);
});
it('should return and set the character after the current position',
function() {
const input = new StringStream('123');
const lexer = new Lexer(input);
expect(lexer.nextChar()).toEqual(0x32); // '2'
expect(lexer.currentChar).toEqual(0x32); // '2'
});
});
describe('peekChar', function() {
it('should only return -1 when the end of the stream is reached',
function() {
const input = new StringStream('');
const lexer = new Lexer(input);
expect(lexer.peekChar()).toEqual(-1);
expect(lexer.currentChar).toEqual(-1);
});
it('should only return the character after the current position',
function() {
const input = new StringStream('123');
const lexer = new Lexer(input);
expect(lexer.peekChar()).toEqual(0x32); // '2'
expect(lexer.currentChar).toEqual(0x31); // '1'
});
});
describe('getNumber', function() {
it('should stop parsing numbers at the end of stream', function() {
const input = new StringStream('11.234');
const lexer = new Lexer(input);
expect(lexer.getNumber()).toEqual(11.234);
});
it('should parse PostScript numbers', function() {
var numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.', '0.0',
'123', '-98', '43445', '0', '+17'];
for (var i = 0, ii = numbers.length; i < ii; i++) {
var num = numbers[i];
var input = new StringStream(num);
var lexer = new Lexer(input);
var result = lexer.getNumber();
expect(result).toEqual(parseFloat(num));
const numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.',
'0.0', '123', '-98', '43445', '0', '+17'];
for (const number of numbers) {
const input = new StringStream(number);
const lexer = new Lexer(input);
expect(lexer.getNumber()).toEqual(parseFloat(number));
}
});
it('should ignore double negative before number', function() {
var input = new StringStream('--205.88');
var lexer = new Lexer(input);
var result = lexer.getNumber();
expect(result).toEqual(-205.88);
const input = new StringStream('--205.88');
const lexer = new Lexer(input);
expect(lexer.getNumber()).toEqual(-205.88);
});
it('should ignore minus signs in the middle of number', function() {
var input = new StringStream('205--.88');
var lexer = new Lexer(input);
var result = lexer.getNumber();
expect(result).toEqual(205.88);
const input = new StringStream('205--.88');
const lexer = new Lexer(input);
expect(lexer.getNumber()).toEqual(205.88);
});
it('should ignore line-breaks between operator and digit in number',
function() {
let minusInput = new StringStream('-\r\n205.88');
let minusLexer = new Lexer(minusInput);
const minusInput = new StringStream('-\r\n205.88');
const minusLexer = new Lexer(minusInput);
expect(minusLexer.getNumber()).toEqual(-205.88);
let plusInput = new StringStream('+\r\n205.88');
let plusLexer = new Lexer(plusInput);
const plusInput = new StringStream('+\r\n205.88');
const plusLexer = new Lexer(plusInput);
expect(plusLexer.getNumber()).toEqual(205.88);
});
it('should treat a single decimal point as zero', function() {
let input = new StringStream('.');
let lexer = new Lexer(input);
const input = new StringStream('.');
const lexer = new Lexer(input);
expect(lexer.getNumber()).toEqual(0);
let numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
for (let number of numbers) {
let input = new StringStream(number);
let lexer = new Lexer(input);
const numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
for (const number of numbers) {
const input = new StringStream(number);
const lexer = new Lexer(input);
expect(function() {
return lexer.getNumber();
@ -88,68 +143,62 @@ describe('parser', function() {
});
it('should handle glued numbers and operators', function() {
var input = new StringStream('123ET');
var lexer = new Lexer(input);
var value = lexer.getNumber();
expect(value).toEqual(123);
const input = new StringStream('123ET');
const lexer = new Lexer(input);
expect(lexer.getNumber()).toEqual(123);
// The lexer must not have consumed the 'E'
expect(lexer.currentChar).toEqual(0x45); // 'E'
});
// Expects getString() to return only the characters read before the stream
// reports its end, even though the closing ')' was never reached.
it('should stop parsing strings at the end of stream', function() {
var input = new StringStream('(1$4)');
// Replace getByte with a wrapper; bind() hands the original method to the
// wrapper as its first argument (super_getByte).
input.getByte = function(super_getByte) {
// Simulate end of file by returning -1 in place of the '$' byte
// (see issue 2766).
var ch = super_getByte.call(input);
return (ch === 0x24 /* '$' */ ? -1 : ch);
}.bind(input, input.getByte);
var lexer = new Lexer(input);
var result = lexer.getString();
// '1' is the only byte between '(' and the simulated end of stream.
expect(result).toEqual('1');
});
it('should not throw exception on bad input', function() {
// '8 0 2 15 5 2 2 2 4 3 2 4'
// should be parsed as
// '80 21 55 22 24 32'
var input = new StringStream('<7 0 2 15 5 2 2 2 4 3 2 4>');
var lexer = new Lexer(input);
var result = lexer.getHexString();
expect(result).toEqual('p!U"$2');
// Tests for Lexer.getString(): behaviour at a premature end of stream and
// handling of backslash-escaped line endings inside a literal string.
describe('getString', function() {
it('should stop parsing strings at the end of stream', function() {
const input = new StringStream('(1$4)');
// Replace getByte with a wrapper; bind() hands the original method to the
// wrapper as its first argument (super_getByte).
input.getByte = function(super_getByte) {
// Simulate end of file by returning -1 in place of the '$' byte
// (see issue 2766).
const ch = super_getByte.call(input);
return (ch === 0x24 /* '$' */ ? -1 : ch);
}.bind(input, input.getByte);
const lexer = new Lexer(input);
// '1' is the only byte between '(' and the simulated end of stream.
expect(lexer.getString()).toEqual('1');
});
// NOTE(review): in the test below `input` and `lexer` are declared with both
// `var` and `const`, which looks like the old and new versions of the same
// lines shown together. Only one set can remain for it to parse - confirm.
it('should ignore escaped CR and LF', function() {
// '(\101\<CR><LF>\102)'
// should be parsed as
// "AB"
var input = new StringStream('(\\101\\\r\n\\102\\\r\\103\\\n\\104)');
var lexer = new Lexer(input);
var result = lexer.getString();
expect(result).toEqual('ABCD');
// '(\101\<CR><LF>\102)' should be parsed as 'AB'.
// The actual input escapes a CR+LF, a lone CR and a lone LF in turn, between
// the octal escapes \101 \102 \103 \104, and 'ABCD' is expected back.
const input = new StringStream('(\\101\\\r\n\\102\\\r\\103\\\n\\104)');
const lexer = new Lexer(input);
expect(lexer.getString()).toEqual('ABCD');
});
});
it('should handle Names with invalid usage of NUMBER SIGN (#)', function() {
var inputNames = ['/# 680 0 R', '/#AQwerty', '/#A<</B'];
var expectedNames = ['#', '#AQwerty', '#A'];
// Tests for Lexer.getHexString() on a hex string whose digits are separated
// by spaces and do not come in two-digit groups.
describe('getHexString', function() {
it('should not throw exception on bad input', function() {
// '7 0 2 15 5 2 2 2 4 3 2 4' should be parsed as '70 21 55 22 24 32'.
const input = new StringStream('<7 0 2 15 5 2 2 2 4 3 2 4>');
const lexer = new Lexer(input);
// 0x70 0x21 0x55 0x22 0x24 0x32 are the character codes of 'p!U"$2'.
expect(lexer.getHexString()).toEqual('p!U"$2');
});
});
for (var i = 0, ii = inputNames.length; i < ii; i++) {
var input = new StringStream(inputNames[i]);
var lexer = new Lexer(input);
var result = lexer.getName();
// Tests for Lexer.getName() on names where '#' is not followed by two hex
// digits.
describe('getName', function() {
it('should handle Names with invalid usage of NUMBER SIGN (#)',
function() {
// The two arrays are parallel: inputNames[i] is expected to produce
// Name.get(expectedNames[i]).
const inputNames = ['/# 680 0 R', '/#AQwerty', '/#A<</B'];
const expectedNames = ['#', '#AQwerty', '#A'];
// NOTE(review): `result` is not declared anywhere in this describe and `i`
// is only declared by the loop that follows; this line looks like a leftover
// from the earlier version of the test - confirm.
expect(result).toEqual(Name.get(expectedNames[i]));
for (let i = 0, ii = inputNames.length; i < ii; i++) {
const input = new StringStream(inputNames[i]);
const lexer = new Lexer(input);
expect(lexer.getName()).toEqual(Name.get(expectedNames[i]));
}
});
});
});
describe('Linearization', function() {
it('should not find a linearization dictionary', function() {
// Not an actual linearization dictionary.
var stream1 = new StringStream(
const stream1 = new StringStream(
'3 0 obj\n' +
'<<\n' +
'/Length 4622\n' +
@ -160,7 +209,7 @@ describe('parser', function() {
expect(Linearization.create(stream1)).toEqual(null);
// Linearization dictionary with invalid version number.
var stream2 = new StringStream(
const stream2 = new StringStream(
'1 0 obj\n' +
'<<\n' +
'/Linearized 0\n' +
@ -171,7 +220,7 @@ describe('parser', function() {
});
it('should accept a valid linearization dictionary', function() {
var stream = new StringStream(
const stream = new StringStream(
'131 0 obj\n' +
'<<\n' +
'/Linearized 1\n' +
@ -184,7 +233,7 @@ describe('parser', function() {
'>>\n' +
'endobj'
);
var expectedLinearizationDict = {
const expectedLinearizationDict = {
length: 90,
hints: [1388, 863],
objectNumberFirst: 133,
@ -199,7 +248,7 @@ describe('parser', function() {
it('should reject a linearization dictionary with invalid ' +
'integer parameters', function() {
// The /L parameter should be equal to the stream length.
var stream1 = new StringStream(
const stream1 = new StringStream(
'1 0 obj\n' +
'<<\n' +
'/Linearized 1\n' +
@ -218,7 +267,7 @@ describe('parser', function() {
'dictionary does not equal the stream length.'));
// The /E parameter should not be zero.
var stream2 = new StringStream(
const stream2 = new StringStream(
'1 0 obj\n' +
'<<\n' +
'/Linearized 1\n' +
@ -237,7 +286,7 @@ describe('parser', function() {
'dictionary is invalid.'));
// The /O parameter should be an integer.
var stream3 = new StringStream(
const stream3 = new StringStream(
'1 0 obj\n' +
'<<\n' +
'/Linearized 1\n' +
@ -259,7 +308,7 @@ describe('parser', function() {
it('should reject a linearization dictionary with invalid hint parameters',
function() {
// The /H parameter should be an array.
var stream1 = new StringStream(
const stream1 = new StringStream(
'1 0 obj\n' +
'<<\n' +
'/Linearized 1\n' +
@ -278,7 +327,7 @@ describe('parser', function() {
'is invalid.'));
// The hint array should contain two, or four, elements.
var stream2 = new StringStream(
const stream2 = new StringStream(
'1 0 obj\n' +
'<<\n' +
'/Linearized 1\n' +
@ -297,7 +346,7 @@ describe('parser', function() {
'is invalid.'));
// The hint array should not contain zero.
var stream3 = new StringStream(
const stream3 = new StringStream(
'1 0 obj\n' +
'<<\n' +
'/Linearized 1\n' +