Merge pull request #10635 from timvandermeij/lexer-parser

Convert `src/core/parser.js` to ES6 syntax and write more unit tests for the lexer and the parser
This commit is contained in:
Tim van der Meij 2019-03-19 23:17:34 +01:00 committed by GitHub
commit 33bfbef6ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 1238 additions and 1167 deletions

View File

@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* eslint no-var: error */
import { import {
Ascii85Stream, AsciiHexStream, FlateStream, LZWStream, NullStream, Ascii85Stream, AsciiHexStream, FlateStream, LZWStream, NullStream,
@ -34,7 +35,7 @@ const MAX_LENGTH_TO_CACHE = 1000;
const MAX_ADLER32_LENGTH = 5552; const MAX_ADLER32_LENGTH = 5552;
function computeAdler32(bytes) { function computeAdler32(bytes) {
let bytesLength = bytes.length; const bytesLength = bytes.length;
if (typeof PDFJSDev === 'undefined' || if (typeof PDFJSDev === 'undefined' ||
PDFJSDev.test('!PRODUCTION || TESTING')) { PDFJSDev.test('!PRODUCTION || TESTING')) {
assert(bytesLength < MAX_ADLER32_LENGTH, assert(bytesLength < MAX_ADLER32_LENGTH,
@ -49,22 +50,23 @@ function computeAdler32(bytes) {
return ((b % 65521) << 16) | (a % 65521); return ((b % 65521) << 16) | (a % 65521);
} }
var Parser = (function ParserClosure() { class Parser {
function Parser(lexer, allowStreams, xref, recoveryMode) { constructor(lexer, allowStreams, xref, recoveryMode = false) {
this.lexer = lexer; this.lexer = lexer;
this.allowStreams = allowStreams; this.allowStreams = allowStreams;
this.xref = xref; this.xref = xref;
this.recoveryMode = recoveryMode || false; this.recoveryMode = recoveryMode;
this.imageCache = Object.create(null); this.imageCache = Object.create(null);
this.refill(); this.refill();
} }
Parser.prototype = { refill() {
refill: function Parser_refill() {
this.buf1 = this.lexer.getObj(); this.buf1 = this.lexer.getObj();
this.buf2 = this.lexer.getObj(); this.buf2 = this.lexer.getObj();
}, }
shift: function Parser_shift() {
shift() {
if (isCmd(this.buf2, 'ID')) { if (isCmd(this.buf2, 'ID')) {
this.buf1 = this.buf2; this.buf1 = this.buf2;
this.buf2 = null; this.buf2 = null;
@ -72,8 +74,9 @@ var Parser = (function ParserClosure() {
this.buf1 = this.buf2; this.buf1 = this.buf2;
this.buf2 = this.lexer.getObj(); this.buf2 = this.lexer.getObj();
} }
}, }
tryShift: function Parser_tryShift() {
tryShift() {
try { try {
this.shift(); this.shift();
return true; return true;
@ -85,9 +88,10 @@ var Parser = (function ParserClosure() {
// state and call this.shift() twice to reset the buffers. // state and call this.shift() twice to reset the buffers.
return false; return false;
} }
}, }
getObj: function Parser_getObj(cipherTransform) {
var buf1 = this.buf1; getObj(cipherTransform) {
const buf1 = this.buf1;
this.shift(); this.shift();
if (buf1 instanceof Cmd) { if (buf1 instanceof Cmd) {
@ -95,7 +99,7 @@ var Parser = (function ParserClosure() {
case 'BI': // inline image case 'BI': // inline image
return this.makeInlineImage(cipherTransform); return this.makeInlineImage(cipherTransform);
case '[': // array case '[': // array
var array = []; const array = [];
while (!isCmd(this.buf1, ']') && !isEOF(this.buf1)) { while (!isCmd(this.buf1, ']') && !isEOF(this.buf1)) {
array.push(this.getObj(cipherTransform)); array.push(this.getObj(cipherTransform));
} }
@ -108,7 +112,7 @@ var Parser = (function ParserClosure() {
this.shift(); this.shift();
return array; return array;
case '<<': // dictionary or stream case '<<': // dictionary or stream
var dict = new Dict(this.xref); const dict = new Dict(this.xref);
while (!isCmd(this.buf1, '>>') && !isEOF(this.buf1)) { while (!isCmd(this.buf1, '>>') && !isEOF(this.buf1)) {
if (!isName(this.buf1)) { if (!isName(this.buf1)) {
info('Malformed dictionary: key must be a name object'); info('Malformed dictionary: key must be a name object');
@ -116,7 +120,7 @@ var Parser = (function ParserClosure() {
continue; continue;
} }
var key = this.buf1.name; const key = this.buf1.name;
this.shift(); this.shift();
if (isEOF(this.buf1)) { if (isEOF(this.buf1)) {
break; break;
@ -144,9 +148,9 @@ var Parser = (function ParserClosure() {
} }
if (Number.isInteger(buf1)) { // indirect reference or integer if (Number.isInteger(buf1)) { // indirect reference or integer
var num = buf1; const num = buf1;
if (Number.isInteger(this.buf1) && isCmd(this.buf2, 'R')) { if (Number.isInteger(this.buf1) && isCmd(this.buf2, 'R')) {
var ref = new Ref(num, this.buf1); const ref = new Ref(num, this.buf1);
this.shift(); this.shift();
this.shift(); this.shift();
return ref; return ref;
@ -155,7 +159,7 @@ var Parser = (function ParserClosure() {
} }
if (isString(buf1)) { // string if (isString(buf1)) { // string
var str = buf1; let str = buf1;
if (cipherTransform) { if (cipherTransform) {
str = cipherTransform.decryptString(str); str = cipherTransform.decryptString(str);
} }
@ -164,7 +168,8 @@ var Parser = (function ParserClosure() {
// simple object // simple object
return buf1; return buf1;
}, }
/** /**
* Find the end of the stream by searching for the /EI\s/. * Find the end of the stream by searching for the /EI\s/.
* @returns {number} The inline stream length. * @returns {number} The inline stream length.
@ -183,7 +188,7 @@ var Parser = (function ParserClosure() {
if (ch === SPACE || ch === LF || ch === CR) { if (ch === SPACE || ch === LF || ch === CR) {
maybeEIPos = stream.pos; maybeEIPos = stream.pos;
// Let's check that the next `n` bytes are ASCII... just to be sure. // Let's check that the next `n` bytes are ASCII... just to be sure.
let followingBytes = stream.peekBytes(n); const followingBytes = stream.peekBytes(n);
for (let i = 0, ii = followingBytes.length; i < ii; i++) { for (let i = 0, ii = followingBytes.length; i < ii; i++) {
ch = followingBytes[i]; ch = followingBytes[i];
if (ch === NUL && followingBytes[i + 1] !== NUL) { if (ch === NUL && followingBytes[i + 1] !== NUL) {
@ -235,14 +240,14 @@ var Parser = (function ParserClosure() {
endOffset--; endOffset--;
} }
return ((stream.pos - endOffset) - startPos); return ((stream.pos - endOffset) - startPos);
}, }
/** /**
* Find the EOI (end-of-image) marker 0xFFD9 of the stream. * Find the EOI (end-of-image) marker 0xFFD9 of the stream.
* @returns {number} The inline stream length. * @returns {number} The inline stream length.
*/ */
findDCTDecodeInlineStreamEnd: findDCTDecodeInlineStreamEnd(stream) {
function Parser_findDCTDecodeInlineStreamEnd(stream) { let startPos = stream.pos, foundEOI = false, b, markerLength, length;
var startPos = stream.pos, foundEOI = false, b, markerLength, length;
while ((b = stream.getByte()) !== -1) { while ((b = stream.getByte()) !== -1) {
if (b !== 0xFF) { // Not a valid marker. if (b !== 0xFF) { // Not a valid marker.
continue; continue;
@ -331,14 +336,15 @@ var Parser = (function ParserClosure() {
} }
this.inlineStreamSkipEI(stream); this.inlineStreamSkipEI(stream);
return length; return length;
}, }
/** /**
* Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream. * Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream.
* @returns {number} The inline stream length. * @returns {number} The inline stream length.
*/ */
findASCII85DecodeInlineStreamEnd(stream) { findASCII85DecodeInlineStreamEnd(stream) {
var TILDE = 0x7E, GT = 0x3E; const TILDE = 0x7E, GT = 0x3E;
var startPos = stream.pos, ch, length; let startPos = stream.pos, ch, length;
while ((ch = stream.getByte()) !== -1) { while ((ch = stream.getByte()) !== -1) {
if (ch === TILDE) { if (ch === TILDE) {
ch = stream.peekByte(); ch = stream.peekByte();
@ -363,15 +369,15 @@ var Parser = (function ParserClosure() {
} }
this.inlineStreamSkipEI(stream); this.inlineStreamSkipEI(stream);
return length; return length;
}, }
/** /**
* Find the EOD (end-of-data) marker '>' (i.e. GT) of the stream. * Find the EOD (end-of-data) marker '>' (i.e. GT) of the stream.
* @returns {number} The inline stream length. * @returns {number} The inline stream length.
*/ */
findASCIIHexDecodeInlineStreamEnd: findASCIIHexDecodeInlineStreamEnd(stream) {
function Parser_findASCIIHexDecodeInlineStreamEnd(stream) { const GT = 0x3E;
var GT = 0x3E; let startPos = stream.pos, ch, length;
var startPos = stream.pos, ch, length;
while ((ch = stream.getByte()) !== -1) { while ((ch = stream.getByte()) !== -1) {
if (ch === GT) { if (ch === GT) {
break; break;
@ -386,13 +392,14 @@ var Parser = (function ParserClosure() {
} }
this.inlineStreamSkipEI(stream); this.inlineStreamSkipEI(stream);
return length; return length;
}, }
/** /**
* Skip over the /EI/ for streams where we search for an EOD marker. * Skip over the /EI/ for streams where we search for an EOD marker.
*/ */
inlineStreamSkipEI: function Parser_inlineStreamSkipEI(stream) { inlineStreamSkipEI(stream) {
var E = 0x45, I = 0x49; const E = 0x45, I = 0x49;
var state = 0, ch; let state = 0, ch;
while ((ch = stream.getByte()) !== -1) { while ((ch = stream.getByte()) !== -1) {
if (state === 0) { if (state === 0) {
state = (ch === E) ? 1 : 0; state = (ch === E) ? 1 : 0;
@ -402,18 +409,20 @@ var Parser = (function ParserClosure() {
break; break;
} }
} }
}, }
makeInlineImage: function Parser_makeInlineImage(cipherTransform) {
var lexer = this.lexer; makeInlineImage(cipherTransform) {
var stream = lexer.stream; const lexer = this.lexer;
const stream = lexer.stream;
// Parse dictionary. // Parse dictionary.
let dict = new Dict(this.xref), dictLength; const dict = new Dict(this.xref);
let dictLength;
while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) { while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) {
if (!isName(this.buf1)) { if (!isName(this.buf1)) {
throw new FormatError('Dictionary key must be a name object'); throw new FormatError('Dictionary key must be a name object');
} }
var key = this.buf1.name; const key = this.buf1.name;
this.shift(); this.shift();
if (isEOF(this.buf1)) { if (isEOF(this.buf1)) {
break; break;
@ -425,18 +434,20 @@ var Parser = (function ParserClosure() {
} }
// Extract the name of the first (i.e. the current) image filter. // Extract the name of the first (i.e. the current) image filter.
var filter = dict.get('Filter', 'F'), filterName; const filter = dict.get('Filter', 'F');
let filterName;
if (isName(filter)) { if (isName(filter)) {
filterName = filter.name; filterName = filter.name;
} else if (Array.isArray(filter)) { } else if (Array.isArray(filter)) {
var filterZero = this.xref.fetchIfRef(filter[0]); const filterZero = this.xref.fetchIfRef(filter[0]);
if (isName(filterZero)) { if (isName(filterZero)) {
filterName = filterZero.name; filterName = filterZero.name;
} }
} }
// Parse image stream. // Parse image stream.
let startPos = stream.pos, length; const startPos = stream.pos;
let length;
if (filterName === 'DCTDecode' || filterName === 'DCT') { if (filterName === 'DCTDecode' || filterName === 'DCT') {
length = this.findDCTDecodeInlineStreamEnd(stream); length = this.findDCTDecodeInlineStreamEnd(stream);
} else if (filterName === 'ASCII85Decode' || filterName === 'A85') { } else if (filterName === 'ASCII85Decode' || filterName === 'A85') {
@ -446,26 +457,26 @@ var Parser = (function ParserClosure() {
} else { } else {
length = this.findDefaultInlineStreamEnd(stream); length = this.findDefaultInlineStreamEnd(stream);
} }
var imageStream = stream.makeSubStream(startPos, length, dict); let imageStream = stream.makeSubStream(startPos, length, dict);
// Cache all images below the MAX_LENGTH_TO_CACHE threshold by their // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
// adler32 checksum. // adler32 checksum.
let cacheKey; let cacheKey;
if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) { if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
var imageBytes = imageStream.getBytes(); const imageBytes = imageStream.getBytes();
imageStream.reset(); imageStream.reset();
const initialStreamPos = stream.pos; const initialStreamPos = stream.pos;
// Set the stream position to the beginning of the dictionary data... // Set the stream position to the beginning of the dictionary data...
stream.pos = lexer.beginInlineImagePos; stream.pos = lexer.beginInlineImagePos;
// ... and fetch the bytes of the *entire* dictionary. // ... and fetch the bytes of the *entire* dictionary.
let dictBytes = stream.getBytes(dictLength); const dictBytes = stream.getBytes(dictLength);
// Finally, don't forget to reset the stream position. // Finally, don't forget to reset the stream position.
stream.pos = initialStreamPos; stream.pos = initialStreamPos;
cacheKey = computeAdler32(imageBytes) + '_' + computeAdler32(dictBytes); cacheKey = computeAdler32(imageBytes) + '_' + computeAdler32(dictBytes);
let cacheEntry = this.imageCache[cacheKey]; const cacheEntry = this.imageCache[cacheKey];
if (cacheEntry !== undefined) { if (cacheEntry !== undefined) {
this.buf2 = Cmd.get('EI'); this.buf2 = Cmd.get('EI');
this.shift(); this.shift();
@ -482,7 +493,7 @@ var Parser = (function ParserClosure() {
imageStream = this.filter(imageStream, dict, length); imageStream = this.filter(imageStream, dict, length);
imageStream.dict = dict; imageStream.dict = dict;
if (cacheKey !== undefined) { if (cacheKey !== undefined) {
imageStream.cacheKey = 'inline_' + length + '_' + cacheKey; imageStream.cacheKey = `inline_${length}_${cacheKey}`;
this.imageCache[cacheKey] = imageStream; this.imageCache[cacheKey] = imageStream;
} }
@ -490,7 +501,7 @@ var Parser = (function ParserClosure() {
this.shift(); this.shift();
return imageStream; return imageStream;
}, }
_findStreamLength(startPos, signature) { _findStreamLength(startPos, signature) {
const { stream, } = this.lexer; const { stream, } = this.lexer;
@ -521,28 +532,28 @@ var Parser = (function ParserClosure() {
stream.pos += scanLength; stream.pos += scanLength;
} }
return -1; return -1;
}, }
makeStream: function Parser_makeStream(dict, cipherTransform) { makeStream(dict, cipherTransform) {
var lexer = this.lexer; const lexer = this.lexer;
var stream = lexer.stream; let stream = lexer.stream;
// get stream start position // Get the stream's start position.
lexer.skipToNextLine(); lexer.skipToNextLine();
const startPos = stream.pos - 1; const startPos = stream.pos - 1;
// get length // Get the length.
var length = dict.get('Length'); let length = dict.get('Length');
if (!Number.isInteger(length)) { if (!Number.isInteger(length)) {
info('Bad ' + length + ' attribute in stream'); info(`Bad length "${length}" in stream`);
length = 0; length = 0;
} }
// skip over the stream data // Skip over the stream data.
stream.pos = startPos + length; stream.pos = startPos + length;
lexer.nextChar(); lexer.nextChar();
// Shift '>>' and check whether the new object marks the end of the stream // Shift '>>' and check whether the new object marks the end of the stream.
if (this.tryShift() && isCmd(this.buf2, 'endstream')) { if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
this.shift(); // 'stream' this.shift(); // 'stream'
} else { } else {
@ -561,7 +572,7 @@ var Parser = (function ParserClosure() {
const end = ENDSTREAM_SIGNATURE.length - i; const end = ENDSTREAM_SIGNATURE.length - i;
const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end); const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
let maybeLength = this._findStreamLength(startPos, const maybeLength = this._findStreamLength(startPos,
TRUNCATED_SIGNATURE); TRUNCATED_SIGNATURE);
if (maybeLength >= 0) { if (maybeLength >= 0) {
// Ensure that the byte immediately following the truncated // Ensure that the byte immediately following the truncated
@ -596,10 +607,12 @@ var Parser = (function ParserClosure() {
stream = this.filter(stream, dict, length); stream = this.filter(stream, dict, length);
stream.dict = dict; stream.dict = dict;
return stream; return stream;
}, }
filter: function Parser_filter(stream, dict, length) {
var filter = dict.get('Filter', 'F'); filter(stream, dict, length) {
var params = dict.get('DecodeParms', 'DP'); let filter = dict.get('Filter', 'F');
let params = dict.get('DecodeParms', 'DP');
if (isName(filter)) { if (isName(filter)) {
if (Array.isArray(params)) { if (Array.isArray(params)) {
warn('/DecodeParms should not contain an Array, ' + warn('/DecodeParms should not contain an Array, ' +
@ -608,14 +621,14 @@ var Parser = (function ParserClosure() {
return this.makeFilter(stream, filter.name, length, params); return this.makeFilter(stream, filter.name, length, params);
} }
var maybeLength = length; let maybeLength = length;
if (Array.isArray(filter)) { if (Array.isArray(filter)) {
var filterArray = filter; let filterArray = filter;
var paramsArray = params; let paramsArray = params;
for (var i = 0, ii = filterArray.length; i < ii; ++i) { for (let i = 0, ii = filterArray.length; i < ii; ++i) {
filter = this.xref.fetchIfRef(filterArray[i]); filter = this.xref.fetchIfRef(filterArray[i]);
if (!isName(filter)) { if (!isName(filter)) {
throw new FormatError('Bad filter name: ' + filter); throw new FormatError(`Bad filter name "${filter}"`);
} }
params = null; params = null;
@ -623,22 +636,24 @@ var Parser = (function ParserClosure() {
params = this.xref.fetchIfRef(paramsArray[i]); params = this.xref.fetchIfRef(paramsArray[i]);
} }
stream = this.makeFilter(stream, filter.name, maybeLength, params); stream = this.makeFilter(stream, filter.name, maybeLength, params);
// after the first stream the length variable is invalid // After the first stream the `length` variable is invalid.
maybeLength = null; maybeLength = null;
} }
} }
return stream; return stream;
}, }
makeFilter: function Parser_makeFilter(stream, name, maybeLength, params) {
makeFilter(stream, name, maybeLength, params) {
// Since the 'Length' entry in the stream dictionary can be completely // Since the 'Length' entry in the stream dictionary can be completely
// wrong, e.g. zero for non-empty streams, only skip parsing the stream // wrong, e.g. zero for non-empty streams, only skip parsing the stream
// when we can be absolutely certain that it actually is empty. // when we can be absolutely certain that it actually is empty.
if (maybeLength === 0) { if (maybeLength === 0) {
warn('Empty "' + name + '" stream.'); warn(`Empty "${name}" stream.`);
return new NullStream(); return new NullStream();
} }
try { try {
var xrefStreamStats = this.xref.stats.streamTypes; const xrefStreamStats = this.xref.stats.streamTypes;
if (name === 'FlateDecode' || name === 'Fl') { if (name === 'FlateDecode' || name === 'Fl') {
xrefStreamStats[StreamType.FLATE] = true; xrefStreamStats[StreamType.FLATE] = true;
if (params) { if (params) {
@ -649,7 +664,7 @@ var Parser = (function ParserClosure() {
} }
if (name === 'LZWDecode' || name === 'LZW') { if (name === 'LZWDecode' || name === 'LZW') {
xrefStreamStats[StreamType.LZW] = true; xrefStreamStats[StreamType.LZW] = true;
var earlyChange = 1; let earlyChange = 1;
if (params) { if (params) {
if (params.has('EarlyChange')) { if (params.has('EarlyChange')) {
earlyChange = params.get('EarlyChange'); earlyChange = params.get('EarlyChange');
@ -688,23 +703,52 @@ var Parser = (function ParserClosure() {
xrefStreamStats[StreamType.JBIG] = true; xrefStreamStats[StreamType.JBIG] = true;
return new Jbig2Stream(stream, maybeLength, stream.dict, params); return new Jbig2Stream(stream, maybeLength, stream.dict, params);
} }
warn('filter "' + name + '" not supported yet'); warn(`Filter "${name}" is not supported.`);
return stream; return stream;
} catch (ex) { } catch (ex) {
if (ex instanceof MissingDataException) { if (ex instanceof MissingDataException) {
throw ex; throw ex;
} }
warn('Invalid stream: \"' + ex + '\"'); warn(`Invalid stream: "${ex}"`);
return new NullStream(); return new NullStream();
} }
}, }
}; }
return Parser; // A '1' in this array means the character is white space. A '1' or
})(); // '2' means the character ends a name or command.
const specialChars = [
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // fx
];
var Lexer = (function LexerClosure() { function toHexDigit(ch) {
function Lexer(stream, knownCommands) { if (ch >= 0x30 && ch <= 0x39) { // '0'-'9'
return ch & 0x0F;
}
if ((ch >= 0x41 && ch <= 0x46) || (ch >= 0x61 && ch <= 0x66)) {
// 'A'-'F', 'a'-'f'
return (ch & 0x0F) + 9;
}
return -1;
}
class Lexer {
constructor(stream, knownCommands) {
this.stream = stream; this.stream = stream;
this.nextChar(); this.nextChar();
@ -727,50 +771,19 @@ var Lexer = (function LexerClosure() {
this.beginInlineImagePos = -1; this.beginInlineImagePos = -1;
} }
// A '1' in this array means the character is white space. A '1' or nextChar() {
// '2' means the character ends a name or command.
var specialChars = [
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // fx
];
function toHexDigit(ch) {
if (ch >= 0x30 && ch <= 0x39) { // '0'-'9'
return ch & 0x0F;
}
if ((ch >= 0x41 && ch <= 0x46) || (ch >= 0x61 && ch <= 0x66)) {
// 'A'-'F', 'a'-'f'
return (ch & 0x0F) + 9;
}
return -1;
}
Lexer.prototype = {
nextChar: function Lexer_nextChar() {
return (this.currentChar = this.stream.getByte()); return (this.currentChar = this.stream.getByte());
}, }
peekChar: function Lexer_peekChar() {
peekChar() {
return this.stream.peekByte(); return this.stream.peekByte();
}, }
getNumber: function Lexer_getNumber() {
var ch = this.currentChar; getNumber() {
var eNotation = false; let ch = this.currentChar;
var divideBy = 0; // different from 0 if it's a floating point value let eNotation = false;
var sign = 0; let divideBy = 0; // Different from 0 if it's a floating point value.
let sign = 0;
if (ch === 0x2D) { // '-' if (ch === 0x2D) { // '-'
sign = -1; sign = -1;
@ -806,17 +819,17 @@ var Lexer = (function LexerClosure() {
} }
sign = sign || 1; sign = sign || 1;
var baseValue = ch - 0x30; // '0' let baseValue = ch - 0x30; // '0'
var powerValue = 0; let powerValue = 0;
var powerValueSign = 1; let powerValueSign = 1;
while ((ch = this.nextChar()) >= 0) { while ((ch = this.nextChar()) >= 0) {
if (0x30 <= ch && ch <= 0x39) { // '0' - '9' if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
var currentDigit = ch - 0x30; // '0' const currentDigit = ch - 0x30; // '0'
if (eNotation) { // We are after an 'e' or 'E' if (eNotation) { // We are after an 'e' or 'E'.
powerValue = powerValue * 10 + currentDigit; powerValue = powerValue * 10 + currentDigit;
} else { } else {
if (divideBy !== 0) { // We are after a point if (divideBy !== 0) { // We are after a point.
divideBy *= 10; divideBy *= 10;
} }
baseValue = baseValue * 10 + currentDigit; baseValue = baseValue * 10 + currentDigit;
@ -825,27 +838,27 @@ var Lexer = (function LexerClosure() {
if (divideBy === 0) { if (divideBy === 0) {
divideBy = 1; divideBy = 1;
} else { } else {
// A number can have only one '.' // A number can have only one dot.
break; break;
} }
} else if (ch === 0x2D) { // '-' } else if (ch === 0x2D) { // '-'
// ignore minus signs in the middle of numbers to match // Ignore minus signs in the middle of numbers to match
// Adobe's behavior // Adobe's behavior.
warn('Badly formatted number'); warn('Badly formatted number: minus sign in the middle');
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e' } else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
// 'E' can be either a scientific notation or the beginning of a new // 'E' can be either a scientific notation or the beginning of a new
// operator // operator.
ch = this.peekChar(); ch = this.peekChar();
if (ch === 0x2B || ch === 0x2D) { // '+', '-' if (ch === 0x2B || ch === 0x2D) { // '+', '-'
powerValueSign = (ch === 0x2D) ? -1 : 1; powerValueSign = (ch === 0x2D) ? -1 : 1;
this.nextChar(); // Consume the sign character this.nextChar(); // Consume the sign character.
} else if (ch < 0x30 || ch > 0x39) { // '0' - '9' } else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
// The 'E' must be the beginning of a new operator // The 'E' must be the beginning of a new operator.
break; break;
} }
eNotation = true; eNotation = true;
} else { } else {
// the last character doesn't belong to us // The last character doesn't belong to us.
break; break;
} }
} }
@ -857,16 +870,17 @@ var Lexer = (function LexerClosure() {
baseValue *= Math.pow(10, powerValueSign * powerValue); baseValue *= Math.pow(10, powerValueSign * powerValue);
} }
return sign * baseValue; return sign * baseValue;
}, }
getString: function Lexer_getString() {
var numParen = 1; getString() {
var done = false; let numParen = 1;
var strBuf = this.strBuf; let done = false;
const strBuf = this.strBuf;
strBuf.length = 0; strBuf.length = 0;
var ch = this.nextChar(); let ch = this.nextChar();
while (true) { while (true) {
var charBuffered = false; let charBuffered = false;
switch (ch | 0) { switch (ch | 0) {
case -1: case -1:
warn('Unterminated string'); warn('Unterminated string');
@ -913,7 +927,7 @@ var Lexer = (function LexerClosure() {
break; break;
case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3' case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7' case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
var x = ch & 0x0F; let x = ch & 0x0F;
ch = this.nextChar(); ch = this.nextChar();
charBuffered = true; charBuffered = true;
if (ch >= 0x30 && ch <= 0x37) { // '0'-'7' if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
@ -950,11 +964,13 @@ var Lexer = (function LexerClosure() {
} }
} }
return strBuf.join(''); return strBuf.join('');
}, }
getName: function Lexer_getName() {
var ch, previousCh; getName() {
var strBuf = this.strBuf; let ch, previousCh;
const strBuf = this.strBuf;
strBuf.length = 0; strBuf.length = 0;
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) { while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
if (ch === 0x23) { // '#' if (ch === 0x23) { // '#'
ch = this.nextChar(); ch = this.nextChar();
@ -964,14 +980,14 @@ var Lexer = (function LexerClosure() {
strBuf.push('#'); strBuf.push('#');
break; break;
} }
var x = toHexDigit(ch); const x = toHexDigit(ch);
if (x !== -1) { if (x !== -1) {
previousCh = ch; previousCh = ch;
ch = this.nextChar(); ch = this.nextChar();
var x2 = toHexDigit(ch); const x2 = toHexDigit(ch);
if (x2 === -1) { if (x2 === -1) {
warn('Lexer_getName: Illegal digit (' + warn(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` +
String.fromCharCode(ch) + ') in hexadecimal number.'); 'in hexadecimal number.');
strBuf.push('#', String.fromCharCode(previousCh)); strBuf.push('#', String.fromCharCode(previousCh));
if (specialChars[ch]) { if (specialChars[ch]) {
break; break;
@ -988,17 +1004,18 @@ var Lexer = (function LexerClosure() {
} }
} }
if (strBuf.length > 127) { if (strBuf.length > 127) {
warn('name token is longer than allowed by the spec: ' + strBuf.length); warn(`Name token is longer than allowed by the spec: ${strBuf.length}`);
} }
return Name.get(strBuf.join('')); return Name.get(strBuf.join(''));
}, }
getHexString: function Lexer_getHexString() {
var strBuf = this.strBuf; getHexString() {
const strBuf = this.strBuf;
strBuf.length = 0; strBuf.length = 0;
var ch = this.currentChar; let ch = this.currentChar;
var isFirstHex = true; let isFirstHex = true;
var firstDigit; let firstDigit, secondDigit;
var secondDigit;
while (true) { while (true) {
if (ch < 0) { if (ch < 0) {
warn('Unterminated hex string'); warn('Unterminated hex string');
@ -1013,14 +1030,14 @@ var Lexer = (function LexerClosure() {
if (isFirstHex) { if (isFirstHex) {
firstDigit = toHexDigit(ch); firstDigit = toHexDigit(ch);
if (firstDigit === -1) { if (firstDigit === -1) {
warn('Ignoring invalid character "' + ch + '" in hex string'); warn(`Ignoring invalid character "${ch}" in hex string`);
ch = this.nextChar(); ch = this.nextChar();
continue; continue;
} }
} else { } else {
secondDigit = toHexDigit(ch); secondDigit = toHexDigit(ch);
if (secondDigit === -1) { if (secondDigit === -1) {
warn('Ignoring invalid character "' + ch + '" in hex string'); warn(`Ignoring invalid character "${ch}" in hex string`);
ch = this.nextChar(); ch = this.nextChar();
continue; continue;
} }
@ -1031,11 +1048,12 @@ var Lexer = (function LexerClosure() {
} }
} }
return strBuf.join(''); return strBuf.join('');
}, }
getObj: function Lexer_getObj() {
// skip whitespace and comments getObj() {
var comment = false; // Skip whitespace and comments.
var ch = this.currentChar; let comment = false;
let ch = this.currentChar;
while (true) { while (true) {
if (ch < 0) { if (ch < 0) {
return EOF; return EOF;
@ -1052,7 +1070,7 @@ var Lexer = (function LexerClosure() {
ch = this.nextChar(); ch = this.nextChar();
} }
// start reading token // Start reading a token.
switch (ch | 0) { switch (ch | 0) {
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4' case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9' case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
@ -1101,14 +1119,14 @@ var Lexer = (function LexerClosure() {
throw new FormatError(`Illegal character: ${ch}`); throw new FormatError(`Illegal character: ${ch}`);
} }
// command // Start reading a command.
var str = String.fromCharCode(ch); let str = String.fromCharCode(ch);
var knownCommands = this.knownCommands; const knownCommands = this.knownCommands;
var knownCommandFound = knownCommands && knownCommands[str] !== undefined; let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) { while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
// stop if known command is found and next character does not make // Stop if a known command is found and next character does not make
// the str a command // the string a command.
var possibleCommand = str + String.fromCharCode(ch); const possibleCommand = str + String.fromCharCode(ch);
if (knownCommandFound && knownCommands[possibleCommand] === undefined) { if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
break; break;
} }
@ -1135,9 +1153,10 @@ var Lexer = (function LexerClosure() {
} }
return Cmd.get(str); return Cmd.get(str);
}, }
skipToNextLine: function Lexer_skipToNextLine() {
var ch = this.currentChar; skipToNextLine() {
let ch = this.currentChar;
while (ch >= 0) { while (ch >= 0) {
if (ch === 0x0D) { // CR if (ch === 0x0D) { // CR
ch = this.nextChar(); ch = this.nextChar();
@ -1151,61 +1170,64 @@ var Lexer = (function LexerClosure() {
} }
ch = this.nextChar(); ch = this.nextChar();
} }
}, }
}; }
return Lexer; class Linearization {
})(); static create(stream) {
function getInt(linDict, name, allowZeroValue = false) {
var Linearization = { const obj = linDict.get(name);
create: function LinearizationCreate(stream) {
function getInt(name, allowZeroValue) {
var obj = linDict.get(name);
if (Number.isInteger(obj) && (allowZeroValue ? obj >= 0 : obj > 0)) { if (Number.isInteger(obj) && (allowZeroValue ? obj >= 0 : obj > 0)) {
return obj; return obj;
} }
throw new Error('The "' + name + '" parameter in the linearization ' + throw new Error(`The "${name}" parameter in the linearization ` +
'dictionary is invalid.'); 'dictionary is invalid.');
} }
function getHints() {
var hints = linDict.get('H'), hintsLength, item; function getHints(linDict) {
const hints = linDict.get('H');
let hintsLength;
if (Array.isArray(hints) && if (Array.isArray(hints) &&
((hintsLength = hints.length) === 2 || hintsLength === 4)) { ((hintsLength = hints.length) === 2 || hintsLength === 4)) {
for (var index = 0; index < hintsLength; index++) { for (let index = 0; index < hintsLength; index++) {
if (!(Number.isInteger(item = hints[index]) && item > 0)) { const hint = hints[index];
throw new Error('Hint (' + index + if (!(Number.isInteger(hint) && hint > 0)) {
') in the linearization dictionary is invalid.'); throw new Error(`Hint (${index}) in the linearization dictionary ` +
'is invalid.');
} }
} }
return hints; return hints;
} }
throw new Error('Hint array in the linearization dictionary is invalid.'); throw new Error('Hint array in the linearization dictionary is invalid.');
} }
var parser = new Parser(new Lexer(stream), false, null);
var obj1 = parser.getObj(); const parser = new Parser(new Lexer(stream), false, null);
var obj2 = parser.getObj(); const obj1 = parser.getObj();
var obj3 = parser.getObj(); const obj2 = parser.getObj();
var linDict = parser.getObj(); const obj3 = parser.getObj();
var obj, length; const linDict = parser.getObj();
let obj, length;
if (!(Number.isInteger(obj1) && Number.isInteger(obj2) && if (!(Number.isInteger(obj1) && Number.isInteger(obj2) &&
isCmd(obj3, 'obj') && isDict(linDict) && isCmd(obj3, 'obj') && isDict(linDict) &&
isNum(obj = linDict.get('Linearized')) && obj > 0)) { isNum(obj = linDict.get('Linearized')) && obj > 0)) {
return null; // No valid linearization dictionary found. return null; // No valid linearization dictionary found.
} else if ((length = getInt('L')) !== stream.length) { } else if ((length = getInt(linDict, 'L')) !== stream.length) {
throw new Error('The "L" parameter in the linearization dictionary ' + throw new Error('The "L" parameter in the linearization dictionary ' +
'does not equal the stream length.'); 'does not equal the stream length.');
} }
return { return {
length, length,
hints: getHints(), hints: getHints(linDict),
objectNumberFirst: getInt('O'), objectNumberFirst: getInt(linDict, 'O'),
endFirst: getInt('E'), endFirst: getInt(linDict, 'E'),
numPages: getInt('N'), numPages: getInt(linDict, 'N'),
mainXRefEntriesOffset: getInt('T'), mainXRefEntriesOffset: getInt(linDict, 'T'),
pageFirst: (linDict.has('P') ? getInt('P', true) : 0), pageFirst: (linDict.has('P') ?
getInt(linDict, 'P', /* allowZeroValue = */ true) : 0),
}; };
}, }
}; }
export { export {
Lexer, Lexer,

View File

@ -12,74 +12,129 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* eslint no-var: error */
import { Lexer, Linearization } from '../../src/core/parser'; import { Lexer, Linearization, Parser } from '../../src/core/parser';
import { FormatError } from '../../src/shared/util'; import { FormatError } from '../../src/shared/util';
import { Name } from '../../src/core/primitives'; import { Name } from '../../src/core/primitives';
import { StringStream } from '../../src/core/stream'; import { StringStream } from '../../src/core/stream';
describe('parser', function() { describe('parser', function() {
describe('Lexer', function() { describe('Parser', function() {
it('should stop parsing numbers at the end of stream', function() { describe('inlineStreamSkipEI', function() {
var input = new StringStream('11.234'); it('should skip over the EI marker if it is found', function() {
var lexer = new Lexer(input); const string = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
var result = lexer.getNumber(); '/F /A85 ID abc123~> EI Q';
const input = new StringStream(string);
const lexer = new Lexer(input);
const parser = new Parser(lexer, /* allowStreams = */ true,
/* xref = */ null);
parser.inlineStreamSkipEI(input);
expect(input.pos).toEqual(string.indexOf('Q'));
expect(input.peekByte()).toEqual(0x51); // 'Q'
});
expect(result).toEqual(11.234); it('should skip to the end of stream if the EI marker is not found',
function() {
const string = 'q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 ' +
'/F /A85 ID abc123~> Q';
const input = new StringStream(string);
const lexer = new Lexer(input);
const parser = new Parser(lexer, /* allowStreams = */ true,
/* xref = */ null);
parser.inlineStreamSkipEI(input);
expect(input.pos).toEqual(string.length);
expect(input.peekByte()).toEqual(-1);
});
});
});
describe('Lexer', function() {
describe('nextChar', function() {
it('should return and set -1 when the end of the stream is reached',
function() {
const input = new StringStream('');
const lexer = new Lexer(input);
expect(lexer.nextChar()).toEqual(-1);
expect(lexer.currentChar).toEqual(-1);
});
it('should return and set the character after the current position',
function() {
const input = new StringStream('123');
const lexer = new Lexer(input);
expect(lexer.nextChar()).toEqual(0x32); // '2'
expect(lexer.currentChar).toEqual(0x32); // '2'
});
});
describe('peekChar', function() {
it('should only return -1 when the end of the stream is reached',
function() {
const input = new StringStream('');
const lexer = new Lexer(input);
expect(lexer.peekChar()).toEqual(-1);
expect(lexer.currentChar).toEqual(-1);
});
it('should only return the character after the current position',
function() {
const input = new StringStream('123');
const lexer = new Lexer(input);
expect(lexer.peekChar()).toEqual(0x32); // '2'
expect(lexer.currentChar).toEqual(0x31); // '1'
});
});
describe('getNumber', function() {
it('should stop parsing numbers at the end of stream', function() {
const input = new StringStream('11.234');
const lexer = new Lexer(input);
expect(lexer.getNumber()).toEqual(11.234);
}); });
it('should parse PostScript numbers', function() { it('should parse PostScript numbers', function() {
var numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.', '0.0', const numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.',
'123', '-98', '43445', '0', '+17']; '0.0', '123', '-98', '43445', '0', '+17'];
for (var i = 0, ii = numbers.length; i < ii; i++) { for (const number of numbers) {
var num = numbers[i]; const input = new StringStream(number);
var input = new StringStream(num); const lexer = new Lexer(input);
var lexer = new Lexer(input); expect(lexer.getNumber()).toEqual(parseFloat(number));
var result = lexer.getNumber();
expect(result).toEqual(parseFloat(num));
} }
}); });
it('should ignore double negative before number', function() { it('should ignore double negative before number', function() {
var input = new StringStream('--205.88'); const input = new StringStream('--205.88');
var lexer = new Lexer(input); const lexer = new Lexer(input);
var result = lexer.getNumber(); expect(lexer.getNumber()).toEqual(-205.88);
expect(result).toEqual(-205.88);
}); });
it('should ignore minus signs in the middle of number', function() { it('should ignore minus signs in the middle of number', function() {
var input = new StringStream('205--.88'); const input = new StringStream('205--.88');
var lexer = new Lexer(input); const lexer = new Lexer(input);
var result = lexer.getNumber(); expect(lexer.getNumber()).toEqual(205.88);
expect(result).toEqual(205.88);
}); });
it('should ignore line-breaks between operator and digit in number', it('should ignore line-breaks between operator and digit in number',
function() { function() {
let minusInput = new StringStream('-\r\n205.88'); const minusInput = new StringStream('-\r\n205.88');
let minusLexer = new Lexer(minusInput); const minusLexer = new Lexer(minusInput);
expect(minusLexer.getNumber()).toEqual(-205.88); expect(minusLexer.getNumber()).toEqual(-205.88);
let plusInput = new StringStream('+\r\n205.88'); const plusInput = new StringStream('+\r\n205.88');
let plusLexer = new Lexer(plusInput); const plusLexer = new Lexer(plusInput);
expect(plusLexer.getNumber()).toEqual(205.88); expect(plusLexer.getNumber()).toEqual(205.88);
}); });
it('should treat a single decimal point as zero', function() { it('should treat a single decimal point as zero', function() {
let input = new StringStream('.'); const input = new StringStream('.');
let lexer = new Lexer(input); const lexer = new Lexer(input);
expect(lexer.getNumber()).toEqual(0); expect(lexer.getNumber()).toEqual(0);
let numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.']; const numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
for (let number of numbers) { for (const number of numbers) {
let input = new StringStream(number); const input = new StringStream(number);
let lexer = new Lexer(input); const lexer = new Lexer(input);
expect(function() { expect(function() {
return lexer.getNumber(); return lexer.getNumber();
@ -88,68 +143,62 @@ describe('parser', function() {
}); });
it('should handle glued numbers and operators', function() { it('should handle glued numbers and operators', function() {
var input = new StringStream('123ET'); const input = new StringStream('123ET');
var lexer = new Lexer(input); const lexer = new Lexer(input);
var value = lexer.getNumber(); expect(lexer.getNumber()).toEqual(123);
expect(value).toEqual(123);
// The lexer must not have consumed the 'E' // The lexer must not have consumed the 'E'
expect(lexer.currentChar).toEqual(0x45); // 'E' expect(lexer.currentChar).toEqual(0x45); // 'E'
}); });
it('should stop parsing strings at the end of stream', function() {
var input = new StringStream('(1$4)');
input.getByte = function(super_getByte) {
// simulating end of file using null (see issue 2766)
var ch = super_getByte.call(input);
return (ch === 0x24 /* '$' */ ? -1 : ch);
}.bind(input, input.getByte);
var lexer = new Lexer(input);
var result = lexer.getString();
expect(result).toEqual('1');
}); });
it('should not throw exception on bad input', function() { describe('getString', function() {
// '8 0 2 15 5 2 2 2 4 3 2 4' it('should stop parsing strings at the end of stream', function() {
// should be parsed as const input = new StringStream('(1$4)');
// '80 21 55 22 24 32' input.getByte = function(super_getByte) {
var input = new StringStream('<7 0 2 15 5 2 2 2 4 3 2 4>'); // Simulating end of file using null (see issue 2766).
var lexer = new Lexer(input); const ch = super_getByte.call(input);
var result = lexer.getHexString(); return (ch === 0x24 /* '$' */ ? -1 : ch);
}.bind(input, input.getByte);
expect(result).toEqual('p!U"$2'); const lexer = new Lexer(input);
expect(lexer.getString()).toEqual('1');
}); });
it('should ignore escaped CR and LF', function() { it('should ignore escaped CR and LF', function() {
// '(\101\<CR><LF>\102)' // '(\101\<CR><LF>\102)' should be parsed as 'AB'.
// should be parsed as const input = new StringStream('(\\101\\\r\n\\102\\\r\\103\\\n\\104)');
// "AB" const lexer = new Lexer(input);
var input = new StringStream('(\\101\\\r\n\\102\\\r\\103\\\n\\104)'); expect(lexer.getString()).toEqual('ABCD');
var lexer = new Lexer(input); });
var result = lexer.getString();
expect(result).toEqual('ABCD');
}); });
it('should handle Names with invalid usage of NUMBER SIGN (#)', function() { describe('getHexString', function() {
var inputNames = ['/# 680 0 R', '/#AQwerty', '/#A<</B']; it('should not throw exception on bad input', function() {
var expectedNames = ['#', '#AQwerty', '#A']; // '7 0 2 15 5 2 2 2 4 3 2 4' should be parsed as '70 21 55 22 24 32'.
const input = new StringStream('<7 0 2 15 5 2 2 2 4 3 2 4>');
const lexer = new Lexer(input);
expect(lexer.getHexString()).toEqual('p!U"$2');
});
});
for (var i = 0, ii = inputNames.length; i < ii; i++) { describe('getName', function() {
var input = new StringStream(inputNames[i]); it('should handle Names with invalid usage of NUMBER SIGN (#)',
var lexer = new Lexer(input); function() {
var result = lexer.getName(); const inputNames = ['/# 680 0 R', '/#AQwerty', '/#A<</B'];
const expectedNames = ['#', '#AQwerty', '#A'];
expect(result).toEqual(Name.get(expectedNames[i])); for (let i = 0, ii = inputNames.length; i < ii; i++) {
const input = new StringStream(inputNames[i]);
const lexer = new Lexer(input);
expect(lexer.getName()).toEqual(Name.get(expectedNames[i]));
} }
}); });
}); });
});
describe('Linearization', function() { describe('Linearization', function() {
it('should not find a linearization dictionary', function () { it('should not find a linearization dictionary', function() {
// Not an actual linearization dictionary. // Not an actual linearization dictionary.
var stream1 = new StringStream( const stream1 = new StringStream(
'3 0 obj\n' + '3 0 obj\n' +
'<<\n' + '<<\n' +
'/Length 4622\n' + '/Length 4622\n' +
@ -160,7 +209,7 @@ describe('parser', function() {
expect(Linearization.create(stream1)).toEqual(null); expect(Linearization.create(stream1)).toEqual(null);
// Linearization dictionary with invalid version number. // Linearization dictionary with invalid version number.
var stream2 = new StringStream( const stream2 = new StringStream(
'1 0 obj\n' + '1 0 obj\n' +
'<<\n' + '<<\n' +
'/Linearized 0\n' + '/Linearized 0\n' +
@ -170,8 +219,8 @@ describe('parser', function() {
expect(Linearization.create(stream2)).toEqual(null); expect(Linearization.create(stream2)).toEqual(null);
}); });
it('should accept a valid linearization dictionary', function () { it('should accept a valid linearization dictionary', function() {
var stream = new StringStream( const stream = new StringStream(
'131 0 obj\n' + '131 0 obj\n' +
'<<\n' + '<<\n' +
'/Linearized 1\n' + '/Linearized 1\n' +
@ -184,7 +233,7 @@ describe('parser', function() {
'>>\n' + '>>\n' +
'endobj' 'endobj'
); );
var expectedLinearizationDict = { const expectedLinearizationDict = {
length: 90, length: 90,
hints: [1388, 863], hints: [1388, 863],
objectNumberFirst: 133, objectNumberFirst: 133,
@ -197,9 +246,9 @@ describe('parser', function() {
}); });
it('should reject a linearization dictionary with invalid ' + it('should reject a linearization dictionary with invalid ' +
'integer parameters', function () { 'integer parameters', function() {
// The /L parameter should be equal to the stream length. // The /L parameter should be equal to the stream length.
var stream1 = new StringStream( const stream1 = new StringStream(
'1 0 obj\n' + '1 0 obj\n' +
'<<\n' + '<<\n' +
'/Linearized 1\n' + '/Linearized 1\n' +
@ -212,13 +261,13 @@ describe('parser', function() {
'>>\n' + '>>\n' +
'endobj' 'endobj'
); );
expect(function () { expect(function() {
return Linearization.create(stream1); return Linearization.create(stream1);
}).toThrow(new Error('The "L" parameter in the linearization ' + }).toThrow(new Error('The "L" parameter in the linearization ' +
'dictionary does not equal the stream length.')); 'dictionary does not equal the stream length.'));
// The /E parameter should not be zero. // The /E parameter should not be zero.
var stream2 = new StringStream( const stream2 = new StringStream(
'1 0 obj\n' + '1 0 obj\n' +
'<<\n' + '<<\n' +
'/Linearized 1\n' + '/Linearized 1\n' +
@ -231,13 +280,13 @@ describe('parser', function() {
'>>\n' + '>>\n' +
'endobj' 'endobj'
); );
expect(function () { expect(function() {
return Linearization.create(stream2); return Linearization.create(stream2);
}).toThrow(new Error('The "E" parameter in the linearization ' + }).toThrow(new Error('The "E" parameter in the linearization ' +
'dictionary is invalid.')); 'dictionary is invalid.'));
// The /O parameter should be an integer. // The /O parameter should be an integer.
var stream3 = new StringStream( const stream3 = new StringStream(
'1 0 obj\n' + '1 0 obj\n' +
'<<\n' + '<<\n' +
'/Linearized 1\n' + '/Linearized 1\n' +
@ -250,16 +299,16 @@ describe('parser', function() {
'>>\n' + '>>\n' +
'endobj' 'endobj'
); );
expect(function () { expect(function() {
return Linearization.create(stream3); return Linearization.create(stream3);
}).toThrow(new Error('The "O" parameter in the linearization ' + }).toThrow(new Error('The "O" parameter in the linearization ' +
'dictionary is invalid.')); 'dictionary is invalid.'));
}); });
it('should reject a linearization dictionary with invalid hint parameters', it('should reject a linearization dictionary with invalid hint parameters',
function () { function() {
// The /H parameter should be an array. // The /H parameter should be an array.
var stream1 = new StringStream( const stream1 = new StringStream(
'1 0 obj\n' + '1 0 obj\n' +
'<<\n' + '<<\n' +
'/Linearized 1\n' + '/Linearized 1\n' +
@ -272,13 +321,13 @@ describe('parser', function() {
'>>\n' + '>>\n' +
'endobj' 'endobj'
); );
expect(function () { expect(function() {
return Linearization.create(stream1); return Linearization.create(stream1);
}).toThrow(new Error('Hint array in the linearization dictionary ' + }).toThrow(new Error('Hint array in the linearization dictionary ' +
'is invalid.')); 'is invalid.'));
// The hint array should contain two, or four, elements. // The hint array should contain two, or four, elements.
var stream2 = new StringStream( const stream2 = new StringStream(
'1 0 obj\n' + '1 0 obj\n' +
'<<\n' + '<<\n' +
'/Linearized 1\n' + '/Linearized 1\n' +
@ -291,13 +340,13 @@ describe('parser', function() {
'>>\n' + '>>\n' +
'endobj' 'endobj'
); );
expect(function () { expect(function() {
return Linearization.create(stream2); return Linearization.create(stream2);
}).toThrow(new Error('Hint array in the linearization dictionary ' + }).toThrow(new Error('Hint array in the linearization dictionary ' +
'is invalid.')); 'is invalid.'));
// The hint array should not contain zero. // The hint array should not contain zero.
var stream3 = new StringStream( const stream3 = new StringStream(
'1 0 obj\n' + '1 0 obj\n' +
'<<\n' + '<<\n' +
'/Linearized 1\n' + '/Linearized 1\n' +
@ -310,7 +359,7 @@ describe('parser', function() {
'>>\n' + '>>\n' +
'endobj' 'endobj'
); );
expect(function () { expect(function() {
return Linearization.create(stream3); return Linearization.create(stream3);
}).toThrow(new Error('Hint (2) in the linearization dictionary ' + }).toThrow(new Error('Hint (2) in the linearization dictionary ' +
'is invalid.')); 'is invalid.'));