// pdf.js/src/core/parser.js
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
'use strict';
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define('pdfjs/core/parser', ['exports', 'pdfjs/shared/util',
'pdfjs/core/primitives', 'pdfjs/core/stream'], factory);
} else if (typeof exports !== 'undefined') {
factory(exports, require('../shared/util.js'), require('./primitives.js'),
require('./stream.js'));
} else {
factory((root.pdfjsCoreParser = {}), root.pdfjsSharedUtil,
root.pdfjsCorePrimitives, root.pdfjsCoreStream);
}
}(this, function (exports, sharedUtil, corePrimitives, coreStream) {
var MissingDataException = sharedUtil.MissingDataException;
var StreamType = sharedUtil.StreamType;
var assert = sharedUtil.assert;
var error = sharedUtil.error;
var info = sharedUtil.info;
var isArray = sharedUtil.isArray;
var isInt = sharedUtil.isInt;
var isNum = sharedUtil.isNum;
var isString = sharedUtil.isString;
var warn = sharedUtil.warn;
var Cmd = corePrimitives.Cmd;
var Dict = corePrimitives.Dict;
var Name = corePrimitives.Name;
var Ref = corePrimitives.Ref;
var isCmd = corePrimitives.isCmd;
var isDict = corePrimitives.isDict;
var isName = corePrimitives.isName;
var Ascii85Stream = coreStream.Ascii85Stream;
var AsciiHexStream = coreStream.AsciiHexStream;
var CCITTFaxStream = coreStream.CCITTFaxStream;
var FlateStream = coreStream.FlateStream;
var Jbig2Stream = coreStream.Jbig2Stream;
var JpegStream = coreStream.JpegStream;
var JpxStream = coreStream.JpxStream;
var LZWStream = coreStream.LZWStream;
var NullStream = coreStream.NullStream;
var PredictorStream = coreStream.PredictorStream;
var RunLengthStream = coreStream.RunLengthStream;
// Unique sentinel object returned by the lexer when the input is exhausted.
var EOF = {};

/**
 * Tests whether a value is the end-of-file sentinel.
 * @param {*} v - Any value produced by the lexer/parser.
 * @returns {boolean} True only when `v` is the very same object as `EOF`.
 */
function isEOF(v) {
  return (v === EOF);
}

// Inline images whose byte length is below this threshold are cached by
// Parser.makeInlineImage.
var MAX_LENGTH_TO_CACHE = 1000;
/**
 * Parser: builds PDF objects (arrays, dictionaries, references, streams and
 * inline images) from the tokens produced by a Lexer. Two tokens of
 * look-ahead are kept in `buf1` (current) and `buf2` (next).
 */
var Parser = (function ParserClosure() {
  /**
   * @param {Lexer} lexer - Token source; its `stream` is also read directly
   *   when stream or inline-image data has to be skipped.
   * @param {boolean} allowStreams - Whether a dictionary followed by the
   *   `stream` keyword is converted into a stream object.
   * @param {Object} xref - Handed to every Dict created here and consulted by
   *   makeFilter (`fetchIfRef`, `stats.streamTypes`).
   * @param {boolean} [recoveryMode] - When truthy, reaching EOF inside an
   *   array or dictionary returns the partial object instead of calling
   *   error().
   */
  function Parser(lexer, allowStreams, xref, recoveryMode) {
    this.lexer = lexer;
    this.allowStreams = allowStreams;
    this.xref = xref;
    this.recoveryMode = recoveryMode || false;
    // Prototype-less map: adler32 checksum -> cached inline image stream.
    this.imageCache = Object.create(null);
    this.refill();
  }

  Parser.prototype = {
    /**
     * Loads both look-ahead slots from the lexer.
     */
    refill: function Parser_refill() {
      this.buf1 = this.lexer.getObj();
      this.buf2 = this.lexer.getObj();
    },

    /**
     * Advances the look-ahead by one token. When the next token is the `ID`
     * command the lexer is NOT read; `buf2` becomes null instead
     * (makeInlineImage then reads the image data straight from the stream).
     */
    shift: function Parser_shift() {
      if (isCmd(this.buf2, 'ID')) {
        this.buf1 = this.buf2;
        this.buf2 = null;
      } else {
        this.buf1 = this.buf2;
        this.buf2 = this.lexer.getObj();
      }
    },

    /**
     * Like shift(), but reports lexer failures instead of propagating them.
     * MissingDataException is always re-thrown.
     * @returns {boolean} True when the shift succeeded, false otherwise.
     */
    tryShift: function Parser_tryShift() {
      try {
        this.shift();
        return true;
      } catch (e) {
        if (e instanceof MissingDataException) {
          throw e;
        }
        // Upon failure, the caller should reset this.lexer.pos to a known good
        // state and call this.shift() twice to reset the buffers.
        return false;
      }
    },

    /**
     * Parses and returns the next object.
     * @param {Object} [cipherTransform] - When given, strings are passed
     *   through `decryptString` and it is forwarded to stream creation.
     * @returns {*} An array, Dict, stream, Ref, number, string, or the raw
     *   token for anything else.
     */
    getObj: function Parser_getObj(cipherTransform) {
      var buf1 = this.buf1;
      this.shift();

      if (buf1 instanceof Cmd) {
        switch (buf1.cmd) {
          case 'BI': // inline image
            return this.makeInlineImage(cipherTransform);
          case '[': // array
            var array = [];
            while (!isCmd(this.buf1, ']') && !isEOF(this.buf1)) {
              array.push(this.getObj(cipherTransform));
            }
            if (isEOF(this.buf1)) {
              if (!this.recoveryMode) {
                error('End of file inside array');
              }
              return array;
            }
            this.shift();
            return array;
          case '<<': // dictionary or stream
            var dict = new Dict(this.xref);
            while (!isCmd(this.buf1, '>>') && !isEOF(this.buf1)) {
              if (!isName(this.buf1)) {
                info('Malformed dictionary: key must be a name object');
                this.shift();
                continue;
              }

              var key = this.buf1.name;
              this.shift();
              if (isEOF(this.buf1)) {
                break;
              }
              dict.set(key, this.getObj(cipherTransform));
            }
            if (isEOF(this.buf1)) {
              if (!this.recoveryMode) {
                error('End of file inside dictionary');
              }
              return dict;
            }

            // Stream objects are not allowed inside content streams or
            // object streams.
            if (isCmd(this.buf2, 'stream')) {
              return (this.allowStreams ?
                      this.makeStream(dict, cipherTransform) : dict);
            }
            this.shift();
            return dict;
          default: // simple object
            return buf1;
        }
      }

      if (isInt(buf1)) { // indirect reference or integer
        var num = buf1;
        if (isInt(this.buf1) && isCmd(this.buf2, 'R')) {
          var ref = new Ref(num, this.buf1);
          this.shift();
          this.shift();
          return ref;
        }
        return num;
      }

      if (isString(buf1)) { // string
        var str = buf1;
        if (cipherTransform) {
          str = cipherTransform.decryptString(str);
        }
        return str;
      }

      // simple object
      return buf1;
    },

    /**
     * Find the end of the stream by searching for the /EI\s/.
     * After `EI` + whitespace, the next five bytes must look like ASCII for
     * the match to be accepted.
     * @param {Object} stream - Byte stream positioned at the image data.
     * @returns {number} The inline stream length.
     */
    findDefaultInlineStreamEnd:
        function Parser_findDefaultInlineStreamEnd(stream) {
      var E = 0x45, I = 0x49, SPACE = 0x20, LF = 0xA, CR = 0xD;
      var startPos = stream.pos, state = 0, ch, i, n, followingBytes;
      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = (ch === E) ? 1 : 0;
        } else if (state === 1) {
          state = (ch === I) ? 2 : 0;
        } else {
          assert(state === 2);
          if (ch === SPACE || ch === LF || ch === CR) {
            // Let's check the next five bytes are ASCII... just be sure.
            n = 5;
            followingBytes = stream.peekBytes(n);
            for (i = 0; i < n; i++) {
              ch = followingBytes[i];
              if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7F)) {
                // Not a LF, CR, SPACE or any visible ASCII character, i.e.
                // it's binary stuff. Resetting the state.
                state = 0;
                break;
              }
            }
            if (state === 2) {
              break; // Finished!
            }
          } else {
            state = 0;
          }
        }
      }
      // Exclude the four bytes consumed around the marker ("EI" plus the
      // whitespace on either side of it).
      return ((stream.pos - 4) - startPos);
    },

    /**
     * Find the EOI (end-of-image) marker 0xFFD9 of the stream.
     * Falls back to findDefaultInlineStreamEnd when no EOI is found.
     * @param {Object} stream - Byte stream positioned at the image data.
     * @returns {number} The inline stream length.
     */
    findDCTDecodeInlineStreamEnd:
        function Parser_findDCTDecodeInlineStreamEnd(stream) {
      var startPos = stream.pos, foundEOI = false, b, markerLength, length;
      while ((b = stream.getByte()) !== -1) {
        if (b !== 0xFF) { // Not a valid marker.
          continue;
        }
        switch (stream.getByte()) {
          case 0x00: // Byte stuffing.
            // 0xFF00 appears to be a very common byte sequence in JPEG images.
            break;

          case 0xFF: // Fill byte.
            // Avoid skipping a valid marker, resetting the stream position.
            stream.skip(-1);
            break;

          case 0xD9: // EOI
            foundEOI = true;
            break;

          case 0xC0: // SOF0
          case 0xC1: // SOF1
          case 0xC2: // SOF2
          case 0xC3: // SOF3

          case 0xC5: // SOF5
          case 0xC6: // SOF6
          case 0xC7: // SOF7

          case 0xC9: // SOF9
          case 0xCA: // SOF10
          case 0xCB: // SOF11

          case 0xCD: // SOF13
          case 0xCE: // SOF14
          case 0xCF: // SOF15

          case 0xC4: // DHT
          case 0xCC: // DAC

          case 0xDA: // SOS
          case 0xDB: // DQT
          case 0xDC: // DNL
          case 0xDD: // DRI
          case 0xDE: // DHP
          case 0xDF: // EXP

          case 0xE0: // APP0
          case 0xE1: // APP1
          case 0xE2: // APP2
          case 0xE3: // APP3
          case 0xE4: // APP4
          case 0xE5: // APP5
          case 0xE6: // APP6
          case 0xE7: // APP7
          case 0xE8: // APP8
          case 0xE9: // APP9
          case 0xEA: // APP10
          case 0xEB: // APP11
          case 0xEC: // APP12
          case 0xED: // APP13
          case 0xEE: // APP14
          case 0xEF: // APP15

          case 0xFE: // COM
            // The marker should be followed by the length of the segment.
            markerLength = stream.getUint16();
            if (markerLength > 2) {
              // |markerLength| contains the byte length of the marker segment,
              // including its own length (2 bytes) and excluding the marker.
              stream.skip(markerLength - 2); // Jump to the next marker.
            } else {
              // The marker length is invalid, resetting the stream position.
              stream.skip(-2);
            }
            break;
        }
        if (foundEOI) {
          break;
        }
      }
      length = stream.pos - startPos;
      if (b === -1) {
        warn('Inline DCTDecode image stream: ' +
             'EOI marker not found, searching for /EI/ instead.');
        stream.skip(-length); // Reset the stream position.
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    },

    /**
     * Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream.
     * Falls back to findDefaultInlineStreamEnd when no EOD is found.
     * @param {Object} stream - Byte stream positioned at the image data.
     * @returns {number} The inline stream length.
     */
    findASCII85DecodeInlineStreamEnd:
        function Parser_findASCII85DecodeInlineStreamEnd(stream) {
      var TILDE = 0x7E, GT = 0x3E;
      var startPos = stream.pos, ch, length;
      while ((ch = stream.getByte()) !== -1) {
        if (ch === TILDE && stream.peekByte() === GT) {
          stream.skip();
          break;
        }
      }
      length = stream.pos - startPos;
      if (ch === -1) {
        warn('Inline ASCII85Decode image stream: ' +
             'EOD marker not found, searching for /EI/ instead.');
        stream.skip(-length); // Reset the stream position.
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    },

    /**
     * Find the EOD (end-of-data) marker '>' (i.e. GT) of the stream.
     * Falls back to findDefaultInlineStreamEnd when no EOD is found.
     * @param {Object} stream - Byte stream positioned at the image data.
     * @returns {number} The inline stream length.
     */
    findASCIIHexDecodeInlineStreamEnd:
        function Parser_findASCIIHexDecodeInlineStreamEnd(stream) {
      var GT = 0x3E;
      var startPos = stream.pos, ch, length;
      while ((ch = stream.getByte()) !== -1) {
        if (ch === GT) {
          break;
        }
      }
      length = stream.pos - startPos;
      if (ch === -1) {
        warn('Inline ASCIIHexDecode image stream: ' +
             'EOD marker not found, searching for /EI/ instead.');
        stream.skip(-length); // Reset the stream position.
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    },

    /**
     * Skip over the /EI/ for streams where we search for an EOD marker.
     * Consumes bytes up to and including the first byte after `EI`.
     * @param {Object} stream - Byte stream positioned after the EOD marker.
     */
    inlineStreamSkipEI: function Parser_inlineStreamSkipEI(stream) {
      var E = 0x45, I = 0x49;
      var state = 0, ch;
      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = (ch === E) ? 1 : 0;
        } else if (state === 1) {
          state = (ch === I) ? 2 : 0;
        } else if (state === 2) {
          break;
        }
      }
    },

    /**
     * Parses an inline image (the part after `BI`): reads the key/value
     * pairs up to `ID`, locates the end of the image data using a strategy
     * chosen from the first filter name, and wraps the data in a (possibly
     * decrypted and filtered) stream. Images shorter than
     * MAX_LENGTH_TO_CACHE bytes are cached by their adler32 checksum.
     * @param {Object} [cipherTransform] - Optional decryption transform.
     * @returns {Object} The image stream, with `dict` set on it.
     */
    makeInlineImage: function Parser_makeInlineImage(cipherTransform) {
      var lexer = this.lexer;
      var stream = lexer.stream;

      // Parse dictionary.
      var dict = new Dict(this.xref);
      while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) {
        if (!isName(this.buf1)) {
          error('Dictionary key must be a name object');
        }
        var key = this.buf1.name;
        this.shift();
        if (isEOF(this.buf1)) {
          break;
        }
        dict.set(key, this.getObj(cipherTransform));
      }

      // Extract the name of the first (i.e. the current) image filter.
      var filter = dict.get('Filter', 'F'), filterName;
      if (isName(filter)) {
        filterName = filter.name;
      } else if (isArray(filter) && isName(filter[0])) {
        filterName = filter[0].name;
      }

      // Parse image stream.
      var startPos = stream.pos, length, i, ii;
      if (filterName === 'DCTDecode' || filterName === 'DCT') {
        length = this.findDCTDecodeInlineStreamEnd(stream);
      } else if (filterName === 'ASCII85Decode' || filterName === 'A85') {
        // Fixed: the full filter name was previously misspelled here, so only
        // the 'A85' abbreviation reached the ASCII85 end-of-data search.
        length = this.findASCII85DecodeInlineStreamEnd(stream);
      } else if (filterName === 'ASCIIHexDecode' || filterName === 'AHx') {
        length = this.findASCIIHexDecodeInlineStreamEnd(stream);
      } else {
        length = this.findDefaultInlineStreamEnd(stream);
      }
      var imageStream = stream.makeSubStream(startPos, length, dict);

      // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
      // adler32 checksum.
      var adler32;
      if (length < MAX_LENGTH_TO_CACHE) {
        var imageBytes = imageStream.getBytes();
        imageStream.reset();

        var a = 1;
        var b = 0;
        for (i = 0, ii = imageBytes.length; i < ii; ++i) {
          // No modulo required in the loop if imageBytes.length < 5552.
          a += imageBytes[i] & 0xff;
          b += a;
        }
        adler32 = ((b % 65521) << 16) | (a % 65521);

        // Fixed: look the entry up under the computed checksum, not under a
        // property literally named "adler32" (which never matched).
        var cacheEntry = this.imageCache[adler32];
        if (cacheEntry !== undefined) {
          this.buf2 = Cmd.get('EI');
          this.shift();

          cacheEntry.reset();
          return cacheEntry;
        }
      }

      if (cipherTransform) {
        imageStream = cipherTransform.createStream(imageStream, length);
      }

      imageStream = this.filter(imageStream, dict, length);
      imageStream.dict = dict;
      if (adler32 !== undefined) {
        imageStream.cacheKey = 'inline_' + length + '_' + adler32;
        this.imageCache[adler32] = imageStream;
      }

      // The end finders already consumed the EI bytes from the stream, so
      // inject the EI command into the look-ahead by hand.
      this.buf2 = Cmd.get('EI');
      this.shift();

      return imageStream;
    },

    /**
     * Creates a stream object for a dictionary that is followed by the
     * `stream` keyword. Uses the dictionary's `Length`; when the token after
     * the data is not `endstream`, scans forward for the literal
     * "endstream" signature and derives the length from its position.
     * Calls error('Missing endstream') when the signature is not found.
     * @param {Dict} dict - The stream dictionary.
     * @param {Object} [cipherTransform] - Optional decryption transform.
     * @returns {Object} The (possibly decrypted and filtered) stream.
     */
    makeStream: function Parser_makeStream(dict, cipherTransform) {
      var lexer = this.lexer;
      var stream = lexer.stream;

      // get stream start position
      lexer.skipToNextLine();
      var pos = stream.pos - 1;

      // get length
      var length = dict.get('Length');
      if (!isInt(length)) {
        info('Bad ' + length + ' attribute in stream');
        length = 0;
      }

      // skip over the stream data
      stream.pos = pos + length;
      lexer.nextChar();

      // Shift '>>' and check whether the new object marks the end of the stream
      if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
        this.shift(); // 'stream'
      } else {
        // bad stream length, scanning for endstream
        stream.pos = pos;
        var SCAN_BLOCK_SIZE = 2048;
        var ENDSTREAM_SIGNATURE_LENGTH = 9;
        var ENDSTREAM_SIGNATURE = [0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65,
                                   0x61, 0x6D];
        var skipped = 0, found = false, i, j;
        while (stream.pos < stream.end) {
          var scanBytes = stream.peekBytes(SCAN_BLOCK_SIZE);
          // Stop short of the block end so a signature straddling two blocks
          // is seen in full by the next iteration.
          var scanLength = scanBytes.length - ENDSTREAM_SIGNATURE_LENGTH;
          if (scanLength <= 0) {
            break;
          }
          found = false;
          i = 0;
          while (i < scanLength) {
            j = 0;
            while (j < ENDSTREAM_SIGNATURE_LENGTH &&
                   scanBytes[i + j] === ENDSTREAM_SIGNATURE[j]) {
              j++;
            }
            if (j >= ENDSTREAM_SIGNATURE_LENGTH) {
              found = true;
              break;
            }
            i++;
          }
          if (found) {
            skipped += i;
            stream.pos += i;
            break;
          }
          skipped += scanLength;
          stream.pos += scanLength;
        }
        if (!found) {
          error('Missing endstream');
        }
        length = skipped;

        lexer.nextChar();
        this.shift();
        this.shift();
      }
      this.shift(); // 'endstream'

      stream = stream.makeSubStream(pos, length, dict);
      if (cipherTransform) {
        stream = cipherTransform.createStream(stream, length);
      }
      stream = this.filter(stream, dict, length);
      stream.dict = dict;
      return stream;
    },

    /**
     * Wraps `stream` in the decoder(s) named by the dictionary's
     * `Filter`/`F` entry, pairing each with its `DecodeParms`/`DP` entry.
     * @param {Object} stream - The raw stream.
     * @param {Dict} dict - The stream dictionary.
     * @param {number} length - Length of the raw data; only passed to the
     *   first filter of a filter array.
     * @returns {Object} The decoded stream, or `stream` itself when no filter
     *   is specified.
     */
    filter: function Parser_filter(stream, dict, length) {
      var filter = dict.get('Filter', 'F');
      var params = dict.get('DecodeParms', 'DP');
      if (isName(filter)) {
        return this.makeFilter(stream, filter.name, length, params);
      }

      var maybeLength = length;
      if (isArray(filter)) {
        var filterArray = filter;
        var paramsArray = params;
        for (var i = 0, ii = filterArray.length; i < ii; ++i) {
          filter = filterArray[i];
          if (!isName(filter)) {
            error('Bad filter name: ' + filter);
          }

          params = null;
          if (isArray(paramsArray) && (i in paramsArray)) {
            params = paramsArray[i];
          }
          stream = this.makeFilter(stream, filter.name, maybeLength, params);
          // after the first stream the length variable is invalid
          maybeLength = null;
        }
      }
      return stream;
    },

    /**
     * Creates the decoder stream for a single filter name and records the
     * filter type in `this.xref.stats.streamTypes`.
     * @param {Object} stream - The stream to decode.
     * @param {string} name - Filter name or its abbreviation.
     * @param {number|null} maybeLength - Data length, or null when unknown.
     * @param {*} params - Decode parameters (resolved through
     *   `xref.fetchIfRef` when an xref is available), or a falsy value.
     * @returns {Object} The decoder stream; a NullStream when `maybeLength`
     *   is 0 or when creating the decoder throws (MissingDataException is
     *   re-thrown); `stream` itself for unsupported filter names.
     */
    makeFilter: function Parser_makeFilter(stream, name, maybeLength, params) {
      // Since the 'Length' entry in the stream dictionary can be completely
      // wrong, e.g. zero for non-empty streams, only skip parsing the stream
      // when we can be absolutely certain that it actually is empty.
      if (maybeLength === 0) {
        warn('Empty "' + name + '" stream.');
        return new NullStream(stream);
      }
      try {
        if (params && this.xref) {
          params = this.xref.fetchIfRef(params);
        }
        // NOTE(review): unlike the line above, this dereferences this.xref
        // unconditionally; with a missing xref the TypeError is caught below
        // and a NullStream is returned.
        var xrefStreamStats = this.xref.stats.streamTypes;
        if (name === 'FlateDecode' || name === 'Fl') {
          xrefStreamStats[StreamType.FLATE] = true;
          if (params) {
            return new PredictorStream(new FlateStream(stream, maybeLength),
                                       maybeLength, params);
          }
          return new FlateStream(stream, maybeLength);
        }
        if (name === 'LZWDecode' || name === 'LZW') {
          xrefStreamStats[StreamType.LZW] = true;
          var earlyChange = 1;
          if (params) {
            if (params.has('EarlyChange')) {
              earlyChange = params.get('EarlyChange');
            }
            return new PredictorStream(
              new LZWStream(stream, maybeLength, earlyChange),
              maybeLength, params);
          }
          return new LZWStream(stream, maybeLength, earlyChange);
        }
        if (name === 'DCTDecode' || name === 'DCT') {
          xrefStreamStats[StreamType.DCT] = true;
          return new JpegStream(stream, maybeLength, stream.dict);
        }
        if (name === 'JPXDecode' || name === 'JPX') {
          xrefStreamStats[StreamType.JPX] = true;
          return new JpxStream(stream, maybeLength, stream.dict);
        }
        if (name === 'ASCII85Decode' || name === 'A85') {
          xrefStreamStats[StreamType.A85] = true;
          return new Ascii85Stream(stream, maybeLength);
        }
        if (name === 'ASCIIHexDecode' || name === 'AHx') {
          xrefStreamStats[StreamType.AHX] = true;
          return new AsciiHexStream(stream, maybeLength);
        }
        if (name === 'CCITTFaxDecode' || name === 'CCF') {
          xrefStreamStats[StreamType.CCF] = true;
          return new CCITTFaxStream(stream, maybeLength, params);
        }
        if (name === 'RunLengthDecode' || name === 'RL') {
          xrefStreamStats[StreamType.RL] = true;
          return new RunLengthStream(stream, maybeLength);
        }
        if (name === 'JBIG2Decode') {
          xrefStreamStats[StreamType.JBIG] = true;
          return new Jbig2Stream(stream, maybeLength, stream.dict);
        }
        warn('filter "' + name + '" not supported yet');
        return stream;
      } catch (ex) {
        if (ex instanceof MissingDataException) {
          throw ex;
        }
        warn('Invalid stream: \"' + ex + '\"');
        return new NullStream(stream);
      }
    }
  };

  return Parser;
})();
/**
 * Lexer: turns the bytes of a stream into PDF tokens (numbers, strings,
 * names, punctuation/operator commands, booleans and null). It keeps one
 * byte of look-ahead in `currentChar`.
 */
var Lexer = (function LexerClosure() {
  /**
   * @param {Object} stream - Byte source providing getByte() and peekByte().
   * @param {Object} [knownCommands] - Optional map of valid commands and
   *   their prefixes, used to split "glued" tokens.
   */
  function Lexer(stream, knownCommands) {
    this.stream = stream;
    this.nextChar();

    // While lexing, we build up many strings one char at a time. Using += for
    // this can result in lots of garbage strings. It's better to build an
    // array of single-char strings and then join() them together at the end.
    // And reusing a single array (i.e. |this.strBuf|) over and over for this
    // purpose uses less memory than using a new array for each string.
    this.strBuf = [];

    // The PDFs might have "glued" commands with other commands, operands or
    // literals, e.g. "q1". The knownCommands is a dictionary of the valid
    // commands and their prefixes. The prefixes are built the following way:
    // if there is a command that is a prefix of another valid command or
    // literal (e.g. 'f' and 'false') the following prefixes must be included,
    // 'fa', 'fal', 'fals'. The prefixes are not needed, if the command has no
    // other commands or literals as a prefix. The knownCommands is optional.
    this.knownCommands = knownCommands;
  }

  // A '1' in this array means the character is white space. A '1' or
  // '2' means the character ends a name or command.
  var specialChars = [
    1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
    1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // fx
  ];

  /**
   * Converts a character code to the value of the hex digit it represents.
   * @param {number} ch - Character code.
   * @returns {number} 0-15, or -1 when `ch` is not a hexadecimal digit.
   */
  function toHexDigit(ch) {
    if (ch >= 0x30 && ch <= 0x39) { // '0'-'9'
      return ch & 0x0F;
    }
    if ((ch >= 0x41 && ch <= 0x46) || (ch >= 0x61 && ch <= 0x66)) {
      // 'A'-'F', 'a'-'f'
      return (ch & 0x0F) + 9;
    }
    return -1;
  }

  Lexer.prototype = {
    /**
     * Consumes one byte from the stream and stores it in `currentChar`.
     * @returns {number} The byte read (as returned by stream.getByte()).
     */
    nextChar: function Lexer_nextChar() {
      return (this.currentChar = this.stream.getByte());
    },

    /**
     * @returns {number} The next byte of the stream, without consuming it.
     */
    peekChar: function Lexer_peekChar() {
      return this.stream.peekByte();
    },

    /**
     * Reads a number starting at `currentChar`. Accepts an optional sign (a
     * doubled '-' is ignored), a leading or embedded '.', and an 'E'/'e'
     * exponent; '-' characters in the middle of the number are ignored with
     * a warning. Calls error() when no digit follows the sign/point.
     * @returns {number} The parsed value.
     */
    getNumber: function Lexer_getNumber() {
      var ch = this.currentChar;
      var eNotation = false;
      var divideBy = 0; // different from 0 if it's a floating point value
      var sign = 1;

      if (ch === 0x2D) { // '-'
        sign = -1;
        ch = this.nextChar();

        if (ch === 0x2D) { // '-'
          // Ignore double negative (this is consistent with Adobe Reader).
          ch = this.nextChar();
        }
      } else if (ch === 0x2B) { // '+'
        ch = this.nextChar();
      }
      if (ch === 0x2E) { // '.'
        divideBy = 10;
        ch = this.nextChar();
      }
      if (ch < 0x30 || ch > 0x39) { // '0' - '9'
        error('Invalid number: ' + String.fromCharCode(ch));
        return 0;
      }

      var baseValue = ch - 0x30; // '0'
      var powerValue = 0;
      var powerValueSign = 1;

      while ((ch = this.nextChar()) >= 0) {
        if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
          var currentDigit = ch - 0x30; // '0'
          if (eNotation) { // We are after an 'e' or 'E'
            powerValue = powerValue * 10 + currentDigit;
          } else {
            if (divideBy !== 0) { // We are after a point
              divideBy *= 10;
            }
            baseValue = baseValue * 10 + currentDigit;
          }
        } else if (ch === 0x2E) { // '.'
          if (divideBy === 0) {
            divideBy = 1;
          } else {
            // A number can have only one '.'
            break;
          }
        } else if (ch === 0x2D) { // '-'
          // ignore minus signs in the middle of numbers to match
          // Adobe's behavior
          warn('Badly formatted number');
        } else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
          // 'E' can be either a scientific notation or the beginning of a new
          // operator
          ch = this.peekChar();
          if (ch === 0x2B || ch === 0x2D) { // '+', '-'
            powerValueSign = (ch === 0x2D) ? -1 : 1;
            this.nextChar(); // Consume the sign character
          } else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
            // The 'E' must be the beginning of a new operator
            break;
          }
          eNotation = true;
        } else {
          // the last character doesn't belong to us
          break;
        }
      }

      if (divideBy !== 0) {
        baseValue /= divideBy;
      }
      if (eNotation) {
        baseValue *= Math.pow(10, powerValueSign * powerValue);
      }
      return sign * baseValue;
    },

    /**
     * Reads a literal string; `currentChar` must be the opening '('.
     * Handles nested parentheses, backslash escapes (including one- to
     * three-digit octal codes) and backslash line continuations. An
     * unterminated string produces a warning and returns what was read.
     * @returns {string} The decoded string contents.
     */
    getString: function Lexer_getString() {
      var numParen = 1;
      var done = false;
      var strBuf = this.strBuf;
      strBuf.length = 0;

      var ch = this.nextChar();
      while (true) {
        // Set when the octal-escape branch has already read the character
        // that must be processed by the next iteration.
        var charBuffered = false;
        switch (ch | 0) {
          case -1:
            warn('Unterminated string');
            done = true;
            break;
          case 0x28: // '('
            ++numParen;
            strBuf.push('(');
            break;
          case 0x29: // ')'
            if (--numParen === 0) {
              this.nextChar(); // consume strings ')'
              done = true;
            } else {
              strBuf.push(')');
            }
            break;
          case 0x5C: // '\\'
            ch = this.nextChar();
            switch (ch) {
              case -1:
                warn('Unterminated string');
                done = true;
                break;
              case 0x6E: // 'n'
                strBuf.push('\n');
                break;
              case 0x72: // 'r'
                strBuf.push('\r');
                break;
              case 0x74: // 't'
                strBuf.push('\t');
                break;
              case 0x62: // 'b'
                strBuf.push('\b');
                break;
              case 0x66: // 'f'
                strBuf.push('\f');
                break;
              case 0x5C: // '\'
              case 0x28: // '('
              case 0x29: // ')'
                strBuf.push(String.fromCharCode(ch));
                break;
              case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
              case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
                var x = ch & 0x0F;
                ch = this.nextChar();
                charBuffered = true;
                if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
                  x = (x << 3) + (ch & 0x0F);
                  ch = this.nextChar();
                  if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
                    charBuffered = false;
                    x = (x << 3) + (ch & 0x0F);
                  }
                }
                strBuf.push(String.fromCharCode(x));
                break;
              case 0x0D: // CR
                if (this.peekChar() === 0x0A) { // LF
                  this.nextChar();
                }
                break;
              case 0x0A: // LF
                break;
              default:
                strBuf.push(String.fromCharCode(ch));
                break;
            }
            break;
          default:
            strBuf.push(String.fromCharCode(ch));
            break;
        }
        if (done) {
          break;
        }
        if (!charBuffered) {
          ch = this.nextChar();
        }
      }
      return strBuf.join('');
    },

    /**
     * Reads a name; `currentChar` must be the leading '/'. `#xx` sequences
     * are decoded as hexadecimal character codes; malformed ones are kept
     * literally and a warning is logged.
     * @returns {Name} The result of Name.get() for the decoded name.
     */
    getName: function Lexer_getName() {
      var ch, previousCh;
      var strBuf = this.strBuf;
      strBuf.length = 0;
      while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
        if (ch === 0x23) { // '#'
          ch = this.nextChar();
          if (specialChars[ch]) {
            warn('Lexer_getName: ' +
                 'NUMBER SIGN (#) should be followed by a hexadecimal number.');
            strBuf.push('#');
            break;
          }
          var x = toHexDigit(ch);
          if (x !== -1) {
            previousCh = ch;
            ch = this.nextChar();
            var x2 = toHexDigit(ch);
            if (x2 === -1) {
              warn('Lexer_getName: Illegal digit (' +
                   String.fromCharCode(ch) +') in hexadecimal number.');
              strBuf.push('#', String.fromCharCode(previousCh));
              if (specialChars[ch]) {
                break;
              }
              strBuf.push(String.fromCharCode(ch));
              continue;
            }
            strBuf.push(String.fromCharCode((x << 4) | x2));
          } else {
            strBuf.push('#', String.fromCharCode(ch));
          }
        } else {
          strBuf.push(String.fromCharCode(ch));
        }
      }
      if (strBuf.length > 127) {
        warn('name token is longer than allowed by the spec: ' + strBuf.length);
      }
      return Name.get(strBuf.join(''));
    },

    /**
     * Reads a hexadecimal string; `currentChar` must be the first character
     * after the opening '<'. White space is skipped, invalid characters are
     * ignored with a warning, and an odd trailing digit is treated as if it
     * were followed by '0'.
     * @returns {string} The decoded string.
     */
    getHexString: function Lexer_getHexString() {
      var strBuf = this.strBuf;
      strBuf.length = 0;
      var ch = this.currentChar;
      var isFirstHex = true;
      var firstDigit;
      var secondDigit;
      while (true) {
        if (ch < 0) {
          warn('Unterminated hex string');
          break;
        } else if (ch === 0x3E) { // '>'
          this.nextChar();
          break;
        } else if (specialChars[ch] === 1) {
          ch = this.nextChar();
          continue;
        } else {
          if (isFirstHex) {
            firstDigit = toHexDigit(ch);
            if (firstDigit === -1) {
              warn('Ignoring invalid character "' + ch + '" in hex string');
              ch = this.nextChar();
              continue;
            }
          } else {
            secondDigit = toHexDigit(ch);
            if (secondDigit === -1) {
              warn('Ignoring invalid character "' + ch + '" in hex string');
              ch = this.nextChar();
              continue;
            }
            strBuf.push(String.fromCharCode((firstDigit << 4) | secondDigit));
          }
          isFirstHex = !isFirstHex;
          ch = this.nextChar();
        }
      }
      // Per the PDF specification, when the number of hex digits is odd the
      // missing final digit is assumed to be 0 (previously the dangling digit
      // was silently dropped).
      if (!isFirstHex) {
        strBuf.push(String.fromCharCode(firstDigit << 4));
      }
      return strBuf.join('');
    },

    /**
     * Skips white space and comments, then reads and returns the next token.
     * @returns {*} EOF at end of input; a number; a string; a Name; a Cmd
     *   for punctuation and operators; or true/false/null for the
     *   corresponding keywords.
     */
    getObj: function Lexer_getObj() {
      // skip whitespace and comments
      var comment = false;
      var ch = this.currentChar;
      while (true) {
        if (ch < 0) {
          return EOF;
        }
        if (comment) {
          if (ch === 0x0A || ch === 0x0D) { // LF, CR
            comment = false;
          }
        } else if (ch === 0x25) { // '%'
          comment = true;
        } else if (specialChars[ch] !== 1) {
          break;
        }
        ch = this.nextChar();
      }

      // start reading token
      switch (ch | 0) {
        case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
        case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
        case 0x2B: case 0x2D: case 0x2E: // '+', '-', '.'
          return this.getNumber();
        case 0x28: // '('
          return this.getString();
        case 0x2F: // '/'
          return this.getName();
        // array punctuation
        case 0x5B: // '['
          this.nextChar();
          return Cmd.get('[');
        case 0x5D: // ']'
          this.nextChar();
          return Cmd.get(']');
        // hex string or dict punctuation
        case 0x3C: // '<'
          ch = this.nextChar();
          if (ch === 0x3C) {
            // dict punctuation
            this.nextChar();
            return Cmd.get('<<');
          }
          return this.getHexString();
        // dict punctuation
        case 0x3E: // '>'
          ch = this.nextChar();
          if (ch === 0x3E) {
            this.nextChar();
            return Cmd.get('>>');
          }
          return Cmd.get('>');
        case 0x7B: // '{'
          this.nextChar();
          return Cmd.get('{');
        case 0x7D: // '}'
          this.nextChar();
          return Cmd.get('}');
        case 0x29: // ')'
          error('Illegal character: ' + ch);
          break;
      }

      // command
      var str = String.fromCharCode(ch);
      var knownCommands = this.knownCommands;
      var knownCommandFound = knownCommands && knownCommands[str] !== undefined;
      while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
        // stop if known command is found and next character does not make
        // the str a command
        var possibleCommand = str + String.fromCharCode(ch);
        if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
          break;
        }
        if (str.length === 128) {
          error('Command token too long: ' + str.length);
        }
        str = possibleCommand;
        knownCommandFound = knownCommands && knownCommands[str] !== undefined;
      }
      if (str === 'true') {
        return true;
      }
      if (str === 'false') {
        return false;
      }
      if (str === 'null') {
        return null;
      }
      return Cmd.get(str);
    },

    /**
     * Advances past the next end-of-line (CR, LF or CR LF), leaving
     * `currentChar` on the first character of the following line. Stops at
     * end of input when no end-of-line is found.
     */
    skipToNextLine: function Lexer_skipToNextLine() {
      var ch = this.currentChar;
      while (ch >= 0) {
        if (ch === 0x0D) { // CR
          ch = this.nextChar();
          if (ch === 0x0A) { // LF
            this.nextChar();
          }
          break;
        } else if (ch === 0x0A) { // LF
          this.nextChar();
          break;
        }
        ch = this.nextChar();
      }
    }
  };

  return Lexer;
})();
/**
 * Reader for the linearization parameter dictionary at the start of a
 * PDF stream.
 */
var Linearization = {
  /**
   * Parses the first object of `stream` and, when it is a valid
   * linearization dictionary, extracts its parameters.
   * @param {Object} stream - Stream positioned at the first object; its
   *   `length` is compared against the dictionary's `L` entry.
   * @returns {Object|null} The linearization parameters, or null when the
   *   first object is not a linearization dictionary.
   * @throws {Error} When the dictionary is present but `L` does not match
   *   the stream length or a required entry is invalid.
   */
  create: function LinearizationCreate(stream) {
    // Reads an integer entry; zero is accepted only when allowZeroValue is
    // truthy, negative values never.
    function getInt(name, allowZeroValue) {
      var obj = linDict.get(name);
      if (isInt(obj) && (allowZeroValue ? obj >= 0 : obj > 0)) {
        return obj;
      }
      throw new Error('The "' + name + '" parameter in the linearization ' +
                      'dictionary is invalid.');
    }

    // Reads the `H` entry: an array of 2 or 4 positive integers.
    function getHints() {
      var hints = linDict.get('H'), hintsLength, item;
      if (isArray(hints) &&
          ((hintsLength = hints.length) === 2 || hintsLength === 4)) {
        for (var index = 0; index < hintsLength; index++) {
          if (!(isInt(item = hints[index]) && item > 0)) {
            throw new Error('Hint (' + index +
                            ') in the linearization dictionary is invalid.');
          }
        }
        return hints;
      }
      throw new Error('Hint array in the linearization dictionary is invalid.');
    }

    // Expect "<int> <int> obj <dict>" as the first four parsed objects.
    var parser = new Parser(new Lexer(stream), false, null);
    var obj1 = parser.getObj();
    var obj2 = parser.getObj();
    var obj3 = parser.getObj();
    var linDict = parser.getObj();
    var obj, length;
    if (!(isInt(obj1) && isInt(obj2) && isCmd(obj3, 'obj') && isDict(linDict) &&
          isNum(obj = linDict.get('Linearized')) && obj > 0)) {
      return null; // No valid linearization dictionary found.
    } else if ((length = getInt('L')) !== stream.length) {
      throw new Error('The "L" parameter in the linearization dictionary ' +
                      'does not equal the stream length.');
    }
    return {
      length: length,
      hints: getHints(),
      objectNumberFirst: getInt('O'),
      endFirst: getInt('E'),
      numPages: getInt('N'),
      mainXRefEntriesOffset: getInt('T'),
      pageFirst: (linDict.has('P') ? getInt('P', true) : 0)
    };
  }
};
exports.EOF = EOF;
exports.Lexer = Lexer;
exports.Linearization = Linearization;
exports.Parser = Parser;
exports.isEOF = isEOF;
}));