2012-09-01 07:48:21 +09:00
|
|
|
/* Copyright 2012 Mozilla Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
2011-10-26 10:18:22 +09:00
|
|
|
|
|
|
|
'use strict';
|
|
|
|
|
2015-11-22 01:32:47 +09:00
|
|
|
(function (root, factory) {
|
|
|
|
if (typeof define === 'function' && define.amd) {
|
|
|
|
define('pdfjs/core/parser', ['exports', 'pdfjs/shared/util',
|
|
|
|
'pdfjs/core/primitives', 'pdfjs/core/stream'], factory);
|
|
|
|
} else if (typeof exports !== 'undefined') {
|
|
|
|
factory(exports, require('../shared/util.js'), require('./primitives.js'),
|
|
|
|
require('./stream.js'));
|
|
|
|
} else {
|
|
|
|
factory((root.pdfjsCoreParser = {}), root.pdfjsSharedUtil,
|
|
|
|
root.pdfjsCorePrimitives, root.pdfjsCoreStream);
|
|
|
|
}
|
|
|
|
}(this, function (exports, sharedUtil, corePrimitives, coreStream) {
|
|
|
|
|
|
|
|
var MissingDataException = sharedUtil.MissingDataException;
|
|
|
|
var StreamType = sharedUtil.StreamType;
|
|
|
|
var assert = sharedUtil.assert;
|
|
|
|
var error = sharedUtil.error;
|
|
|
|
var info = sharedUtil.info;
|
|
|
|
var isArray = sharedUtil.isArray;
|
|
|
|
var isInt = sharedUtil.isInt;
|
|
|
|
var isNum = sharedUtil.isNum;
|
|
|
|
var isString = sharedUtil.isString;
|
|
|
|
var warn = sharedUtil.warn;
|
2017-01-27 21:34:37 +09:00
|
|
|
var EOF = corePrimitives.EOF;
|
2015-11-22 01:32:47 +09:00
|
|
|
var Cmd = corePrimitives.Cmd;
|
|
|
|
var Dict = corePrimitives.Dict;
|
|
|
|
var Name = corePrimitives.Name;
|
|
|
|
var Ref = corePrimitives.Ref;
|
2017-01-27 21:34:37 +09:00
|
|
|
var isEOF = corePrimitives.isEOF;
|
2015-11-22 01:32:47 +09:00
|
|
|
var isCmd = corePrimitives.isCmd;
|
|
|
|
var isDict = corePrimitives.isDict;
|
|
|
|
var isName = corePrimitives.isName;
|
|
|
|
var Ascii85Stream = coreStream.Ascii85Stream;
|
|
|
|
var AsciiHexStream = coreStream.AsciiHexStream;
|
|
|
|
var CCITTFaxStream = coreStream.CCITTFaxStream;
|
|
|
|
var FlateStream = coreStream.FlateStream;
|
|
|
|
var Jbig2Stream = coreStream.Jbig2Stream;
|
|
|
|
var JpegStream = coreStream.JpegStream;
|
|
|
|
var JpxStream = coreStream.JpxStream;
|
|
|
|
var LZWStream = coreStream.LZWStream;
|
|
|
|
var NullStream = coreStream.NullStream;
|
|
|
|
var PredictorStream = coreStream.PredictorStream;
|
|
|
|
var RunLengthStream = coreStream.RunLengthStream;
|
|
|
|
|
2014-10-27 01:03:44 +09:00
|
|
|
var MAX_LENGTH_TO_CACHE = 1000;
|
|
|
|
|
2011-12-09 07:18:43 +09:00
|
|
|
var Parser = (function ParserClosure() {
|
2016-02-25 01:56:28 +09:00
|
|
|
/**
 * Creates a parser that builds objects out of the tokens produced by a lexer.
 *
 * @param {Object} lexer - Token source; its `getObj()` method is called to
 *   fill the look-ahead buffers.
 * @param {boolean} allowStreams - When truthy, a dictionary that is followed
 *   by the `stream` command is turned into a stream by `getObj`.
 * @param {Object} xref - Passed to every `Dict` created while parsing.
 * @param {boolean} [recoveryMode] - When truthy, hitting the end of the data
 *   inside an array or a dictionary returns the partial object instead of
 *   calling `error`. Defaults to `false`.
 */
function Parser(lexer, allowStreams, xref, recoveryMode) {
  this.lexer = lexer;
  this.allowStreams = allowStreams;
  this.xref = xref;
  this.recoveryMode = recoveryMode || false;
  // Prototype-less object so that arbitrary keys cannot collide with
  // properties inherited from Object.prototype.
  this.imageCache = Object.create(null);
  // Read the first two objects into this.buf1 / this.buf2.
  this.refill();
}
|
|
|
|
|
2011-12-09 07:18:43 +09:00
|
|
|
Parser.prototype = {
|
2012-04-05 05:43:26 +09:00
|
|
|
refill: function Parser_refill() {
|
2011-10-25 08:55:23 +09:00
|
|
|
this.buf1 = this.lexer.getObj();
|
|
|
|
this.buf2 = this.lexer.getObj();
|
|
|
|
},
|
2012-04-05 05:43:26 +09:00
|
|
|
shift: function Parser_shift() {
|
2011-10-25 08:55:23 +09:00
|
|
|
if (isCmd(this.buf2, 'ID')) {
|
|
|
|
this.buf1 = this.buf2;
|
|
|
|
this.buf2 = null;
|
|
|
|
} else {
|
|
|
|
this.buf1 = this.buf2;
|
|
|
|
this.buf2 = this.lexer.getObj();
|
|
|
|
}
|
|
|
|
},
|
2015-07-11 19:15:43 +09:00
|
|
|
tryShift: function Parser_tryShift() {
|
|
|
|
try {
|
|
|
|
this.shift();
|
|
|
|
return true;
|
|
|
|
} catch (e) {
|
2015-08-14 23:25:35 +09:00
|
|
|
if (e instanceof MissingDataException) {
|
|
|
|
throw e;
|
|
|
|
}
|
2015-07-11 19:15:43 +09:00
|
|
|
// Upon failure, the caller should reset this.lexer.pos to a known good
|
|
|
|
// state and call this.shift() twice to reset the buffers.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
},
|
2012-04-05 05:43:26 +09:00
|
|
|
getObj: function Parser_getObj(cipherTransform) {
|
2014-05-23 16:25:36 +09:00
|
|
|
var buf1 = this.buf1;
|
|
|
|
this.shift();
|
|
|
|
|
|
|
|
if (buf1 instanceof Cmd) {
|
|
|
|
switch (buf1.cmd) {
|
|
|
|
case 'BI': // inline image
|
|
|
|
return this.makeInlineImage(cipherTransform);
|
|
|
|
case '[': // array
|
|
|
|
var array = [];
|
|
|
|
while (!isCmd(this.buf1, ']') && !isEOF(this.buf1)) {
|
|
|
|
array.push(this.getObj(cipherTransform));
|
|
|
|
}
|
|
|
|
if (isEOF(this.buf1)) {
|
2016-02-25 01:56:28 +09:00
|
|
|
if (!this.recoveryMode) {
|
|
|
|
error('End of file inside array');
|
|
|
|
}
|
|
|
|
return array;
|
2014-05-23 16:25:36 +09:00
|
|
|
}
|
2013-05-01 07:29:25 +09:00
|
|
|
this.shift();
|
2014-05-23 16:25:36 +09:00
|
|
|
return array;
|
|
|
|
case '<<': // dictionary or stream
|
|
|
|
var dict = new Dict(this.xref);
|
|
|
|
while (!isCmd(this.buf1, '>>') && !isEOF(this.buf1)) {
|
|
|
|
if (!isName(this.buf1)) {
|
|
|
|
info('Malformed dictionary: key must be a name object');
|
|
|
|
this.shift();
|
|
|
|
continue;
|
|
|
|
}
|
2012-03-20 21:16:48 +09:00
|
|
|
|
2014-05-23 16:25:36 +09:00
|
|
|
var key = this.buf1.name;
|
|
|
|
this.shift();
|
|
|
|
if (isEOF(this.buf1)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
dict.set(key, this.getObj(cipherTransform));
|
|
|
|
}
|
|
|
|
if (isEOF(this.buf1)) {
|
2016-02-25 01:56:28 +09:00
|
|
|
if (!this.recoveryMode) {
|
|
|
|
error('End of file inside dictionary');
|
|
|
|
}
|
|
|
|
return dict;
|
2014-05-23 16:25:36 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
|
2014-05-23 16:25:36 +09:00
|
|
|
// Stream objects are not allowed inside content streams or
|
|
|
|
// object streams.
|
|
|
|
if (isCmd(this.buf2, 'stream')) {
|
|
|
|
return (this.allowStreams ?
|
|
|
|
this.makeStream(dict, cipherTransform) : dict);
|
|
|
|
}
|
|
|
|
this.shift();
|
|
|
|
return dict;
|
|
|
|
default: // simple object
|
|
|
|
return buf1;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
}
|
2014-05-23 16:25:36 +09:00
|
|
|
|
|
|
|
if (isInt(buf1)) { // indirect reference or integer
|
|
|
|
var num = buf1;
|
2011-10-25 08:55:23 +09:00
|
|
|
if (isInt(this.buf1) && isCmd(this.buf2, 'R')) {
|
|
|
|
var ref = new Ref(num, this.buf1);
|
|
|
|
this.shift();
|
|
|
|
this.shift();
|
|
|
|
return ref;
|
|
|
|
}
|
|
|
|
return num;
|
|
|
|
}
|
2014-05-23 16:25:36 +09:00
|
|
|
|
|
|
|
if (isString(buf1)) { // string
|
|
|
|
var str = buf1;
|
2014-03-21 04:28:22 +09:00
|
|
|
if (cipherTransform) {
|
2011-10-25 08:55:23 +09:00
|
|
|
str = cipherTransform.decryptString(str);
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
return str;
|
|
|
|
}
|
|
|
|
|
|
|
|
// simple object
|
2014-05-23 16:25:36 +09:00
|
|
|
return buf1;
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2014-10-05 07:12:47 +09:00
|
|
|
/**
|
|
|
|
* Find the end of the stream by searching for the /EI\s/.
|
|
|
|
* @returns {number} The inline stream length.
|
|
|
|
*/
|
|
|
|
findDefaultInlineStreamEnd:
|
|
|
|
function Parser_findDefaultInlineStreamEnd(stream) {
|
|
|
|
var E = 0x45, I = 0x49, SPACE = 0x20, LF = 0xA, CR = 0xD;
|
|
|
|
var startPos = stream.pos, state = 0, ch, i, n, followingBytes;
|
2014-08-14 15:25:45 +09:00
|
|
|
while ((ch = stream.getByte()) !== -1) {
|
|
|
|
if (state === 0) {
|
|
|
|
state = (ch === E) ? 1 : 0;
|
|
|
|
} else if (state === 1) {
|
|
|
|
state = (ch === I) ? 2 : 0;
|
|
|
|
} else {
|
|
|
|
assert(state === 2);
|
2014-10-05 07:12:47 +09:00
|
|
|
if (ch === SPACE || ch === LF || ch === CR) {
|
2014-08-14 15:25:45 +09:00
|
|
|
// Let's check the next five bytes are ASCII... just be sure.
|
2014-10-05 07:12:47 +09:00
|
|
|
n = 5;
|
|
|
|
followingBytes = stream.peekBytes(n);
|
2014-08-14 15:25:45 +09:00
|
|
|
for (i = 0; i < n; i++) {
|
2013-06-22 07:03:03 +09:00
|
|
|
ch = followingBytes[i];
|
2014-10-05 07:12:47 +09:00
|
|
|
if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7F)) {
|
2014-08-14 15:25:45 +09:00
|
|
|
// Not a LF, CR, SPACE or any visible ASCII character, i.e.
|
|
|
|
// it's binary stuff. Resetting the state.
|
2013-06-22 07:03:03 +09:00
|
|
|
state = 0;
|
2014-08-14 15:25:45 +09:00
|
|
|
break;
|
2013-06-22 07:03:03 +09:00
|
|
|
}
|
|
|
|
}
|
2014-08-14 15:25:45 +09:00
|
|
|
if (state === 2) {
|
2014-10-05 07:12:47 +09:00
|
|
|
break; // Finished!
|
2014-08-14 15:25:45 +09:00
|
|
|
}
|
|
|
|
} else {
|
2011-10-25 08:55:23 +09:00
|
|
|
state = 0;
|
2014-08-14 15:25:45 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
}
|
2014-10-05 07:12:47 +09:00
|
|
|
return ((stream.pos - 4) - startPos);
|
|
|
|
},
|
2014-12-22 00:07:07 +09:00
|
|
|
/**
|
|
|
|
* Find the EOI (end-of-image) marker 0xFFD9 of the stream.
|
|
|
|
* @returns {number} The inline stream length.
|
|
|
|
*/
|
|
|
|
findDCTDecodeInlineStreamEnd:
|
|
|
|
function Parser_findDCTDecodeInlineStreamEnd(stream) {
|
|
|
|
var startPos = stream.pos, foundEOI = false, b, markerLength, length;
|
|
|
|
while ((b = stream.getByte()) !== -1) {
|
|
|
|
if (b !== 0xFF) { // Not a valid marker.
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
switch (stream.getByte()) {
|
|
|
|
case 0x00: // Byte stuffing.
|
|
|
|
// 0xFF00 appears to be a very common byte sequence in JPEG images.
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xFF: // Fill byte.
|
|
|
|
// Avoid skipping a valid marker, resetting the stream position.
|
|
|
|
stream.skip(-1);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xD9: // EOI
|
|
|
|
foundEOI = true;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xC0: // SOF0
|
|
|
|
case 0xC1: // SOF1
|
|
|
|
case 0xC2: // SOF2
|
|
|
|
case 0xC3: // SOF3
|
Switch to using ESLint, instead of JSHint, for linting
*Please note that most of the necessary code adjustments were made in PR 7890.*
ESLint has a number of advantageous properties, compared to JSHint. Among those are:
- The ability to find subtle bugs, thanks to more rules (e.g. PR 7881).
- Much more customizable in general, and many rules allow fine-tuned behaviour rather than the just the on/off rules in JSHint.
- Many more rules that can help developers avoid bugs, and a lot of rules that can be used to enforce a consistent coding style. The latter should be particularily useful for new contributors (and reduce the amount of stylistic review comments necessary).
- The ability to easily specify exactly what rules to use/not to use, as opposed to JSHint which has a default set. *Note:* in future JSHint version some of the rules we depend on will be removed, according to warnings in http://jshint.com/docs/options/, so we wouldn't be able to update without losing lint coverage.
- More easily disable one, or more, rules temporarily. In JSHint this requires using a numeric code, which isn't very user friendly, whereas in ESLint the rule name is simply used instead.
By default there's no rules enabled in ESLint, but there are some default rule sets available. However, to prevent linting failures if we update ESLint in the future, it seemed easier to just explicitly specify what rules we want.
Obviously this makes the ESLint config file somewhat bigger than the old JSHint config file, but given how rarely that one has been updated over the years I don't think that matters too much.
I've tried, to the best of my ability, to ensure that we enable the same rules for ESLint that we had for JSHint. Furthermore, I've also enabled a number of rules that seemed to make sense, both to catch possible errors *and* various style guide violations.
Despite the ESLint README claiming that it's slower that JSHint, https://github.com/eslint/eslint#how-does-eslint-performance-compare-to-jshint, locally this patch actually reduces the runtime for `gulp` lint (by approximately 20-25%).
A couple of stylistic rules that would have been nice to enable, but where our code currently differs to much to make it feasible:
- `comma-dangle`, controls trailing commas in Objects and Arrays (among others).
- `object-curly-spacing`, controls spacing inside of Objects.
- `spaced-comment`, used to enforce spaces after `//` and `/*. (This is made difficult by the fact that there's still some usage of the old preprocessor left.)
Rules that I indend to look into possibly enabling in follow-ups, if it seems to make sense: `no-else-return`, `no-lonely-if`, `brace-style` with the `allowSingleLine` parameter removed.
Useful links:
- http://eslint.org/docs/user-guide/configuring
- http://eslint.org/docs/rules/
2016-12-15 23:52:29 +09:00
|
|
|
/* falls through */
|
2014-12-22 00:07:07 +09:00
|
|
|
case 0xC5: // SOF5
|
|
|
|
case 0xC6: // SOF6
|
|
|
|
case 0xC7: // SOF7
|
Switch to using ESLint, instead of JSHint, for linting
*Please note that most of the necessary code adjustments were made in PR 7890.*
ESLint has a number of advantageous properties, compared to JSHint. Among those are:
- The ability to find subtle bugs, thanks to more rules (e.g. PR 7881).
- Much more customizable in general, and many rules allow fine-tuned behaviour rather than the just the on/off rules in JSHint.
- Many more rules that can help developers avoid bugs, and a lot of rules that can be used to enforce a consistent coding style. The latter should be particularily useful for new contributors (and reduce the amount of stylistic review comments necessary).
- The ability to easily specify exactly what rules to use/not to use, as opposed to JSHint which has a default set. *Note:* in future JSHint version some of the rules we depend on will be removed, according to warnings in http://jshint.com/docs/options/, so we wouldn't be able to update without losing lint coverage.
- More easily disable one, or more, rules temporarily. In JSHint this requires using a numeric code, which isn't very user friendly, whereas in ESLint the rule name is simply used instead.
By default there's no rules enabled in ESLint, but there are some default rule sets available. However, to prevent linting failures if we update ESLint in the future, it seemed easier to just explicitly specify what rules we want.
Obviously this makes the ESLint config file somewhat bigger than the old JSHint config file, but given how rarely that one has been updated over the years I don't think that matters too much.
I've tried, to the best of my ability, to ensure that we enable the same rules for ESLint that we had for JSHint. Furthermore, I've also enabled a number of rules that seemed to make sense, both to catch possible errors *and* various style guide violations.
Despite the ESLint README claiming that it's slower that JSHint, https://github.com/eslint/eslint#how-does-eslint-performance-compare-to-jshint, locally this patch actually reduces the runtime for `gulp` lint (by approximately 20-25%).
A couple of stylistic rules that would have been nice to enable, but where our code currently differs to much to make it feasible:
- `comma-dangle`, controls trailing commas in Objects and Arrays (among others).
- `object-curly-spacing`, controls spacing inside of Objects.
- `spaced-comment`, used to enforce spaces after `//` and `/*. (This is made difficult by the fact that there's still some usage of the old preprocessor left.)
Rules that I indend to look into possibly enabling in follow-ups, if it seems to make sense: `no-else-return`, `no-lonely-if`, `brace-style` with the `allowSingleLine` parameter removed.
Useful links:
- http://eslint.org/docs/user-guide/configuring
- http://eslint.org/docs/rules/
2016-12-15 23:52:29 +09:00
|
|
|
/* falls through */
|
2014-12-22 00:07:07 +09:00
|
|
|
case 0xC9: // SOF9
|
|
|
|
case 0xCA: // SOF10
|
|
|
|
case 0xCB: // SOF11
|
Switch to using ESLint, instead of JSHint, for linting
*Please note that most of the necessary code adjustments were made in PR 7890.*
ESLint has a number of advantageous properties, compared to JSHint. Among those are:
- The ability to find subtle bugs, thanks to more rules (e.g. PR 7881).
- Much more customizable in general, and many rules allow fine-tuned behaviour rather than the just the on/off rules in JSHint.
- Many more rules that can help developers avoid bugs, and a lot of rules that can be used to enforce a consistent coding style. The latter should be particularily useful for new contributors (and reduce the amount of stylistic review comments necessary).
- The ability to easily specify exactly what rules to use/not to use, as opposed to JSHint which has a default set. *Note:* in future JSHint version some of the rules we depend on will be removed, according to warnings in http://jshint.com/docs/options/, so we wouldn't be able to update without losing lint coverage.
- More easily disable one, or more, rules temporarily. In JSHint this requires using a numeric code, which isn't very user friendly, whereas in ESLint the rule name is simply used instead.
By default there's no rules enabled in ESLint, but there are some default rule sets available. However, to prevent linting failures if we update ESLint in the future, it seemed easier to just explicitly specify what rules we want.
Obviously this makes the ESLint config file somewhat bigger than the old JSHint config file, but given how rarely that one has been updated over the years I don't think that matters too much.
I've tried, to the best of my ability, to ensure that we enable the same rules for ESLint that we had for JSHint. Furthermore, I've also enabled a number of rules that seemed to make sense, both to catch possible errors *and* various style guide violations.
Despite the ESLint README claiming that it's slower that JSHint, https://github.com/eslint/eslint#how-does-eslint-performance-compare-to-jshint, locally this patch actually reduces the runtime for `gulp` lint (by approximately 20-25%).
A couple of stylistic rules that would have been nice to enable, but where our code currently differs to much to make it feasible:
- `comma-dangle`, controls trailing commas in Objects and Arrays (among others).
- `object-curly-spacing`, controls spacing inside of Objects.
- `spaced-comment`, used to enforce spaces after `//` and `/*. (This is made difficult by the fact that there's still some usage of the old preprocessor left.)
Rules that I indend to look into possibly enabling in follow-ups, if it seems to make sense: `no-else-return`, `no-lonely-if`, `brace-style` with the `allowSingleLine` parameter removed.
Useful links:
- http://eslint.org/docs/user-guide/configuring
- http://eslint.org/docs/rules/
2016-12-15 23:52:29 +09:00
|
|
|
/* falls through */
|
2014-12-22 00:07:07 +09:00
|
|
|
case 0xCD: // SOF13
|
|
|
|
case 0xCE: // SOF14
|
|
|
|
case 0xCF: // SOF15
|
Switch to using ESLint, instead of JSHint, for linting
*Please note that most of the necessary code adjustments were made in PR 7890.*
ESLint has a number of advantageous properties, compared to JSHint. Among those are:
- The ability to find subtle bugs, thanks to more rules (e.g. PR 7881).
- Much more customizable in general, and many rules allow fine-tuned behaviour rather than the just the on/off rules in JSHint.
- Many more rules that can help developers avoid bugs, and a lot of rules that can be used to enforce a consistent coding style. The latter should be particularily useful for new contributors (and reduce the amount of stylistic review comments necessary).
- The ability to easily specify exactly what rules to use/not to use, as opposed to JSHint which has a default set. *Note:* in future JSHint version some of the rules we depend on will be removed, according to warnings in http://jshint.com/docs/options/, so we wouldn't be able to update without losing lint coverage.
- More easily disable one, or more, rules temporarily. In JSHint this requires using a numeric code, which isn't very user friendly, whereas in ESLint the rule name is simply used instead.
By default there's no rules enabled in ESLint, but there are some default rule sets available. However, to prevent linting failures if we update ESLint in the future, it seemed easier to just explicitly specify what rules we want.
Obviously this makes the ESLint config file somewhat bigger than the old JSHint config file, but given how rarely that one has been updated over the years I don't think that matters too much.
I've tried, to the best of my ability, to ensure that we enable the same rules for ESLint that we had for JSHint. Furthermore, I've also enabled a number of rules that seemed to make sense, both to catch possible errors *and* various style guide violations.
Despite the ESLint README claiming that it's slower that JSHint, https://github.com/eslint/eslint#how-does-eslint-performance-compare-to-jshint, locally this patch actually reduces the runtime for `gulp` lint (by approximately 20-25%).
A couple of stylistic rules that would have been nice to enable, but where our code currently differs to much to make it feasible:
- `comma-dangle`, controls trailing commas in Objects and Arrays (among others).
- `object-curly-spacing`, controls spacing inside of Objects.
- `spaced-comment`, used to enforce spaces after `//` and `/*. (This is made difficult by the fact that there's still some usage of the old preprocessor left.)
Rules that I indend to look into possibly enabling in follow-ups, if it seems to make sense: `no-else-return`, `no-lonely-if`, `brace-style` with the `allowSingleLine` parameter removed.
Useful links:
- http://eslint.org/docs/user-guide/configuring
- http://eslint.org/docs/rules/
2016-12-15 23:52:29 +09:00
|
|
|
/* falls through */
|
2014-12-22 00:07:07 +09:00
|
|
|
case 0xC4: // DHT
|
|
|
|
case 0xCC: // DAC
|
Switch to using ESLint, instead of JSHint, for linting
*Please note that most of the necessary code adjustments were made in PR 7890.*
ESLint has a number of advantageous properties, compared to JSHint. Among those are:
- The ability to find subtle bugs, thanks to more rules (e.g. PR 7881).
- Much more customizable in general, and many rules allow fine-tuned behaviour rather than the just the on/off rules in JSHint.
- Many more rules that can help developers avoid bugs, and a lot of rules that can be used to enforce a consistent coding style. The latter should be particularily useful for new contributors (and reduce the amount of stylistic review comments necessary).
- The ability to easily specify exactly what rules to use/not to use, as opposed to JSHint which has a default set. *Note:* in future JSHint version some of the rules we depend on will be removed, according to warnings in http://jshint.com/docs/options/, so we wouldn't be able to update without losing lint coverage.
- More easily disable one, or more, rules temporarily. In JSHint this requires using a numeric code, which isn't very user friendly, whereas in ESLint the rule name is simply used instead.
By default there's no rules enabled in ESLint, but there are some default rule sets available. However, to prevent linting failures if we update ESLint in the future, it seemed easier to just explicitly specify what rules we want.
Obviously this makes the ESLint config file somewhat bigger than the old JSHint config file, but given how rarely that one has been updated over the years I don't think that matters too much.
I've tried, to the best of my ability, to ensure that we enable the same rules for ESLint that we had for JSHint. Furthermore, I've also enabled a number of rules that seemed to make sense, both to catch possible errors *and* various style guide violations.
Despite the ESLint README claiming that it's slower that JSHint, https://github.com/eslint/eslint#how-does-eslint-performance-compare-to-jshint, locally this patch actually reduces the runtime for `gulp` lint (by approximately 20-25%).
A couple of stylistic rules that would have been nice to enable, but where our code currently differs to much to make it feasible:
- `comma-dangle`, controls trailing commas in Objects and Arrays (among others).
- `object-curly-spacing`, controls spacing inside of Objects.
- `spaced-comment`, used to enforce spaces after `//` and `/*. (This is made difficult by the fact that there's still some usage of the old preprocessor left.)
Rules that I indend to look into possibly enabling in follow-ups, if it seems to make sense: `no-else-return`, `no-lonely-if`, `brace-style` with the `allowSingleLine` parameter removed.
Useful links:
- http://eslint.org/docs/user-guide/configuring
- http://eslint.org/docs/rules/
2016-12-15 23:52:29 +09:00
|
|
|
/* falls through */
|
2014-12-22 00:07:07 +09:00
|
|
|
case 0xDA: // SOS
|
|
|
|
case 0xDB: // DQT
|
|
|
|
case 0xDC: // DNL
|
|
|
|
case 0xDD: // DRI
|
|
|
|
case 0xDE: // DHP
|
|
|
|
case 0xDF: // EXP
|
Switch to using ESLint, instead of JSHint, for linting
*Please note that most of the necessary code adjustments were made in PR 7890.*
ESLint has a number of advantageous properties, compared to JSHint. Among those are:
- The ability to find subtle bugs, thanks to more rules (e.g. PR 7881).
- Much more customizable in general, and many rules allow fine-tuned behaviour rather than the just the on/off rules in JSHint.
- Many more rules that can help developers avoid bugs, and a lot of rules that can be used to enforce a consistent coding style. The latter should be particularily useful for new contributors (and reduce the amount of stylistic review comments necessary).
- The ability to easily specify exactly what rules to use/not to use, as opposed to JSHint which has a default set. *Note:* in future JSHint version some of the rules we depend on will be removed, according to warnings in http://jshint.com/docs/options/, so we wouldn't be able to update without losing lint coverage.
- More easily disable one, or more, rules temporarily. In JSHint this requires using a numeric code, which isn't very user friendly, whereas in ESLint the rule name is simply used instead.
By default there's no rules enabled in ESLint, but there are some default rule sets available. However, to prevent linting failures if we update ESLint in the future, it seemed easier to just explicitly specify what rules we want.
Obviously this makes the ESLint config file somewhat bigger than the old JSHint config file, but given how rarely that one has been updated over the years I don't think that matters too much.
I've tried, to the best of my ability, to ensure that we enable the same rules for ESLint that we had for JSHint. Furthermore, I've also enabled a number of rules that seemed to make sense, both to catch possible errors *and* various style guide violations.
Despite the ESLint README claiming that it's slower that JSHint, https://github.com/eslint/eslint#how-does-eslint-performance-compare-to-jshint, locally this patch actually reduces the runtime for `gulp` lint (by approximately 20-25%).
A couple of stylistic rules that would have been nice to enable, but where our code currently differs to much to make it feasible:
- `comma-dangle`, controls trailing commas in Objects and Arrays (among others).
- `object-curly-spacing`, controls spacing inside of Objects.
- `spaced-comment`, used to enforce spaces after `//` and `/*. (This is made difficult by the fact that there's still some usage of the old preprocessor left.)
Rules that I indend to look into possibly enabling in follow-ups, if it seems to make sense: `no-else-return`, `no-lonely-if`, `brace-style` with the `allowSingleLine` parameter removed.
Useful links:
- http://eslint.org/docs/user-guide/configuring
- http://eslint.org/docs/rules/
2016-12-15 23:52:29 +09:00
|
|
|
/* falls through */
|
2014-12-22 00:07:07 +09:00
|
|
|
case 0xE0: // APP0
|
|
|
|
case 0xE1: // APP1
|
|
|
|
case 0xE2: // APP2
|
|
|
|
case 0xE3: // APP3
|
|
|
|
case 0xE4: // APP4
|
|
|
|
case 0xE5: // APP5
|
|
|
|
case 0xE6: // APP6
|
|
|
|
case 0xE7: // APP7
|
|
|
|
case 0xE8: // APP8
|
|
|
|
case 0xE9: // APP9
|
|
|
|
case 0xEA: // APP10
|
|
|
|
case 0xEB: // APP11
|
|
|
|
case 0xEC: // APP12
|
|
|
|
case 0xED: // APP13
|
|
|
|
case 0xEE: // APP14
|
|
|
|
case 0xEF: // APP15
|
Switch to using ESLint, instead of JSHint, for linting
*Please note that most of the necessary code adjustments were made in PR 7890.*
ESLint has a number of advantageous properties, compared to JSHint. Among those are:
- The ability to find subtle bugs, thanks to more rules (e.g. PR 7881).
- Much more customizable in general, and many rules allow fine-tuned behaviour rather than the just the on/off rules in JSHint.
- Many more rules that can help developers avoid bugs, and a lot of rules that can be used to enforce a consistent coding style. The latter should be particularily useful for new contributors (and reduce the amount of stylistic review comments necessary).
- The ability to easily specify exactly what rules to use/not to use, as opposed to JSHint which has a default set. *Note:* in future JSHint version some of the rules we depend on will be removed, according to warnings in http://jshint.com/docs/options/, so we wouldn't be able to update without losing lint coverage.
- More easily disable one, or more, rules temporarily. In JSHint this requires using a numeric code, which isn't very user friendly, whereas in ESLint the rule name is simply used instead.
By default there's no rules enabled in ESLint, but there are some default rule sets available. However, to prevent linting failures if we update ESLint in the future, it seemed easier to just explicitly specify what rules we want.
Obviously this makes the ESLint config file somewhat bigger than the old JSHint config file, but given how rarely that one has been updated over the years I don't think that matters too much.
I've tried, to the best of my ability, to ensure that we enable the same rules for ESLint that we had for JSHint. Furthermore, I've also enabled a number of rules that seemed to make sense, both to catch possible errors *and* various style guide violations.
Despite the ESLint README claiming that it's slower that JSHint, https://github.com/eslint/eslint#how-does-eslint-performance-compare-to-jshint, locally this patch actually reduces the runtime for `gulp` lint (by approximately 20-25%).
A couple of stylistic rules that would have been nice to enable, but where our code currently differs to much to make it feasible:
- `comma-dangle`, controls trailing commas in Objects and Arrays (among others).
- `object-curly-spacing`, controls spacing inside of Objects.
- `spaced-comment`, used to enforce spaces after `//` and `/*. (This is made difficult by the fact that there's still some usage of the old preprocessor left.)
Rules that I indend to look into possibly enabling in follow-ups, if it seems to make sense: `no-else-return`, `no-lonely-if`, `brace-style` with the `allowSingleLine` parameter removed.
Useful links:
- http://eslint.org/docs/user-guide/configuring
- http://eslint.org/docs/rules/
2016-12-15 23:52:29 +09:00
|
|
|
/* falls through */
|
2014-12-22 00:07:07 +09:00
|
|
|
case 0xFE: // COM
|
|
|
|
// The marker should be followed by the length of the segment.
|
|
|
|
markerLength = stream.getUint16();
|
|
|
|
if (markerLength > 2) {
|
|
|
|
// |markerLength| contains the byte length of the marker segment,
|
|
|
|
// including its own length (2 bytes) and excluding the marker.
|
|
|
|
stream.skip(markerLength - 2); // Jump to the next marker.
|
|
|
|
} else {
|
|
|
|
// The marker length is invalid, resetting the stream position.
|
|
|
|
stream.skip(-2);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (foundEOI) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
length = stream.pos - startPos;
|
|
|
|
if (b === -1) {
|
|
|
|
warn('Inline DCTDecode image stream: ' +
|
|
|
|
'EOI marker not found, searching for /EI/ instead.');
|
|
|
|
stream.skip(-length); // Reset the stream position.
|
|
|
|
return this.findDefaultInlineStreamEnd(stream);
|
|
|
|
}
|
|
|
|
this.inlineStreamSkipEI(stream);
|
|
|
|
return length;
|
|
|
|
},
|
2014-10-05 07:12:47 +09:00
|
|
|
/**
|
|
|
|
* Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream.
|
|
|
|
* @returns {number} The inline stream length.
|
|
|
|
*/
|
|
|
|
findASCII85DecodeInlineStreamEnd:
|
|
|
|
function Parser_findASCII85DecodeInlineStreamEnd(stream) {
|
|
|
|
var TILDE = 0x7E, GT = 0x3E;
|
|
|
|
var startPos = stream.pos, ch, length;
|
|
|
|
while ((ch = stream.getByte()) !== -1) {
|
|
|
|
if (ch === TILDE && stream.peekByte() === GT) {
|
|
|
|
stream.skip();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
length = stream.pos - startPos;
|
|
|
|
if (ch === -1) {
|
|
|
|
warn('Inline ASCII85Decode image stream: ' +
|
|
|
|
'EOD marker not found, searching for /EI/ instead.');
|
|
|
|
stream.skip(-length); // Reset the stream position.
|
|
|
|
return this.findDefaultInlineStreamEnd(stream);
|
|
|
|
}
|
|
|
|
this.inlineStreamSkipEI(stream);
|
|
|
|
return length;
|
|
|
|
},
|
|
|
|
/**
|
|
|
|
* Find the EOD (end-of-data) marker '>' (i.e. GT) of the stream.
|
|
|
|
* @returns {number} The inline stream length.
|
|
|
|
*/
|
|
|
|
findASCIIHexDecodeInlineStreamEnd:
|
|
|
|
function Parser_findASCIIHexDecodeInlineStreamEnd(stream) {
|
|
|
|
var GT = 0x3E;
|
|
|
|
var startPos = stream.pos, ch, length;
|
|
|
|
while ((ch = stream.getByte()) !== -1) {
|
|
|
|
if (ch === GT) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
length = stream.pos - startPos;
|
|
|
|
if (ch === -1) {
|
|
|
|
warn('Inline ASCIIHexDecode image stream: ' +
|
|
|
|
'EOD marker not found, searching for /EI/ instead.');
|
|
|
|
stream.skip(-length); // Reset the stream position.
|
|
|
|
return this.findDefaultInlineStreamEnd(stream);
|
|
|
|
}
|
|
|
|
this.inlineStreamSkipEI(stream);
|
|
|
|
return length;
|
|
|
|
},
|
|
|
|
/**
|
|
|
|
* Skip over the /EI/ for streams where we search for an EOD marker.
|
|
|
|
*/
|
|
|
|
inlineStreamSkipEI: function Parser_inlineStreamSkipEI(stream) {
|
|
|
|
var E = 0x45, I = 0x49;
|
|
|
|
var state = 0, ch;
|
|
|
|
while ((ch = stream.getByte()) !== -1) {
|
|
|
|
if (state === 0) {
|
|
|
|
state = (ch === E) ? 1 : 0;
|
|
|
|
} else if (state === 1) {
|
|
|
|
state = (ch === I) ? 2 : 0;
|
|
|
|
} else if (state === 2) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
makeInlineImage: function Parser_makeInlineImage(cipherTransform) {
|
|
|
|
var lexer = this.lexer;
|
|
|
|
var stream = lexer.stream;
|
|
|
|
|
|
|
|
// Parse dictionary.
|
2015-09-03 00:05:34 +09:00
|
|
|
var dict = new Dict(this.xref);
|
2014-10-05 07:12:47 +09:00
|
|
|
while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) {
|
|
|
|
if (!isName(this.buf1)) {
|
|
|
|
error('Dictionary key must be a name object');
|
|
|
|
}
|
|
|
|
var key = this.buf1.name;
|
|
|
|
this.shift();
|
|
|
|
if (isEOF(this.buf1)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
dict.set(key, this.getObj(cipherTransform));
|
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
|
2014-10-05 07:12:47 +09:00
|
|
|
// Extract the name of the first (i.e. the current) image filter.
|
2015-09-03 00:05:34 +09:00
|
|
|
var filter = dict.get('Filter', 'F'), filterName;
|
2014-10-05 07:12:47 +09:00
|
|
|
if (isName(filter)) {
|
|
|
|
filterName = filter.name;
|
2016-12-08 19:55:08 +09:00
|
|
|
} else if (isArray(filter)) {
|
|
|
|
var filterZero = this.xref.fetchIfRef(filter[0]);
|
|
|
|
if (isName(filterZero)) {
|
|
|
|
filterName = filterZero.name;
|
|
|
|
}
|
2014-10-05 07:12:47 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
// Parse image stream.
|
|
|
|
var startPos = stream.pos, length, i, ii;
|
2014-12-22 00:07:07 +09:00
|
|
|
if (filterName === 'DCTDecode' || filterName === 'DCT') {
|
|
|
|
length = this.findDCTDecodeInlineStreamEnd(stream);
|
|
|
|
} else if (filterName === 'ASCII85Decide' || filterName === 'A85') {
|
2014-10-05 07:12:47 +09:00
|
|
|
length = this.findASCII85DecodeInlineStreamEnd(stream);
|
|
|
|
} else if (filterName === 'ASCIIHexDecode' || filterName === 'AHx') {
|
|
|
|
length = this.findASCIIHexDecodeInlineStreamEnd(stream);
|
|
|
|
} else {
|
|
|
|
length = this.findDefaultInlineStreamEnd(stream);
|
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
var imageStream = stream.makeSubStream(startPos, length, dict);
|
2014-02-25 00:59:02 +09:00
|
|
|
|
2014-10-05 07:12:47 +09:00
|
|
|
// Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
|
2014-10-27 01:03:44 +09:00
|
|
|
// adler32 checksum.
|
|
|
|
var adler32;
|
|
|
|
if (length < MAX_LENGTH_TO_CACHE) {
|
2014-02-25 00:59:02 +09:00
|
|
|
var imageBytes = imageStream.getBytes();
|
|
|
|
imageStream.reset();
|
|
|
|
|
|
|
|
var a = 1;
|
|
|
|
var b = 0;
|
2014-04-08 06:42:54 +09:00
|
|
|
for (i = 0, ii = imageBytes.length; i < ii; ++i) {
|
2014-10-05 07:12:47 +09:00
|
|
|
// No modulo required in the loop if imageBytes.length < 5552.
|
2014-10-27 01:03:44 +09:00
|
|
|
a += imageBytes[i] & 0xff;
|
|
|
|
b += a;
|
2014-02-25 00:59:02 +09:00
|
|
|
}
|
2014-10-27 01:03:44 +09:00
|
|
|
adler32 = ((b % 65521) << 16) | (a % 65521);
|
2014-02-25 00:59:02 +09:00
|
|
|
|
2014-10-27 01:03:44 +09:00
|
|
|
if (this.imageCache.adler32 === adler32) {
|
2014-02-25 00:59:02 +09:00
|
|
|
this.buf2 = Cmd.get('EI');
|
|
|
|
this.shift();
|
|
|
|
|
2014-10-27 01:03:44 +09:00
|
|
|
this.imageCache[adler32].reset();
|
|
|
|
return this.imageCache[adler32];
|
2014-02-25 00:59:02 +09:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cipherTransform) {
|
2014-03-11 14:18:30 +09:00
|
|
|
imageStream = cipherTransform.createStream(imageStream, length);
|
2014-02-25 00:59:02 +09:00
|
|
|
}
|
|
|
|
|
2011-10-25 08:55:23 +09:00
|
|
|
imageStream = this.filter(imageStream, dict, length);
|
2013-05-10 12:26:28 +09:00
|
|
|
imageStream.dict = dict;
|
2014-10-27 01:03:44 +09:00
|
|
|
if (adler32 !== undefined) {
|
2014-02-25 00:59:02 +09:00
|
|
|
imageStream.cacheKey = 'inline_' + length + '_' + adler32;
|
2014-10-27 01:03:44 +09:00
|
|
|
this.imageCache[adler32] = imageStream;
|
2014-02-25 00:59:02 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
|
2011-12-19 04:39:10 +09:00
|
|
|
this.buf2 = Cmd.get('EI');
|
2011-10-25 08:55:23 +09:00
|
|
|
this.shift();
|
|
|
|
|
|
|
|
return imageStream;
|
|
|
|
},
|
2012-04-05 05:43:26 +09:00
|
|
|
makeStream: function Parser_makeStream(dict, cipherTransform) {
|
2011-10-25 08:55:23 +09:00
|
|
|
var lexer = this.lexer;
|
|
|
|
var stream = lexer.stream;
|
|
|
|
|
|
|
|
// get stream start position
|
|
|
|
lexer.skipToNextLine();
|
2013-07-01 05:45:15 +09:00
|
|
|
var pos = stream.pos - 1;
|
2011-10-25 08:55:23 +09:00
|
|
|
|
|
|
|
// get length
|
2015-09-03 00:05:34 +09:00
|
|
|
var length = dict.get('Length');
|
2013-08-16 23:32:40 +09:00
|
|
|
if (!isInt(length)) {
|
|
|
|
info('Bad ' + length + ' attribute in stream');
|
|
|
|
length = 0;
|
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
|
|
|
|
// skip over the stream data
|
|
|
|
stream.pos = pos + length;
|
2013-07-01 05:45:15 +09:00
|
|
|
lexer.nextChar();
|
|
|
|
|
2015-07-11 19:15:43 +09:00
|
|
|
// Shift '>>' and check whether the new object marks the end of the stream
|
|
|
|
if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
|
|
|
|
this.shift(); // 'stream'
|
|
|
|
} else {
|
2013-06-23 03:21:19 +09:00
|
|
|
// bad stream length, scanning for endstream
|
|
|
|
stream.pos = pos;
|
|
|
|
var SCAN_BLOCK_SIZE = 2048;
|
|
|
|
var ENDSTREAM_SIGNATURE_LENGTH = 9;
|
|
|
|
var ENDSTREAM_SIGNATURE = [0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65,
|
|
|
|
0x61, 0x6D];
|
2014-04-08 06:42:54 +09:00
|
|
|
var skipped = 0, found = false, i, j;
|
2013-06-23 03:21:19 +09:00
|
|
|
while (stream.pos < stream.end) {
|
|
|
|
var scanBytes = stream.peekBytes(SCAN_BLOCK_SIZE);
|
|
|
|
var scanLength = scanBytes.length - ENDSTREAM_SIGNATURE_LENGTH;
|
2014-06-07 05:06:29 +09:00
|
|
|
if (scanLength <= 0) {
|
|
|
|
break;
|
|
|
|
}
|
2014-04-08 06:42:54 +09:00
|
|
|
found = false;
|
2016-01-26 06:56:34 +09:00
|
|
|
i = 0;
|
|
|
|
while (i < scanLength) {
|
|
|
|
j = 0;
|
|
|
|
while (j < ENDSTREAM_SIGNATURE_LENGTH &&
|
|
|
|
scanBytes[i + j] === ENDSTREAM_SIGNATURE[j]) {
|
2013-06-23 03:21:19 +09:00
|
|
|
j++;
|
|
|
|
}
|
2016-01-26 06:56:34 +09:00
|
|
|
if (j >= ENDSTREAM_SIGNATURE_LENGTH) {
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
i++;
|
2013-06-23 03:21:19 +09:00
|
|
|
}
|
|
|
|
if (found) {
|
2016-01-26 06:56:34 +09:00
|
|
|
skipped += i;
|
|
|
|
stream.pos += i;
|
2013-06-23 03:21:19 +09:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
skipped += scanLength;
|
|
|
|
stream.pos += scanLength;
|
|
|
|
}
|
|
|
|
if (!found) {
|
|
|
|
error('Missing endstream');
|
|
|
|
}
|
|
|
|
length = skipped;
|
2013-07-01 05:45:15 +09:00
|
|
|
|
|
|
|
lexer.nextChar();
|
2013-06-23 03:21:19 +09:00
|
|
|
this.shift();
|
|
|
|
this.shift();
|
2013-06-22 07:35:52 +09:00
|
|
|
}
|
2013-06-23 03:21:19 +09:00
|
|
|
this.shift(); // 'endstream'
|
2011-10-25 08:55:23 +09:00
|
|
|
|
|
|
|
stream = stream.makeSubStream(pos, length, dict);
|
2014-03-21 04:28:22 +09:00
|
|
|
if (cipherTransform) {
|
2014-03-11 14:18:30 +09:00
|
|
|
stream = cipherTransform.createStream(stream, length);
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
stream = this.filter(stream, dict, length);
|
2013-05-10 12:26:28 +09:00
|
|
|
stream.dict = dict;
|
2011-10-25 08:55:23 +09:00
|
|
|
return stream;
|
|
|
|
},
|
2012-04-05 05:43:26 +09:00
|
|
|
filter: function Parser_filter(stream, dict, length) {
|
2015-09-03 00:05:34 +09:00
|
|
|
var filter = dict.get('Filter', 'F');
|
|
|
|
var params = dict.get('DecodeParms', 'DP');
|
2014-03-21 04:28:22 +09:00
|
|
|
if (isName(filter)) {
|
2016-10-15 00:19:50 +09:00
|
|
|
if (isArray(params)) {
|
2016-12-08 19:55:08 +09:00
|
|
|
params = this.xref.fetchIfRef(params[0]);
|
2016-10-15 00:19:50 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
return this.makeFilter(stream, filter.name, length, params);
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2014-03-11 14:18:30 +09:00
|
|
|
|
|
|
|
var maybeLength = length;
|
2011-10-25 08:55:23 +09:00
|
|
|
if (isArray(filter)) {
|
|
|
|
var filterArray = filter;
|
|
|
|
var paramsArray = params;
|
|
|
|
for (var i = 0, ii = filterArray.length; i < ii; ++i) {
|
2016-12-08 19:55:08 +09:00
|
|
|
filter = this.xref.fetchIfRef(filterArray[i]);
|
2014-03-21 04:28:22 +09:00
|
|
|
if (!isName(filter)) {
|
2011-10-25 08:55:23 +09:00
|
|
|
error('Bad filter name: ' + filter);
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2012-03-20 21:16:48 +09:00
|
|
|
|
|
|
|
params = null;
|
2014-03-21 04:28:22 +09:00
|
|
|
if (isArray(paramsArray) && (i in paramsArray)) {
|
2016-12-08 19:55:08 +09:00
|
|
|
params = this.xref.fetchIfRef(paramsArray[i]);
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2014-03-11 14:18:30 +09:00
|
|
|
stream = this.makeFilter(stream, filter.name, maybeLength, params);
|
2012-03-20 21:16:48 +09:00
|
|
|
// after the first stream the length variable is invalid
|
2014-03-11 14:18:30 +09:00
|
|
|
maybeLength = null;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return stream;
|
|
|
|
},
|
2014-03-11 14:18:30 +09:00
|
|
|
makeFilter: function Parser_makeFilter(stream, name, maybeLength, params) {
|
2016-09-25 19:19:22 +09:00
|
|
|
// Since the 'Length' entry in the stream dictionary can be completely
|
|
|
|
// wrong, e.g. zero for non-empty streams, only skip parsing the stream
|
|
|
|
// when we can be absolutely certain that it actually is empty.
|
|
|
|
if (maybeLength === 0) {
|
2015-08-27 03:07:32 +09:00
|
|
|
warn('Empty "' + name + '" stream.');
|
2012-10-23 00:53:15 +09:00
|
|
|
return new NullStream(stream);
|
|
|
|
}
|
2014-06-15 19:44:39 +09:00
|
|
|
try {
|
|
|
|
var xrefStreamStats = this.xref.stats.streamTypes;
|
2014-08-03 00:37:24 +09:00
|
|
|
if (name === 'FlateDecode' || name === 'Fl') {
|
2014-06-15 19:44:39 +09:00
|
|
|
xrefStreamStats[StreamType.FLATE] = true;
|
|
|
|
if (params) {
|
|
|
|
return new PredictorStream(new FlateStream(stream, maybeLength),
|
|
|
|
maybeLength, params);
|
|
|
|
}
|
|
|
|
return new FlateStream(stream, maybeLength);
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2014-08-03 00:37:24 +09:00
|
|
|
if (name === 'LZWDecode' || name === 'LZW') {
|
2014-06-15 19:44:39 +09:00
|
|
|
xrefStreamStats[StreamType.LZW] = true;
|
|
|
|
var earlyChange = 1;
|
|
|
|
if (params) {
|
|
|
|
if (params.has('EarlyChange')) {
|
|
|
|
earlyChange = params.get('EarlyChange');
|
|
|
|
}
|
|
|
|
return new PredictorStream(
|
|
|
|
new LZWStream(stream, maybeLength, earlyChange),
|
|
|
|
maybeLength, params);
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2014-06-15 19:44:39 +09:00
|
|
|
return new LZWStream(stream, maybeLength, earlyChange);
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2014-08-03 00:37:24 +09:00
|
|
|
if (name === 'DCTDecode' || name === 'DCT') {
|
2014-06-15 19:44:39 +09:00
|
|
|
xrefStreamStats[StreamType.DCT] = true;
|
2016-10-15 00:19:50 +09:00
|
|
|
return new JpegStream(stream, maybeLength, stream.dict, params);
|
2014-06-15 19:44:39 +09:00
|
|
|
}
|
2014-08-03 00:37:24 +09:00
|
|
|
if (name === 'JPXDecode' || name === 'JPX') {
|
2014-06-15 19:44:39 +09:00
|
|
|
xrefStreamStats[StreamType.JPX] = true;
|
2016-10-15 00:19:50 +09:00
|
|
|
return new JpxStream(stream, maybeLength, stream.dict, params);
|
2014-06-15 19:44:39 +09:00
|
|
|
}
|
2014-08-03 00:37:24 +09:00
|
|
|
if (name === 'ASCII85Decode' || name === 'A85') {
|
2014-06-15 19:44:39 +09:00
|
|
|
xrefStreamStats[StreamType.A85] = true;
|
|
|
|
return new Ascii85Stream(stream, maybeLength);
|
|
|
|
}
|
2014-08-03 00:37:24 +09:00
|
|
|
if (name === 'ASCIIHexDecode' || name === 'AHx') {
|
2014-06-15 19:44:39 +09:00
|
|
|
xrefStreamStats[StreamType.AHX] = true;
|
|
|
|
return new AsciiHexStream(stream, maybeLength);
|
|
|
|
}
|
2014-08-03 00:37:24 +09:00
|
|
|
if (name === 'CCITTFaxDecode' || name === 'CCF') {
|
2014-06-15 19:44:39 +09:00
|
|
|
xrefStreamStats[StreamType.CCF] = true;
|
|
|
|
return new CCITTFaxStream(stream, maybeLength, params);
|
|
|
|
}
|
2014-08-03 00:37:24 +09:00
|
|
|
if (name === 'RunLengthDecode' || name === 'RL') {
|
2014-06-15 19:44:39 +09:00
|
|
|
xrefStreamStats[StreamType.RL] = true;
|
|
|
|
return new RunLengthStream(stream, maybeLength);
|
|
|
|
}
|
2014-08-03 00:37:24 +09:00
|
|
|
if (name === 'JBIG2Decode') {
|
2014-06-15 19:44:39 +09:00
|
|
|
xrefStreamStats[StreamType.JBIG] = true;
|
2016-10-15 00:19:50 +09:00
|
|
|
return new Jbig2Stream(stream, maybeLength, stream.dict, params);
|
2014-06-15 19:44:39 +09:00
|
|
|
}
|
|
|
|
warn('filter "' + name + '" not supported yet');
|
|
|
|
return stream;
|
|
|
|
} catch (ex) {
|
2014-06-19 08:21:21 +09:00
|
|
|
if (ex instanceof MissingDataException) {
|
|
|
|
throw ex;
|
|
|
|
}
|
2014-06-15 19:44:39 +09:00
|
|
|
warn('Invalid stream: \"' + ex + '\"');
|
|
|
|
return new NullStream(stream);
|
2012-04-17 03:34:00 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2011-12-09 07:18:43 +09:00
|
|
|
return Parser;
|
2011-10-25 08:55:23 +09:00
|
|
|
})();
|
|
|
|
|
2011-12-09 07:18:43 +09:00
|
|
|
var Lexer = (function LexerClosure() {
|
2012-05-21 03:44:03 +09:00
|
|
|
/**
 * @param {Object} stream - The byte stream to tokenize; the first byte is
 *   read immediately via `nextChar`.
 * @param {Object} knownCommands - Optional dictionary of the valid commands
 *   and their prefixes (see below).
 */
function Lexer(stream, knownCommands) {
  this.stream = stream;
  this.nextChar();

  // Tokens are assembled one character at a time. Appending with += would
  // leave many short-lived garbage strings behind, so the characters are
  // collected in an array and join()ed once the token is complete. A single
  // array (|this.strBuf|) is reused for every token, which uses less memory
  // than allocating a new array for each string.
  this.strBuf = [];

  // PDF files may contain commands "glued" to other commands, operands or
  // literals, e.g. "q1". |knownCommands| is a dictionary of the valid
  // commands and their prefixes. Prefixes are required as follows: if there
  // is a command that is a prefix of another valid command or literal
  // (e.g. 'f' and 'false'), the intermediate prefixes 'fa', 'fal' and 'fals'
  // must be included too. No prefixes are needed when a command has no other
  // command or literal as a prefix. |knownCommands| is optional.
  this.knownCommands = knownCommands;
}
|
|
|
|
|
2014-03-21 04:28:22 +09:00
|
|
|
// Classification table with one entry per byte value (0x00-0xFF).
// An entry of 1 means the character is white space. An entry of 1 or 2
// means the character ends a name or command. All other entries are 0.
var specialChars = (function buildSpecialChars() {
  // NUL, HT, LF, FF, CR, SPACE
  var WHITE_SPACE = [0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20];
  // '%', '(', ')', '/', '<', '>', '[', ']', '{', '}'
  var TERMINATORS = [0x25, 0x28, 0x29, 0x2F, 0x3C, 0x3E,
                     0x5B, 0x5D, 0x7B, 0x7D];
  var table = [];
  var i;
  for (i = 0; i < 256; i++) {
    table.push(0);
  }
  for (i = 0; i < WHITE_SPACE.length; i++) {
    table[WHITE_SPACE[i]] = 1;
  }
  for (i = 0; i < TERMINATORS.length; i++) {
    table[TERMINATORS[i]] = 2;
  }
  return table;
})();
|
|
|
|
|
|
|
|
/**
 * Convert the character code of a hexadecimal digit to its numeric value.
 * @param {number} ch - A character code.
 * @returns {number} 0-15 for '0'-'9', 'A'-'F' and 'a'-'f'; -1 otherwise.
 */
function toHexDigit(ch) {
  var isDecimal = ch >= 0x30 && ch <= 0x39; // '0'-'9'
  if (isDecimal) {
    return ch & 0x0F;
  }
  var isUpperHex = ch >= 0x41 && ch <= 0x46; // 'A'-'F'
  var isLowerHex = ch >= 0x61 && ch <= 0x66; // 'a'-'f'
  if (isUpperHex || isLowerHex) {
    // The low nibble of 'A'/'a' is 1, so adding 9 maps it to 10.
    return (ch & 0x0F) + 9;
  }
  return -1;
}
|
|
|
|
|
2011-12-09 07:18:43 +09:00
|
|
|
Lexer.prototype = {
|
2013-07-01 05:45:15 +09:00
|
|
|
nextChar: function Lexer_nextChar() {
|
|
|
|
return (this.currentChar = this.stream.getByte());
|
|
|
|
},
|
2014-02-02 05:46:09 +09:00
|
|
|
peekChar: function Lexer_peekChar() {
|
2014-09-11 23:33:49 +09:00
|
|
|
return this.stream.peekByte();
|
2014-02-02 05:46:09 +09:00
|
|
|
},
|
2013-07-01 05:45:15 +09:00
|
|
|
getNumber: function Lexer_getNumber() {
|
|
|
|
var ch = this.currentChar;
|
2014-02-02 05:46:09 +09:00
|
|
|
var eNotation = false;
|
|
|
|
var divideBy = 0; // different from 0 if it's a floating point value
|
|
|
|
var sign = 1;
|
|
|
|
|
|
|
|
if (ch === 0x2D) { // '-'
|
|
|
|
sign = -1;
|
|
|
|
ch = this.nextChar();
|
2015-07-16 19:11:49 +09:00
|
|
|
|
|
|
|
if (ch === 0x2D) { // '-'
|
|
|
|
// Ignore double negative (this is consistent with Adobe Reader).
|
|
|
|
ch = this.nextChar();
|
|
|
|
}
|
2014-02-02 05:46:09 +09:00
|
|
|
} else if (ch === 0x2B) { // '+'
|
|
|
|
ch = this.nextChar();
|
|
|
|
}
|
|
|
|
if (ch === 0x2E) { // '.'
|
|
|
|
divideBy = 10;
|
|
|
|
ch = this.nextChar();
|
|
|
|
}
|
2017-05-02 21:14:03 +09:00
|
|
|
if (ch === 0x0A || ch === 0x0D) { // LF, CR
|
|
|
|
// Ignore line-breaks (this is consistent with Adobe Reader).
|
|
|
|
do {
|
|
|
|
ch = this.nextChar();
|
|
|
|
} while (ch === 0x0A || ch === 0x0D);
|
|
|
|
}
|
2014-02-02 05:46:09 +09:00
|
|
|
if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
2017-05-02 21:14:03 +09:00
|
|
|
error(`Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`);
|
2014-02-02 05:46:09 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
var baseValue = ch - 0x30; // '0'
|
|
|
|
var powerValue = 0;
|
|
|
|
var powerValueSign = 1;
|
|
|
|
|
2013-07-01 05:45:15 +09:00
|
|
|
while ((ch = this.nextChar()) >= 0) {
|
2014-02-02 05:46:09 +09:00
|
|
|
if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
|
|
|
|
var currentDigit = ch - 0x30; // '0'
|
|
|
|
if (eNotation) { // We are after an 'e' or 'E'
|
|
|
|
powerValue = powerValue * 10 + currentDigit;
|
|
|
|
} else {
|
|
|
|
if (divideBy !== 0) { // We are after a point
|
|
|
|
divideBy *= 10;
|
|
|
|
}
|
|
|
|
baseValue = baseValue * 10 + currentDigit;
|
|
|
|
}
|
|
|
|
} else if (ch === 0x2E) { // '.'
|
|
|
|
if (divideBy === 0) {
|
|
|
|
divideBy = 1;
|
|
|
|
} else {
|
|
|
|
// A number can have only one '.'
|
|
|
|
break;
|
|
|
|
}
|
2013-07-01 05:45:15 +09:00
|
|
|
} else if (ch === 0x2D) { // '-'
|
2011-10-25 08:55:23 +09:00
|
|
|
// ignore minus signs in the middle of numbers to match
|
|
|
|
// Adobe's behavior
|
2016-07-17 21:33:41 +09:00
|
|
|
warn('Badly formatted number');
|
2013-07-01 05:45:15 +09:00
|
|
|
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
|
2014-02-02 05:46:09 +09:00
|
|
|
// 'E' can be either a scientific notation or the beginning of a new
|
|
|
|
// operator
|
|
|
|
ch = this.peekChar();
|
|
|
|
if (ch === 0x2B || ch === 0x2D) { // '+', '-'
|
|
|
|
powerValueSign = (ch === 0x2D) ? -1 : 1;
|
|
|
|
this.nextChar(); // Consume the sign character
|
|
|
|
} else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
|
|
|
// The 'E' must be the beginning of a new operator
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
eNotation = true;
|
2011-10-25 08:55:23 +09:00
|
|
|
} else {
|
|
|
|
// the last character doesn't belong to us
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2014-02-02 05:46:09 +09:00
|
|
|
|
|
|
|
if (divideBy !== 0) {
|
|
|
|
baseValue /= divideBy;
|
|
|
|
}
|
|
|
|
if (eNotation) {
|
|
|
|
baseValue *= Math.pow(10, powerValueSign * powerValue);
|
2014-01-29 13:45:23 +09:00
|
|
|
}
|
2014-02-02 05:46:09 +09:00
|
|
|
return sign * baseValue;
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2012-04-05 05:43:26 +09:00
|
|
|
getString: function Lexer_getString() {
|
2011-10-25 08:55:23 +09:00
|
|
|
var numParen = 1;
|
|
|
|
var done = false;
|
2014-01-29 13:21:16 +09:00
|
|
|
var strBuf = this.strBuf;
|
|
|
|
strBuf.length = 0;
|
2013-07-01 05:45:15 +09:00
|
|
|
|
|
|
|
var ch = this.nextChar();
|
|
|
|
while (true) {
|
|
|
|
var charBuffered = false;
|
|
|
|
switch (ch | 0) {
|
|
|
|
case -1:
|
2011-10-25 08:55:23 +09:00
|
|
|
warn('Unterminated string');
|
|
|
|
done = true;
|
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x28: // '('
|
2011-10-25 08:55:23 +09:00
|
|
|
++numParen;
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push('(');
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x29: // ')'
|
2013-02-01 08:33:09 +09:00
|
|
|
if (--numParen === 0) {
|
2013-07-01 05:45:15 +09:00
|
|
|
this.nextChar(); // consume strings ')'
|
2011-10-25 08:55:23 +09:00
|
|
|
done = true;
|
|
|
|
} else {
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push(')');
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x5C: // '\\'
|
|
|
|
ch = this.nextChar();
|
2011-10-25 08:55:23 +09:00
|
|
|
switch (ch) {
|
2013-07-01 05:45:15 +09:00
|
|
|
case -1:
|
2011-10-25 08:55:23 +09:00
|
|
|
warn('Unterminated string');
|
|
|
|
done = true;
|
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x6E: // 'n'
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push('\n');
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x72: // 'r'
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push('\r');
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x74: // 't'
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push('\t');
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x62: // 'b'
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push('\b');
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x66: // 'f'
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push('\f');
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x5C: // '\'
|
|
|
|
case 0x28: // '('
|
|
|
|
case 0x29: // ')'
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push(String.fromCharCode(ch));
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
|
|
|
|
case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
|
|
|
|
var x = ch & 0x0F;
|
|
|
|
ch = this.nextChar();
|
|
|
|
charBuffered = true;
|
|
|
|
if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
|
|
|
|
x = (x << 3) + (ch & 0x0F);
|
|
|
|
ch = this.nextChar();
|
|
|
|
if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
|
|
|
|
charBuffered = false;
|
|
|
|
x = (x << 3) + (ch & 0x0F);
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
}
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push(String.fromCharCode(x));
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
2014-03-21 01:50:12 +09:00
|
|
|
case 0x0D: // CR
|
|
|
|
if (this.peekChar() === 0x0A) { // LF
|
|
|
|
this.nextChar();
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x0A: // LF
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
|
|
|
default:
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push(String.fromCharCode(ch));
|
2013-02-24 02:35:18 +09:00
|
|
|
break;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push(String.fromCharCode(ch));
|
2013-02-24 02:35:18 +09:00
|
|
|
break;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2013-07-01 05:45:15 +09:00
|
|
|
if (done) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!charBuffered) {
|
|
|
|
ch = this.nextChar();
|
|
|
|
}
|
|
|
|
}
|
2014-01-29 13:21:16 +09:00
|
|
|
return strBuf.join('');
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2013-07-01 05:45:15 +09:00
|
|
|
getName: function Lexer_getName() {
|
2015-11-26 21:27:12 +09:00
|
|
|
var ch, previousCh;
|
2014-01-29 13:21:16 +09:00
|
|
|
var strBuf = this.strBuf;
|
|
|
|
strBuf.length = 0;
|
2013-07-01 05:45:15 +09:00
|
|
|
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
|
|
|
|
if (ch === 0x23) { // '#'
|
|
|
|
ch = this.nextChar();
|
2015-11-26 21:27:12 +09:00
|
|
|
if (specialChars[ch]) {
|
|
|
|
warn('Lexer_getName: ' +
|
|
|
|
'NUMBER SIGN (#) should be followed by a hexadecimal number.');
|
|
|
|
strBuf.push('#');
|
|
|
|
break;
|
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
var x = toHexDigit(ch);
|
2014-08-03 00:37:24 +09:00
|
|
|
if (x !== -1) {
|
2015-11-26 21:27:12 +09:00
|
|
|
previousCh = ch;
|
|
|
|
ch = this.nextChar();
|
|
|
|
var x2 = toHexDigit(ch);
|
2014-05-23 16:25:36 +09:00
|
|
|
if (x2 === -1) {
|
2015-11-26 21:27:12 +09:00
|
|
|
warn('Lexer_getName: Illegal digit (' +
|
2016-12-11 18:43:09 +09:00
|
|
|
String.fromCharCode(ch) + ') in hexadecimal number.');
|
2015-11-26 21:27:12 +09:00
|
|
|
strBuf.push('#', String.fromCharCode(previousCh));
|
|
|
|
if (specialChars[ch]) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
strBuf.push(String.fromCharCode(ch));
|
|
|
|
continue;
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push(String.fromCharCode((x << 4) | x2));
|
2011-10-25 08:55:23 +09:00
|
|
|
} else {
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push('#', String.fromCharCode(ch));
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
} else {
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push(String.fromCharCode(ch));
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
}
|
2015-07-10 23:10:24 +09:00
|
|
|
if (strBuf.length > 127) {
|
|
|
|
warn('name token is longer than allowed by the spec: ' + strBuf.length);
|
2013-07-01 05:45:15 +09:00
|
|
|
}
|
2014-02-28 13:41:03 +09:00
|
|
|
return Name.get(strBuf.join(''));
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2013-07-01 05:45:15 +09:00
|
|
|
getHexString: function Lexer_getHexString() {
|
2014-01-29 13:21:16 +09:00
|
|
|
var strBuf = this.strBuf;
|
|
|
|
strBuf.length = 0;
|
2013-07-01 05:45:15 +09:00
|
|
|
var ch = this.currentChar;
|
2013-01-09 08:28:08 +09:00
|
|
|
var isFirstHex = true;
|
|
|
|
var firstDigit;
|
|
|
|
var secondDigit;
|
|
|
|
while (true) {
|
2013-07-01 05:45:15 +09:00
|
|
|
if (ch < 0) {
|
2011-10-25 08:55:23 +09:00
|
|
|
warn('Unterminated hex string');
|
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
} else if (ch === 0x3E) { // '>'
|
|
|
|
this.nextChar();
|
2013-01-09 08:28:08 +09:00
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
} else if (specialChars[ch] === 1) {
|
|
|
|
ch = this.nextChar();
|
2013-01-09 08:28:08 +09:00
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
if (isFirstHex) {
|
|
|
|
firstDigit = toHexDigit(ch);
|
|
|
|
if (firstDigit === -1) {
|
2013-02-03 08:00:13 +09:00
|
|
|
warn('Ignoring invalid character "' + ch + '" in hex string');
|
2013-07-01 05:45:15 +09:00
|
|
|
ch = this.nextChar();
|
2013-01-09 08:28:08 +09:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
secondDigit = toHexDigit(ch);
|
|
|
|
if (secondDigit === -1) {
|
2013-02-03 08:00:13 +09:00
|
|
|
warn('Ignoring invalid character "' + ch + '" in hex string');
|
2013-07-01 05:45:15 +09:00
|
|
|
ch = this.nextChar();
|
2013-01-09 08:28:08 +09:00
|
|
|
continue;
|
|
|
|
}
|
2014-01-29 13:21:16 +09:00
|
|
|
strBuf.push(String.fromCharCode((firstDigit << 4) | secondDigit));
|
2013-01-09 08:28:08 +09:00
|
|
|
}
|
|
|
|
isFirstHex = !isFirstHex;
|
2013-07-01 05:45:15 +09:00
|
|
|
ch = this.nextChar();
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
}
|
2014-01-29 13:21:16 +09:00
|
|
|
return strBuf.join('');
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2012-04-05 05:43:26 +09:00
|
|
|
getObj: function Lexer_getObj() {
|
2011-10-25 08:55:23 +09:00
|
|
|
// skip whitespace and comments
|
|
|
|
var comment = false;
|
2013-07-01 05:45:15 +09:00
|
|
|
var ch = this.currentChar;
|
2011-10-25 08:55:23 +09:00
|
|
|
while (true) {
|
2013-07-01 05:45:15 +09:00
|
|
|
if (ch < 0) {
|
2011-10-25 08:55:23 +09:00
|
|
|
return EOF;
|
2013-07-01 05:45:15 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
if (comment) {
|
2014-05-23 16:25:36 +09:00
|
|
|
if (ch === 0x0A || ch === 0x0D) { // LF, CR
|
2011-10-25 08:55:23 +09:00
|
|
|
comment = false;
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2013-07-01 05:45:15 +09:00
|
|
|
} else if (ch === 0x25) { // '%'
|
2011-10-25 08:55:23 +09:00
|
|
|
comment = true;
|
2013-07-01 05:45:15 +09:00
|
|
|
} else if (specialChars[ch] !== 1) {
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
|
|
|
}
|
2013-07-01 05:45:15 +09:00
|
|
|
ch = this.nextChar();
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
// start reading token
|
2013-07-01 05:45:15 +09:00
|
|
|
switch (ch | 0) {
|
|
|
|
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
|
|
|
|
case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
|
|
|
|
case 0x2B: case 0x2D: case 0x2E: // '+', '-', '.'
|
|
|
|
return this.getNumber();
|
|
|
|
case 0x28: // '('
|
2011-10-25 08:55:23 +09:00
|
|
|
return this.getString();
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x2F: // '/'
|
|
|
|
return this.getName();
|
2011-10-25 08:55:23 +09:00
|
|
|
// array punctuation
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x5B: // '['
|
|
|
|
this.nextChar();
|
|
|
|
return Cmd.get('[');
|
|
|
|
case 0x5D: // ']'
|
|
|
|
this.nextChar();
|
|
|
|
return Cmd.get(']');
|
2011-10-25 08:55:23 +09:00
|
|
|
// hex string or dict punctuation
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x3C: // '<'
|
|
|
|
ch = this.nextChar();
|
|
|
|
if (ch === 0x3C) {
|
2011-10-25 08:55:23 +09:00
|
|
|
// dict punctuation
|
2013-07-01 05:45:15 +09:00
|
|
|
this.nextChar();
|
2011-12-19 04:39:10 +09:00
|
|
|
return Cmd.get('<<');
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2013-07-01 05:45:15 +09:00
|
|
|
return this.getHexString();
|
2011-10-25 08:55:23 +09:00
|
|
|
// dict punctuation
|
2013-07-01 05:45:15 +09:00
|
|
|
case 0x3E: // '>'
|
|
|
|
ch = this.nextChar();
|
|
|
|
if (ch === 0x3E) {
|
|
|
|
this.nextChar();
|
2011-12-19 04:39:10 +09:00
|
|
|
return Cmd.get('>>');
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2013-07-01 05:45:15 +09:00
|
|
|
return Cmd.get('>');
|
|
|
|
case 0x7B: // '{'
|
|
|
|
this.nextChar();
|
|
|
|
return Cmd.get('{');
|
|
|
|
case 0x7D: // '}'
|
|
|
|
this.nextChar();
|
|
|
|
return Cmd.get('}');
|
|
|
|
case 0x29: // ')'
|
2017-02-12 03:11:52 +09:00
|
|
|
// Consume the current character in order to avoid permanently hanging
|
|
|
|
// the worker thread if `Lexer.getObject` is called from within a loop
|
|
|
|
// containing try-catch statements, since we would otherwise attempt
|
|
|
|
// to parse the *same* character over and over (fixes issue8061.pdf).
|
|
|
|
this.nextChar();
|
2011-10-25 08:55:23 +09:00
|
|
|
error('Illegal character: ' + ch);
|
2013-07-01 05:45:15 +09:00
|
|
|
break;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
// command
|
2013-07-01 05:45:15 +09:00
|
|
|
var str = String.fromCharCode(ch);
|
2012-05-21 03:44:03 +09:00
|
|
|
var knownCommands = this.knownCommands;
|
2014-06-02 19:14:53 +09:00
|
|
|
var knownCommandFound = knownCommands && knownCommands[str] !== undefined;
|
2013-07-01 05:45:15 +09:00
|
|
|
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
|
2012-05-21 03:44:03 +09:00
|
|
|
// stop if known command is found and next character does not make
|
|
|
|
// the str a command
|
2013-07-01 05:45:15 +09:00
|
|
|
var possibleCommand = str + String.fromCharCode(ch);
|
2014-06-02 19:14:53 +09:00
|
|
|
if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
|
2012-05-21 03:44:03 +09:00
|
|
|
break;
|
2013-07-01 05:45:15 +09:00
|
|
|
}
|
2014-05-23 16:25:36 +09:00
|
|
|
if (str.length === 128) {
|
2011-10-25 08:55:23 +09:00
|
|
|
error('Command token too long: ' + str.length);
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2013-07-01 05:45:15 +09:00
|
|
|
str = possibleCommand;
|
2014-06-02 19:14:53 +09:00
|
|
|
knownCommandFound = knownCommands && knownCommands[str] !== undefined;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2014-05-23 16:25:36 +09:00
|
|
|
if (str === 'true') {
|
2011-10-25 08:55:23 +09:00
|
|
|
return true;
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2014-05-23 16:25:36 +09:00
|
|
|
if (str === 'false') {
|
2011-10-25 08:55:23 +09:00
|
|
|
return false;
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2014-05-23 16:25:36 +09:00
|
|
|
if (str === 'null') {
|
2011-10-25 08:55:23 +09:00
|
|
|
return null;
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2011-12-19 04:39:10 +09:00
|
|
|
return Cmd.get(str);
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2012-04-05 05:43:26 +09:00
|
|
|
skipToNextLine: function Lexer_skipToNextLine() {
|
2013-07-01 05:45:15 +09:00
|
|
|
var ch = this.currentChar;
|
|
|
|
while (ch >= 0) {
|
|
|
|
if (ch === 0x0D) { // CR
|
|
|
|
ch = this.nextChar();
|
|
|
|
if (ch === 0x0A) { // LF
|
|
|
|
this.nextChar();
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
} else if (ch === 0x0A) { // LF
|
|
|
|
this.nextChar();
|
|
|
|
break;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2013-07-01 05:45:15 +09:00
|
|
|
ch = this.nextChar();
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2011-12-09 07:18:43 +09:00
|
|
|
return Lexer;
|
2011-10-25 08:55:23 +09:00
|
|
|
})();
|
|
|
|
|
2014-07-02 19:48:09 +09:00
|
|
|
/**
 * Reader for the linearization parameter dictionary found at the start of a
 * stream.
 */
var Linearization = {
  /**
   * Parses the first four objects from `stream` and, when they form a
   * linearization dictionary, returns its validated parameters.
   *
   * @param stream - Source handed to a new `Lexer`; its `length` property is
   *   compared against the dictionary's `L` entry.
   * @returns {Object|null} `null` when the first four parsed objects are not
   *   two integers, the `obj` command and a dictionary whose `Linearized`
   *   entry is a number greater than zero. Otherwise an object with the
   *   properties `length` (`L`), `hints` (`H`), `objectNumberFirst` (`O`),
   *   `endFirst` (`E`), `numPages` (`N`), `mainXRefEntriesOffset` (`T`) and
   *   `pageFirst` (`P`, or 0 when the dictionary has no `P` entry).
   * @throws {Error} When `L` differs from `stream.length`, or when any of
   *   the entries listed above fails validation.
   */
  create: function LinearizationCreate(stream) {
    // Fetches the entry `name` from the dictionary and requires it to be an
    // integer that is positive, or non-negative when `allowZeroValue` is set.
    function getInt(name, allowZeroValue) {
      var value = linDict.get(name);
      var inRange = isInt(value) && (allowZeroValue ? value >= 0 : value > 0);
      if (!inRange) {
        throw new Error('The "' + name + '" parameter in the linearization ' +
                        'dictionary is invalid.');
      }
      return value;
    }

    // Fetches the `H` entry, which must be an array holding either two or
    // four positive integers, and returns that array unchanged.
    function getHints() {
      var hints = linDict.get('H');
      var count = isArray(hints) ? hints.length : -1;

      if (count === 2 || count === 4) {
        for (var i = 0; i < count; i++) {
          var hint = hints[i];
          if (!isInt(hint) || !(hint > 0)) {
            throw new Error('Hint (' + i +
                            ') in the linearization dictionary is invalid.');
          }
        }
        return hints;
      }
      throw new Error('Hint array in the linearization dictionary is invalid.');
    }

    var parser = new Parser(new Lexer(stream), false, null);
    var firstObj = parser.getObj();
    var secondObj = parser.getObj();
    var thirdObj = parser.getObj();
    var linDict = parser.getObj();

    // The dictionary must be preceded by two integers and the `obj` command.
    if (!isInt(firstObj) || !isInt(secondObj) || !isCmd(thirdObj, 'obj') ||
        !isDict(linDict)) {
      return null; // No valid linearization dictionary found.
    }
    var linearized = linDict.get('Linearized');
    if (!isNum(linearized) || !(linearized > 0)) {
      return null; // No valid linearization dictionary found.
    }

    var length = getInt('L');
    if (length !== stream.length) {
      throw new Error('The "L" parameter in the linearization dictionary ' +
                      'does not equal the stream length.');
    }

    // The entries are validated in the order in which they are listed here,
    // so the first invalid one determines which error is thrown.
    return {
      length,
      hints: getHints(),
      objectNumberFirst: getInt('O'),
      endFirst: getInt('E'),
      numPages: getInt('N'),
      mainXRefEntriesOffset: getInt('T'),
      pageFirst: linDict.has('P') ? getInt('P', true) : 0
    };
  }
};
|
2015-11-22 01:32:47 +09:00
|
|
|
|
|
|
|
exports.Lexer = Lexer;
|
|
|
|
exports.Linearization = Linearization;
|
|
|
|
exports.Parser = Parser;
|
|
|
|
}));
|