28d2ada59c
This should reduce the possibility of accidentally truncating some inline images, while *not* causing the "EI" detection to become significantly slower.[1]

There's obviously a possibility that these added checks are not sufficient to catch *every* single case of "EI" sequences within the actual inline image data, but without specific test-cases I decided against over-engineering the solution here.

*Please note:* The interpolation issues are somewhat orthogonal to the main issue here, which is the truncated image, and they're already tracked elsewhere.

---
[1] I've looked at the issue a few times, and this is the first approach that I was able to come up with that didn't cause *unacceptable* performance regressions in e.g. issue 2618.
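As a rough sketch (not the exact patch) of the added validation: once an "EI" sequence followed by whitespace has been found, and the next few bytes look like ASCII, the lexer peeks at the upcoming token and the "EI" is only accepted as the end of the inline image if that token is a known command; see `Parser.findDefaultInlineStreamEnd` below.

    // Sketch only; the complete logic lives in findDefaultInlineStreamEnd.
    if (lexer.knownCommands) {
      const nextObj = lexer.peekObj();
      if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
        state = 0; // The "EI" was part of the image data; keep scanning.
      }
    }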
1414 lines
41 KiB
JavaScript
/* Copyright 2012 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* eslint no-var: error */

import {
  Ascii85Stream,
  AsciiHexStream,
  FlateStream,
  LZWStream,
  NullStream,
  PredictorStream,
  RunLengthStream,
} from "./stream.js";
import {
  assert,
  bytesToString,
  FormatError,
  info,
  isNum,
  StreamType,
  warn,
} from "../shared/util.js";
import {
  Cmd,
  Dict,
  EOF,
  isCmd,
  isDict,
  isEOF,
  isName,
  Name,
  Ref,
} from "./primitives.js";
import { isWhiteSpace, MissingDataException } from "./core_utils.js";
import { CCITTFaxStream } from "./ccitt_stream.js";
import { Jbig2Stream } from "./jbig2_stream.js";
import { JpegStream } from "./jpeg_stream.js";
import { JpxStream } from "./jpx_stream.js";

const MAX_LENGTH_TO_CACHE = 1000;
const MAX_ADLER32_LENGTH = 5552;

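// Adler-32 checksum of `bytes`: `a` is one plus the sum of all bytes, `b` is
// the sum of the successive values of `a`, both modulo 65521, and the result
// is packed as (b << 16) | a. MAX_ADLER32_LENGTH matches zlib's NMAX (5552),
// the largest number of bytes whose running sums stay small enough that the
// modulo can be deferred until after the loop.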
function computeAdler32(bytes) {
  const bytesLength = bytes.length;
  if (
    typeof PDFJSDev === "undefined" ||
    PDFJSDev.test("!PRODUCTION || TESTING")
  ) {
    assert(
      bytesLength < MAX_ADLER32_LENGTH,
      'computeAdler32: Unsupported "bytes" length.'
    );
  }
  let a = 1,
    b = 0;
  for (let i = 0; i < bytesLength; ++i) {
    // No modulo required in the loop if `bytesLength < 5552`.
    a += bytes[i] & 0xff;
    b += a;
  }
  return (b % 65521 << 16) | a % 65521;
}

class Parser {
  constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) {
    this.lexer = lexer;
    this.xref = xref;
    this.allowStreams = allowStreams;
    this.recoveryMode = recoveryMode;

    this.imageCache = Object.create(null);
    this.refill();
  }

  refill() {
    this.buf1 = this.lexer.getObj();
    this.buf2 = this.lexer.getObj();
  }

  shift() {
    if (this.buf2 instanceof Cmd && this.buf2.cmd === "ID") {
      this.buf1 = this.buf2;
      this.buf2 = null;
    } else {
      this.buf1 = this.buf2;
      this.buf2 = this.lexer.getObj();
    }
  }

  tryShift() {
    try {
      this.shift();
      return true;
    } catch (e) {
      if (e instanceof MissingDataException) {
        throw e;
      }
      // Upon failure, the caller should reset this.lexer.pos to a known good
      // state and call this.shift() twice to reset the buffers.
      return false;
    }
  }

  getObj(cipherTransform = null) {
    const buf1 = this.buf1;
    this.shift();

    if (buf1 instanceof Cmd) {
      switch (buf1.cmd) {
        case "BI": // inline image
          return this.makeInlineImage(cipherTransform);
        case "[": // array
          const array = [];
          while (!isCmd(this.buf1, "]") && !isEOF(this.buf1)) {
            array.push(this.getObj(cipherTransform));
          }
          if (isEOF(this.buf1)) {
            if (!this.recoveryMode) {
              throw new FormatError("End of file inside array");
            }
            return array;
          }
          this.shift();
          return array;
        case "<<": // dictionary or stream
          const dict = new Dict(this.xref);
          while (!isCmd(this.buf1, ">>") && !isEOF(this.buf1)) {
            if (!isName(this.buf1)) {
              info("Malformed dictionary: key must be a name object");
              this.shift();
              continue;
            }

            const key = this.buf1.name;
            this.shift();
            if (isEOF(this.buf1)) {
              break;
            }
            dict.set(key, this.getObj(cipherTransform));
          }
          if (isEOF(this.buf1)) {
            if (!this.recoveryMode) {
              throw new FormatError("End of file inside dictionary");
            }
            return dict;
          }

          // Stream objects are not allowed inside content streams or
          // object streams.
          if (isCmd(this.buf2, "stream")) {
            return this.allowStreams
              ? this.makeStream(dict, cipherTransform)
              : dict;
          }
          this.shift();
          return dict;
        default:
          // simple object
          return buf1;
      }
    }

    if (Number.isInteger(buf1)) {
      // indirect reference or integer
      if (Number.isInteger(this.buf1) && isCmd(this.buf2, "R")) {
        const ref = Ref.get(buf1, this.buf1);
        this.shift();
        this.shift();
        return ref;
      }
      return buf1;
    }

    if (typeof buf1 === "string") {
      if (cipherTransform) {
        return cipherTransform.decryptString(buf1);
      }
      return buf1;
    }

    // simple object
    return buf1;
  }

  /**
   * Find the end of the stream by searching for the /EI\s/.
   * @returns {number} The inline stream length.
   */
  findDefaultInlineStreamEnd(stream) {
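    // The loop below is a small state machine over the raw bytes:
    //   state 0 - scanning for "E"; state 1 - seen "E", expecting "I";
    //   state 2 - seen "EI", now checking that it actually terminates the
    //   image data (trailing whitespace, ASCII look-ahead, known command).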
    const E = 0x45,
      I = 0x49,
      SPACE = 0x20,
      LF = 0xa,
      CR = 0xd,
      NUL = 0x0;
    const lexer = this.lexer,
      startPos = stream.pos,
      n = 10;
    let state = 0,
      ch,
      maybeEIPos;
    while ((ch = stream.getByte()) !== -1) {
      if (state === 0) {
        state = ch === E ? 1 : 0;
      } else if (state === 1) {
        state = ch === I ? 2 : 0;
      } else {
        assert(state === 2, "findDefaultInlineStreamEnd - invalid state.");
        if (ch === SPACE || ch === LF || ch === CR) {
          maybeEIPos = stream.pos;
          // Let's check that the next `n` bytes are ASCII... just to be sure.
          const followingBytes = stream.peekBytes(n);
          for (let i = 0, ii = followingBytes.length; i < ii; i++) {
            ch = followingBytes[i];
            if (ch === NUL && followingBytes[i + 1] !== NUL) {
              // NUL bytes are not supposed to occur *outside* of inline
              // images, but some PDF generators violate that assumption,
              // thus breaking the EI detection heuristics used below.
              //
              // However, we can't unconditionally treat NUL bytes as "ASCII",
              // since that *could* result in inline images being truncated.
              //
              // To attempt to address this, we'll still treat any *sequence*
              // of NUL bytes as non-ASCII, but for a *single* NUL byte we'll
              // continue checking the `followingBytes` (fixes issue8823.pdf).
              continue;
            }
            if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7f)) {
              // Not a LF, CR, SPACE or any visible ASCII character, i.e.
              // it's binary stuff. Resetting the state.
              state = 0;
              break;
            }
          }

          if (state !== 2) {
            continue;
          }
          // Check that the "EI" sequence isn't part of the image data, since
          // that would cause the image to be truncated (fixes issue11124.pdf).
          if (lexer.knownCommands) {
            const nextObj = lexer.peekObj();
            if (nextObj instanceof Cmd && !lexer.knownCommands[nextObj.cmd]) {
              // Not a valid command, i.e. the inline image data *itself*
              // contains an "EI" sequence. Resetting the state.
              state = 0;
            }
          } else {
            warn(
              "findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
            );
          }

          if (state === 2) {
            break; // Finished!
          }
        } else {
          state = 0;
        }
      }
    }

    if (ch === -1) {
      warn(
        "findDefaultInlineStreamEnd: " +
          "Reached the end of the stream without finding a valid EI marker"
      );
      if (maybeEIPos) {
        warn('... trying to recover by using the last "EI" occurrence.');
        stream.skip(-(stream.pos - maybeEIPos)); // Reset the stream position.
      }
    }

    let endOffset = 4;
    stream.skip(-endOffset); // Set the stream position to just before "EI".
    ch = stream.peekByte();
    stream.skip(endOffset); // ... and remember to reset the stream position.

    // Ensure that we don't accidentally truncate the inline image, when the
    // data is immediately followed by the "EI" marker (fixes issue10388.pdf).
    if (!isWhiteSpace(ch)) {
      endOffset--;
    }
    return stream.pos - endOffset - startPos;
  }

  /**
   * Find the EOI (end-of-image) marker 0xFFD9 of the stream.
   * @returns {number} The inline stream length.
   */
  findDCTDecodeInlineStreamEnd(stream) {
    const startPos = stream.pos;
    let foundEOI = false,
      b,
      markerLength;
    while ((b = stream.getByte()) !== -1) {
      if (b !== 0xff) {
        // Not a valid marker.
        continue;
      }
      switch (stream.getByte()) {
        case 0x00: // Byte stuffing.
          // 0xFF00 appears to be a very common byte sequence in JPEG images.
          break;

        case 0xff: // Fill byte.
          // Avoid skipping a valid marker, resetting the stream position.
          stream.skip(-1);
          break;

        case 0xd9: // EOI
          foundEOI = true;
          break;

        case 0xc0: // SOF0
        case 0xc1: // SOF1
        case 0xc2: // SOF2
        case 0xc3: // SOF3
        /* falls through */
        case 0xc5: // SOF5
        case 0xc6: // SOF6
        case 0xc7: // SOF7
        /* falls through */
        case 0xc9: // SOF9
        case 0xca: // SOF10
        case 0xcb: // SOF11
        /* falls through */
        case 0xcd: // SOF13
        case 0xce: // SOF14
        case 0xcf: // SOF15
        /* falls through */
        case 0xc4: // DHT
        case 0xcc: // DAC
        /* falls through */
        case 0xda: // SOS
        case 0xdb: // DQT
        case 0xdc: // DNL
        case 0xdd: // DRI
        case 0xde: // DHP
        case 0xdf: // EXP
        /* falls through */
        case 0xe0: // APP0
        case 0xe1: // APP1
        case 0xe2: // APP2
        case 0xe3: // APP3
        case 0xe4: // APP4
        case 0xe5: // APP5
        case 0xe6: // APP6
        case 0xe7: // APP7
        case 0xe8: // APP8
        case 0xe9: // APP9
        case 0xea: // APP10
        case 0xeb: // APP11
        case 0xec: // APP12
        case 0xed: // APP13
        case 0xee: // APP14
        case 0xef: // APP15
        /* falls through */
        case 0xfe: // COM
          // The marker should be followed by the length of the segment.
          markerLength = stream.getUint16();
          if (markerLength > 2) {
            // |markerLength| contains the byte length of the marker segment,
            // including its own length (2 bytes) and excluding the marker.
            stream.skip(markerLength - 2); // Jump to the next marker.
          } else {
            // The marker length is invalid, resetting the stream position.
            stream.skip(-2);
          }
          break;
      }
      if (foundEOI) {
        break;
      }
    }
    const length = stream.pos - startPos;
    if (b === -1) {
      warn(
        "Inline DCTDecode image stream: " +
          "EOI marker not found, searching for /EI/ instead."
      );
      stream.skip(-length); // Reset the stream position.
      return this.findDefaultInlineStreamEnd(stream);
    }
    this.inlineStreamSkipEI(stream);
    return length;
  }

  /**
   * Find the EOD (end-of-data) marker '~>' (i.e. TILDE + GT) of the stream.
   * @returns {number} The inline stream length.
   */
  findASCII85DecodeInlineStreamEnd(stream) {
    const TILDE = 0x7e,
      GT = 0x3e;
    const startPos = stream.pos;
    let ch;
    while ((ch = stream.getByte()) !== -1) {
      if (ch === TILDE) {
        const tildePos = stream.pos;

        ch = stream.peekByte();
        // Handle corrupt PDF documents which contain whitespace "inside" of
        // the EOD marker (fixes issue10614.pdf).
        while (isWhiteSpace(ch)) {
          stream.skip();
          ch = stream.peekByte();
        }
        if (ch === GT) {
          stream.skip();
          break;
        }
        // Handle corrupt PDF documents which contain truncated EOD markers,
        // where the '>' character is missing (fixes issue11385.pdf).
        if (stream.pos > tildePos) {
          const maybeEI = stream.peekBytes(2);
          if (maybeEI[0] === /* E = */ 0x45 && maybeEI[1] === /* I = */ 0x49) {
            break;
          }
        }
      }
    }
    const length = stream.pos - startPos;
    if (ch === -1) {
      warn(
        "Inline ASCII85Decode image stream: " +
          "EOD marker not found, searching for /EI/ instead."
      );
      stream.skip(-length); // Reset the stream position.
      return this.findDefaultInlineStreamEnd(stream);
    }
    this.inlineStreamSkipEI(stream);
    return length;
  }

  /**
   * Find the EOD (end-of-data) marker '>' (i.e. GT) of the stream.
   * @returns {number} The inline stream length.
   */
  findASCIIHexDecodeInlineStreamEnd(stream) {
    const GT = 0x3e;
    const startPos = stream.pos;
    let ch;
    while ((ch = stream.getByte()) !== -1) {
      if (ch === GT) {
        break;
      }
    }
    const length = stream.pos - startPos;
    if (ch === -1) {
      warn(
        "Inline ASCIIHexDecode image stream: " +
          "EOD marker not found, searching for /EI/ instead."
      );
      stream.skip(-length); // Reset the stream position.
      return this.findDefaultInlineStreamEnd(stream);
    }
    this.inlineStreamSkipEI(stream);
    return length;
  }

  /**
   * Skip over the /EI/ for streams where we search for an EOD marker.
   */
  inlineStreamSkipEI(stream) {
    const E = 0x45,
      I = 0x49;
    let state = 0,
      ch;
    while ((ch = stream.getByte()) !== -1) {
      if (state === 0) {
        state = ch === E ? 1 : 0;
      } else if (state === 1) {
        state = ch === I ? 2 : 0;
      } else if (state === 2) {
        break;
      }
    }
  }

  makeInlineImage(cipherTransform) {
    const lexer = this.lexer;
    const stream = lexer.stream;

    // Parse dictionary.
    const dict = new Dict(this.xref);
    let dictLength;
    while (!isCmd(this.buf1, "ID") && !isEOF(this.buf1)) {
      if (!isName(this.buf1)) {
        throw new FormatError("Dictionary key must be a name object");
      }
      const key = this.buf1.name;
      this.shift();
      if (isEOF(this.buf1)) {
        break;
      }
      dict.set(key, this.getObj(cipherTransform));
    }
    if (lexer.beginInlineImagePos !== -1) {
      dictLength = stream.pos - lexer.beginInlineImagePos;
    }

    // Extract the name of the first (i.e. the current) image filter.
    const filter = dict.get("Filter", "F");
    let filterName;
    if (isName(filter)) {
      filterName = filter.name;
    } else if (Array.isArray(filter)) {
      const filterZero = this.xref.fetchIfRef(filter[0]);
      if (isName(filterZero)) {
        filterName = filterZero.name;
      }
    }

    // Parse image stream.
    const startPos = stream.pos;
    let length;
    if (filterName === "DCTDecode" || filterName === "DCT") {
      length = this.findDCTDecodeInlineStreamEnd(stream);
    } else if (filterName === "ASCII85Decode" || filterName === "A85") {
      length = this.findASCII85DecodeInlineStreamEnd(stream);
    } else if (filterName === "ASCIIHexDecode" || filterName === "AHx") {
      length = this.findASCIIHexDecodeInlineStreamEnd(stream);
    } else {
      length = this.findDefaultInlineStreamEnd(stream);
    }
    let imageStream = stream.makeSubStream(startPos, length, dict);

    // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
    // adler32 checksum.
    let cacheKey;
    if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
      const imageBytes = imageStream.getBytes();
      imageStream.reset();

      const initialStreamPos = stream.pos;
      // Set the stream position to the beginning of the dictionary data...
      stream.pos = lexer.beginInlineImagePos;
      // ... and fetch the bytes of the *entire* dictionary.
      const dictBytes = stream.getBytes(dictLength);
      // Finally, don't forget to reset the stream position.
      stream.pos = initialStreamPos;

      cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes);

      const cacheEntry = this.imageCache[cacheKey];
      if (cacheEntry !== undefined) {
        this.buf2 = Cmd.get("EI");
        this.shift();

        cacheEntry.reset();
        return cacheEntry;
      }
    }

    if (cipherTransform) {
      imageStream = cipherTransform.createStream(imageStream, length);
    }

    imageStream = this.filter(imageStream, dict, length);
    imageStream.dict = dict;
    if (cacheKey !== undefined) {
      imageStream.cacheKey = `inline_${length}_${cacheKey}`;
      this.imageCache[cacheKey] = imageStream;
    }

    this.buf2 = Cmd.get("EI");
    this.shift();

    return imageStream;
  }

  _findStreamLength(startPos, signature) {
    const { stream } = this.lexer;
    stream.pos = startPos;

    const SCAN_BLOCK_LENGTH = 2048;
    const signatureLength = signature.length;

    while (stream.pos < stream.end) {
      const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
      const scanLength = scanBytes.length - signatureLength;

      if (scanLength <= 0) {
        break;
      }
      let pos = 0;
      while (pos < scanLength) {
        let j = 0;
        while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
          j++;
        }
        if (j >= signatureLength) {
          // `signature` found.
          stream.pos += pos;
          return stream.pos - startPos;
        }
        pos++;
      }
      stream.pos += scanLength;
    }
    return -1;
  }

  makeStream(dict, cipherTransform) {
    const lexer = this.lexer;
    let stream = lexer.stream;

    // Get the stream's start position.
    lexer.skipToNextLine();
    const startPos = stream.pos - 1;

    // Get the length.
    let length = dict.get("Length");
    if (!Number.isInteger(length)) {
      info(`Bad length "${length}" in stream`);
      length = 0;
    }

    // Skip over the stream data.
    stream.pos = startPos + length;
    lexer.nextChar();

    // Shift '>>' and check whether the new object marks the end of the stream.
    if (this.tryShift() && isCmd(this.buf2, "endstream")) {
      this.shift(); // 'stream'
    } else {
      // Bad stream length, scanning for endstream command.
      // prettier-ignore
      const ENDSTREAM_SIGNATURE = new Uint8Array([
        0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6D]);
      let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
      if (actualLength < 0) {
        // Only allow limited truncation of the endstream signature,
        // to prevent false positives.
        const MAX_TRUNCATION = 1;
        // Check if the PDF generator included truncated endstream commands,
        // such as e.g. "endstrea" (fixes issue10004.pdf).
        for (let i = 1; i <= MAX_TRUNCATION; i++) {
          const end = ENDSTREAM_SIGNATURE.length - i;
          const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);

          const maybeLength = this._findStreamLength(
            startPos,
            TRUNCATED_SIGNATURE
          );
          if (maybeLength >= 0) {
            // Ensure that the byte immediately following the truncated
            // endstream command is a space, to prevent false positives.
            const lastByte = stream.peekBytes(end + 1)[end];
            if (!isWhiteSpace(lastByte)) {
              break;
            }
            info(
              `Found "${bytesToString(TRUNCATED_SIGNATURE)}" when ` +
                "searching for endstream command."
            );
            actualLength = maybeLength;
            break;
          }
        }

        if (actualLength < 0) {
          throw new FormatError("Missing endstream command.");
        }
      }
      length = actualLength;

      lexer.nextChar();
      this.shift();
      this.shift();
    }
    this.shift(); // 'endstream'

    stream = stream.makeSubStream(startPos, length, dict);
    if (cipherTransform) {
      stream = cipherTransform.createStream(stream, length);
    }
    stream = this.filter(stream, dict, length);
    stream.dict = dict;
    return stream;
  }

  filter(stream, dict, length) {
    let filter = dict.get("Filter", "F");
    let params = dict.get("DecodeParms", "DP");

    if (isName(filter)) {
      if (Array.isArray(params)) {
        warn(
          "/DecodeParms should not contain an Array, " +
            "when /Filter contains a Name."
        );
      }
      return this.makeFilter(stream, filter.name, length, params);
    }

    let maybeLength = length;
    if (Array.isArray(filter)) {
      const filterArray = filter;
      const paramsArray = params;
      for (let i = 0, ii = filterArray.length; i < ii; ++i) {
        filter = this.xref.fetchIfRef(filterArray[i]);
        if (!isName(filter)) {
          throw new FormatError(`Bad filter name "${filter}"`);
        }

        params = null;
        if (Array.isArray(paramsArray) && i in paramsArray) {
          params = this.xref.fetchIfRef(paramsArray[i]);
        }
        stream = this.makeFilter(stream, filter.name, maybeLength, params);
        // After the first stream the `length` variable is invalid.
        maybeLength = null;
      }
    }
    return stream;
  }

  makeFilter(stream, name, maybeLength, params) {
    // Since the 'Length' entry in the stream dictionary can be completely
    // wrong, e.g. zero for non-empty streams, only skip parsing the stream
    // when we can be absolutely certain that it actually is empty.
    if (maybeLength === 0) {
      warn(`Empty "${name}" stream.`);
      return new NullStream();
    }

    try {
      const xrefStreamStats = this.xref.stats.streamTypes;
      if (name === "FlateDecode" || name === "Fl") {
        xrefStreamStats[StreamType.FLATE] = true;
        if (params) {
          return new PredictorStream(
            new FlateStream(stream, maybeLength),
            maybeLength,
            params
          );
        }
        return new FlateStream(stream, maybeLength);
      }
      if (name === "LZWDecode" || name === "LZW") {
        xrefStreamStats[StreamType.LZW] = true;
        let earlyChange = 1;
        if (params) {
          if (params.has("EarlyChange")) {
            earlyChange = params.get("EarlyChange");
          }
          return new PredictorStream(
            new LZWStream(stream, maybeLength, earlyChange),
            maybeLength,
            params
          );
        }
        return new LZWStream(stream, maybeLength, earlyChange);
      }
      if (name === "DCTDecode" || name === "DCT") {
        xrefStreamStats[StreamType.DCT] = true;
        return new JpegStream(stream, maybeLength, stream.dict, params);
      }
      if (name === "JPXDecode" || name === "JPX") {
        xrefStreamStats[StreamType.JPX] = true;
        return new JpxStream(stream, maybeLength, stream.dict, params);
      }
      if (name === "ASCII85Decode" || name === "A85") {
        xrefStreamStats[StreamType.A85] = true;
        return new Ascii85Stream(stream, maybeLength);
      }
      if (name === "ASCIIHexDecode" || name === "AHx") {
        xrefStreamStats[StreamType.AHX] = true;
        return new AsciiHexStream(stream, maybeLength);
      }
      if (name === "CCITTFaxDecode" || name === "CCF") {
        xrefStreamStats[StreamType.CCF] = true;
        return new CCITTFaxStream(stream, maybeLength, params);
      }
      if (name === "RunLengthDecode" || name === "RL") {
        xrefStreamStats[StreamType.RLX] = true;
        return new RunLengthStream(stream, maybeLength);
      }
      if (name === "JBIG2Decode") {
        xrefStreamStats[StreamType.JBIG] = true;
        return new Jbig2Stream(stream, maybeLength, stream.dict, params);
      }
      warn(`Filter "${name}" is not supported.`);
      return stream;
    } catch (ex) {
      if (ex instanceof MissingDataException) {
        throw ex;
      }
      warn(`Invalid stream: "${ex}"`);
      return new NullStream();
    }
  }
}

// A '1' in this array means the character is white space. A '1' or
// '2' means the character ends a name or command.
// prettier-ignore
const specialChars = [
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // fx
];

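// Maps an ASCII hex digit to its numeric value, e.g. 0x37 ('7') -> 7 and
// 0x62 ('b') -> (0x62 & 0x0f) + 9 = 11; any other byte yields -1.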
function toHexDigit(ch) {
  if (ch >= /* '0' = */ 0x30 && ch /* '9' = */ <= 0x39) {
    return ch & 0x0f;
  }
  if (
    (ch >= /* 'A' = */ 0x41 && ch <= /* 'F' = */ 0x46) ||
    (ch >= /* 'a' = */ 0x61 && ch <= /* 'f' = */ 0x66)
  ) {
    return (ch & 0x0f) + 9;
  }
  return -1;
}

class Lexer {
  constructor(stream, knownCommands = null) {
    this.stream = stream;
    this.nextChar();

    // While lexing, we build up many strings one char at a time. Using += for
    // this can result in lots of garbage strings. It's better to build an
    // array of single-char strings and then join() them together at the end.
    // And reusing a single array (i.e. |this.strBuf|) over and over for this
    // purpose uses less memory than using a new array for each string.
    this.strBuf = [];

    // The PDFs might have "glued" commands with other commands, operands or
    // literals, e.g. "q1". The knownCommands is a dictionary of the valid
    // commands and their prefixes. The prefixes are built the following way:
    // if there is a command that is a prefix of another valid command or
    // literal (e.g. 'f' and 'false'), the prefixes 'fa', 'fal' and 'fals'
    // must be included as well. The prefixes are not needed if the command
    // has no other commands or literals as a prefix. The knownCommands is
    // optional.
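    // For example, when both "f" and "false" are valid commands, a
    // (hypothetical) knownCommands object would also contain the keys
    // "fa", "fal" and "fals", so that `getObj` can tell whether extending
    // the current token by one character may still yield a valid command.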
    this.knownCommands = knownCommands;

    this._hexStringNumWarn = 0;
    this.beginInlineImagePos = -1;
  }

  nextChar() {
    return (this.currentChar = this.stream.getByte());
  }

  peekChar() {
    return this.stream.peekByte();
  }

  getNumber() {
    let ch = this.currentChar;
    let eNotation = false;
    let divideBy = 0; // Different from 0 if it's a floating point value.
    let sign = 0;

    if (ch === /* '-' = */ 0x2d) {
      sign = -1;
      ch = this.nextChar();

      if (ch === /* '-' = */ 0x2d) {
        // Ignore double negative (this is consistent with Adobe Reader).
        ch = this.nextChar();
      }
    } else if (ch === /* '+' = */ 0x2b) {
      sign = 1;
      ch = this.nextChar();
    }
    if (ch === /* LF = */ 0x0a || ch === /* CR = */ 0x0d) {
      // Ignore line-breaks (this is consistent with Adobe Reader).
      do {
        ch = this.nextChar();
      } while (ch === 0x0a || ch === 0x0d);
    }
    if (ch === /* '.' = */ 0x2e) {
      divideBy = 10;
      ch = this.nextChar();
    }
    if (ch < /* '0' = */ 0x30 || ch > /* '9' = */ 0x39) {
      if (
        divideBy === 10 &&
        sign === 0 &&
        (isWhiteSpace(ch) || ch === /* EOF = */ -1)
      ) {
        // This is consistent with Adobe Reader (fixes issue9252.pdf).
        warn("Lexer.getNumber - treating a single decimal point as zero.");
        return 0;
      }
      throw new FormatError(
        `Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`
      );
    }

    sign = sign || 1;
    let baseValue = ch - 0x30; // '0'
    let powerValue = 0;
    let powerValueSign = 1;

    while ((ch = this.nextChar()) >= 0) {
      if (ch >= /* '0' = */ 0x30 && ch <= /* '9' = */ 0x39) {
        const currentDigit = ch - 0x30; // '0'
        if (eNotation) {
          // We are after an 'e' or 'E'.
          powerValue = powerValue * 10 + currentDigit;
        } else {
          if (divideBy !== 0) {
            // We are after a point.
            divideBy *= 10;
          }
          baseValue = baseValue * 10 + currentDigit;
        }
      } else if (ch === /* '.' = */ 0x2e) {
        if (divideBy === 0) {
          divideBy = 1;
        } else {
          // A number can have only one dot.
          break;
        }
      } else if (ch === /* '-' = */ 0x2d) {
        // Ignore minus signs in the middle of numbers to match
        // Adobe's behavior.
        warn("Badly formatted number: minus sign in the middle");
      } else if (ch === /* 'E' = */ 0x45 || ch === /* 'e' = */ 0x65) {
        // 'E' can be either a scientific notation or the beginning of a new
        // operator.
        ch = this.peekChar();
        if (ch === /* '+' = */ 0x2b || ch === /* '-' = */ 0x2d) {
          powerValueSign = ch === 0x2d ? -1 : 1;
          this.nextChar(); // Consume the sign character.
        } else if (ch < /* '0' = */ 0x30 || ch > /* '9' = */ 0x39) {
          // The 'E' must be the beginning of a new operator.
          break;
        }
        eNotation = true;
      } else {
        // The last character doesn't belong to us.
        break;
      }
    }

    if (divideBy !== 0) {
      baseValue /= divideBy;
    }
    if (eNotation) {
      baseValue *= 10 ** (powerValueSign * powerValue);
    }
    return sign * baseValue;
  }

  getString() {
    let numParen = 1;
    let done = false;
    const strBuf = this.strBuf;
    strBuf.length = 0;

    let ch = this.nextChar();
    while (true) {
      let charBuffered = false;
      switch (ch | 0) {
        case -1:
          warn("Unterminated string");
          done = true;
          break;
        case 0x28: // '('
          ++numParen;
          strBuf.push("(");
          break;
        case 0x29: // ')'
          if (--numParen === 0) {
            this.nextChar(); // consume strings ')'
            done = true;
          } else {
            strBuf.push(")");
          }
          break;
        case 0x5c: // '\\'
          ch = this.nextChar();
          switch (ch) {
            case -1:
              warn("Unterminated string");
              done = true;
              break;
            case 0x6e: // 'n'
              strBuf.push("\n");
              break;
            case 0x72: // 'r'
              strBuf.push("\r");
              break;
            case 0x74: // 't'
              strBuf.push("\t");
              break;
            case 0x62: // 'b'
              strBuf.push("\b");
              break;
            case 0x66: // 'f'
              strBuf.push("\f");
              break;
            case 0x5c: // '\'
            case 0x28: // '('
            case 0x29: // ')'
              strBuf.push(String.fromCharCode(ch));
              break;
            case 0x30: // '0'
            case 0x31: // '1'
            case 0x32: // '2'
            case 0x33: // '3'
            case 0x34: // '4'
            case 0x35: // '5'
            case 0x36: // '6'
            case 0x37: // '7'
              let x = ch & 0x0f;
              ch = this.nextChar();
              charBuffered = true;
              if (ch >= /* '0' = */ 0x30 && ch <= /* '7' = */ 0x37) {
                x = (x << 3) + (ch & 0x0f);
                ch = this.nextChar();
                if (ch >= /* '0' = */ 0x30 && ch /* '7' = */ <= 0x37) {
                  charBuffered = false;
                  x = (x << 3) + (ch & 0x0f);
                }
              }
              strBuf.push(String.fromCharCode(x));
              break;
            case 0x0d: // CR
              if (this.peekChar() === /* LF = */ 0x0a) {
                this.nextChar();
              }
              break;
            case 0x0a: // LF
              break;
            default:
              strBuf.push(String.fromCharCode(ch));
              break;
          }
          break;
        default:
          strBuf.push(String.fromCharCode(ch));
          break;
      }
      if (done) {
        break;
      }
      if (!charBuffered) {
        ch = this.nextChar();
      }
    }
    return strBuf.join("");
  }

  getName() {
    let ch, previousCh;
    const strBuf = this.strBuf;
    strBuf.length = 0;

    while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
      if (ch === /* '#' = */ 0x23) {
        ch = this.nextChar();
        if (specialChars[ch]) {
          warn(
            "Lexer_getName: " +
              "NUMBER SIGN (#) should be followed by a hexadecimal number."
          );
          strBuf.push("#");
          break;
        }
        const x = toHexDigit(ch);
        if (x !== -1) {
          previousCh = ch;
          ch = this.nextChar();
          const x2 = toHexDigit(ch);
          if (x2 === -1) {
            warn(
              `Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` +
                "in hexadecimal number."
            );
            strBuf.push("#", String.fromCharCode(previousCh));
            if (specialChars[ch]) {
              break;
            }
            strBuf.push(String.fromCharCode(ch));
            continue;
          }
          strBuf.push(String.fromCharCode((x << 4) | x2));
        } else {
          strBuf.push("#", String.fromCharCode(ch));
        }
      } else {
        strBuf.push(String.fromCharCode(ch));
      }
    }
    if (strBuf.length > 127) {
      warn(`Name token is longer than allowed by the spec: ${strBuf.length}`);
    }
    return Name.get(strBuf.join(""));
  }

  /**
   * @private
   */
  _hexStringWarn(ch) {
    const MAX_HEX_STRING_NUM_WARN = 5;

    if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) {
      warn("getHexString - ignoring additional invalid characters.");
      return;
    }
    if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) {
      // Limit the number of warning messages printed for a `this.getHexString`
      // invocation, since corrupt PDF documents may otherwise spam the console
      // enough to affect general performance negatively.
      return;
    }
    warn(`getHexString - ignoring invalid character: ${ch}`);
  }

  getHexString() {
    const strBuf = this.strBuf;
    strBuf.length = 0;
    let ch = this.currentChar;
    let isFirstHex = true;
    let firstDigit, secondDigit;
    this._hexStringNumWarn = 0;

    while (true) {
      if (ch < 0) {
        warn("Unterminated hex string");
        break;
      } else if (ch === /* '>' = */ 0x3e) {
        this.nextChar();
        break;
      } else if (specialChars[ch] === 1) {
        ch = this.nextChar();
        continue;
      } else {
        if (isFirstHex) {
          firstDigit = toHexDigit(ch);
          if (firstDigit === -1) {
            this._hexStringWarn(ch);
            ch = this.nextChar();
            continue;
          }
        } else {
          secondDigit = toHexDigit(ch);
          if (secondDigit === -1) {
            this._hexStringWarn(ch);
            ch = this.nextChar();
            continue;
          }
          strBuf.push(String.fromCharCode((firstDigit << 4) | secondDigit));
        }
        isFirstHex = !isFirstHex;
        ch = this.nextChar();
      }
    }
    return strBuf.join("");
  }

  getObj() {
    // Skip whitespace and comments.
    let comment = false;
    let ch = this.currentChar;
    while (true) {
      if (ch < 0) {
        return EOF;
      }
      if (comment) {
        if (ch === /* LF = */ 0x0a || ch === /* CR = */ 0x0d) {
          comment = false;
        }
      } else if (ch === /* '%' = */ 0x25) {
        comment = true;
      } else if (specialChars[ch] !== 1) {
        break;
      }
      ch = this.nextChar();
    }

    // Start reading a token.
    switch (ch | 0) {
      case 0x30: // '0'
      case 0x31: // '1'
      case 0x32: // '2'
      case 0x33: // '3'
      case 0x34: // '4'
      case 0x35: // '5'
      case 0x36: // '6'
      case 0x37: // '7'
      case 0x38: // '8'
      case 0x39: // '9'
      case 0x2b: // '+'
      case 0x2d: // '-'
      case 0x2e: // '.'
        return this.getNumber();
      case 0x28: // '('
        return this.getString();
      case 0x2f: // '/'
        return this.getName();
      // array punctuation
      case 0x5b: // '['
        this.nextChar();
        return Cmd.get("[");
      case 0x5d: // ']'
        this.nextChar();
        return Cmd.get("]");
      // hex string or dict punctuation
      case 0x3c: // '<'
        ch = this.nextChar();
        if (ch === 0x3c) {
          // dict punctuation
          this.nextChar();
          return Cmd.get("<<");
        }
        return this.getHexString();
      // dict punctuation
      case 0x3e: // '>'
        ch = this.nextChar();
        if (ch === 0x3e) {
          this.nextChar();
          return Cmd.get(">>");
        }
        return Cmd.get(">");
      case 0x7b: // '{'
        this.nextChar();
        return Cmd.get("{");
      case 0x7d: // '}'
        this.nextChar();
        return Cmd.get("}");
      case 0x29: // ')'
        // Consume the current character in order to avoid permanently hanging
        // the worker thread if `Lexer.getObj` is called from within a loop
        // containing try-catch statements, since we would otherwise attempt
        // to parse the *same* character over and over (fixes issue8061.pdf).
        this.nextChar();
        throw new FormatError(`Illegal character: ${ch}`);
    }

    // Start reading a command.
    let str = String.fromCharCode(ch);
    const knownCommands = this.knownCommands;
    let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
    while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
      // Stop if a known command is found and the next character does not make
      // the string a command.
      const possibleCommand = str + String.fromCharCode(ch);
      if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
        break;
      }
      if (str.length === 128) {
        throw new FormatError(`Command token too long: ${str.length}`);
      }
      str = possibleCommand;
      knownCommandFound = knownCommands && knownCommands[str] !== undefined;
    }
    if (str === "true") {
      return true;
    }
    if (str === "false") {
      return false;
    }
    if (str === "null") {
      return null;
    }

    if (str === "BI") {
      // Keep track of the current stream position, since it's needed in order
      // to correctly cache inline images; see `Parser.makeInlineImage`.
      this.beginInlineImagePos = this.stream.pos;
    }

    return Cmd.get(str);
  }

  peekObj() {
    const streamPos = this.stream.pos,
      currentChar = this.currentChar,
      beginInlineImagePos = this.beginInlineImagePos;

    let nextObj;
    try {
      nextObj = this.getObj();
    } catch (ex) {
      if (ex instanceof MissingDataException) {
        throw ex;
      }
      warn(`peekObj: ${ex}`);
    }
    // Ensure that we reset *all* relevant `Lexer`-instance state.
    this.stream.pos = streamPos;
    this.currentChar = currentChar;
    this.beginInlineImagePos = beginInlineImagePos;

    return nextObj;
  }

  skipToNextLine() {
    let ch = this.currentChar;
    while (ch >= 0) {
      if (ch === /* CR = */ 0x0d) {
        ch = this.nextChar();
        if (ch === /* LF = */ 0x0a) {
          this.nextChar();
        }
        break;
      } else if (ch === /* LF = */ 0x0a) {
        this.nextChar();
        break;
      }
      ch = this.nextChar();
    }
  }
}

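// The linearization parameter dictionary is the first object in a linearized
// PDF document and might look like this (illustrative values only):
//   1 0 obj
//   << /Linearized 1 /L 173708 /H [471 164] /O 3 /E 7437 /N 1 /T 173352 >>
//   endobj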
class Linearization {
  static create(stream) {
    function getInt(linDict, name, allowZeroValue = false) {
      const obj = linDict.get(name);
      if (Number.isInteger(obj) && (allowZeroValue ? obj >= 0 : obj > 0)) {
        return obj;
      }
      throw new Error(
        `The "${name}" parameter in the linearization ` +
          "dictionary is invalid."
      );
    }

    function getHints(linDict) {
      const hints = linDict.get("H");
      let hintsLength;

      if (
        Array.isArray(hints) &&
        ((hintsLength = hints.length) === 2 || hintsLength === 4)
      ) {
        for (let index = 0; index < hintsLength; index++) {
          const hint = hints[index];
          if (!(Number.isInteger(hint) && hint > 0)) {
            throw new Error(
              `Hint (${index}) in the linearization dictionary is invalid.`
            );
          }
        }
        return hints;
      }
      throw new Error("Hint array in the linearization dictionary is invalid.");
    }

    const parser = new Parser({
      lexer: new Lexer(stream),
      xref: null,
    });
    const obj1 = parser.getObj();
    const obj2 = parser.getObj();
    const obj3 = parser.getObj();
    const linDict = parser.getObj();
    let obj, length;
    if (
      !(
        Number.isInteger(obj1) &&
        Number.isInteger(obj2) &&
        isCmd(obj3, "obj") &&
        isDict(linDict) &&
        isNum((obj = linDict.get("Linearized"))) &&
        obj > 0
      )
    ) {
      return null; // No valid linearization dictionary found.
    } else if ((length = getInt(linDict, "L")) !== stream.length) {
      throw new Error(
        'The "L" parameter in the linearization dictionary ' +
          "does not equal the stream length."
      );
    }
    return {
      length,
      hints: getHints(linDict),
      objectNumberFirst: getInt(linDict, "O"),
      endFirst: getInt(linDict, "E"),
      numPages: getInt(linDict, "N"),
      mainXRefEntriesOffset: getInt(linDict, "T"),
      pageFirst: linDict.has("P")
        ? getInt(linDict, "P", /* allowZeroValue = */ true)
        : 0,
    };
  }
}

export { Lexer, Linearization, Parser };