pdf.js/test/unit/parser_spec.js

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

447 lines
14 KiB
JavaScript
Raw Normal View History

2017-01-10 01:40:57 +09:00
/* Copyright 2017 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2012-10-25 00:45:05 +09:00
import { Cmd, EOF, Name } from "../../src/core/primitives.js";
import { Lexer, Linearization, Parser } from "../../src/core/parser.js";
import { FormatError } from "../../src/shared/util.js";
import { StringStream } from "../../src/core/stream.js";
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever change). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting discussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
describe("parser", function () {
describe("Parser", function () {
describe("inlineStreamSkipEI", function () {
  /**
   * Creates a Parser on top of `data`, runs `inlineStreamSkipEI` on the
   * underlying stream, and hands that stream back for inspection.
   */
  function skipInlineStream(data) {
    const stream = new StringStream(data);
    const parser = new Parser({
      lexer: new Lexer(stream),
      xref: null,
      allowStreams: true,
    });
    parser.inlineStreamSkipEI(stream);
    return stream;
  }

  it("should skip over the EI marker if it is found", function () {
    const data =
      "q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 /F /A85 ID abc123~> EI Q";
    const stream = skipInlineStream(data);

    // The stream must be positioned on the 'Q' that follows the EI marker.
    expect(stream.pos).toEqual(data.indexOf("Q"));
    expect(stream.peekByte()).toEqual(0x51); // 'Q'
  });

  it("should skip to the end of stream if the EI marker is not found", function () {
    const data =
      "q 1 0 0 1 0 0 cm BI /W 10 /H 10 /BPC 1 /F /A85 ID abc123~> Q";
    const stream = skipInlineStream(data);

    // With no EI marker present, all of the data is expected to be consumed.
    expect(stream.pos).toEqual(data.length);
    expect(stream.peekByte()).toEqual(-1);
  });
});
});
describe("Lexer", function () {
describe("nextChar", function () {
  it("should return and set -1 when the end of the stream is reached", function () {
    // An empty stream has no characters to advance to.
    const lexer = new Lexer(new StringStream(""));
    const returned = lexer.nextChar();

    expect(returned).toEqual(-1);
    expect(lexer.currentChar).toEqual(-1);
  });

  it("should return and set the character after the current position", function () {
    // The first call is expected to yield '2' rather than '1'; presumably the
    // Lexer has already read the first character when it was constructed.
    const lexer = new Lexer(new StringStream("123"));
    const returned = lexer.nextChar();

    expect(returned).toEqual(0x32); // '2'
    expect(lexer.currentChar).toEqual(0x32); // '2'
  });
});
describe("peekChar", function () {
  it("should only return -1 when the end of the stream is reached", function () {
    const lexer = new Lexer(new StringStream(""));
    const peeked = lexer.peekChar();

    expect(peeked).toEqual(-1);
    expect(lexer.currentChar).toEqual(-1);
  });

  it("should only return the character after the current position", function () {
    const lexer = new Lexer(new StringStream("123"));
    const peeked = lexer.peekChar();

    // Peeking reports the upcoming character while the current one stays put.
    expect(peeked).toEqual(0x32); // '2'
    expect(lexer.currentChar).toEqual(0x31); // '1'
  });
});
describe("getNumber", function () {
it("should stop parsing numbers at the end of stream", function () {
  // The data ends directly after the last digit, with nothing following it.
  const lexer = new Lexer(new StringStream("11.234"));
  const parsed = lexer.getNumber();

  expect(parsed).toEqual(11.234);
});
it("should parse PostScript numbers", function () {
  // Every entry is lexed on its own and compared against `parseFloat`.
  const numbers = [
    "-.002",
    "34.5",
    "-3.62",
    "123.6e10",
    "1E-5",
    "-1.",
    "0.0",
    "123",
    "-98",
    "43445",
    "0",
    "+17",
  ];
  for (const number of numbers) {
    const input = new StringStream(number);
    const lexer = new Lexer(input);
    const result = lexer.getNumber();
    const expected = parseFloat(number);

    // Tolerate tiny differences (below 1e-15) instead of failing; they are
    // reported on the console so that they do not go unnoticed.
    if (result !== expected && Math.abs(result - expected) < 1e-15) {
      console.error(
        `Fuzzy matching "${result}" with "${expected}" to ` +
          "work-around rounding bugs in Chromium browsers."
      );
      // Trivially-true expectation; presumably it records the fuzzy match as
      // a passing expectation for this entry.
      expect(true).toEqual(true);
      continue;
    }
    expect(result).toEqual(expected);
  }
});
it("should ignore double negative before number", function () {
  // Two leading minus signs are expected to yield a single negation.
  const lexer = new Lexer(new StringStream("--205.88"));
  const parsed = lexer.getNumber();

  expect(parsed).toEqual(-205.88);
});
it("should ignore minus signs in the middle of number", function () {
  // The stray "--" between the integer and fractional parts is skipped.
  const lexer = new Lexer(new StringStream("205--.88"));
  const parsed = lexer.getNumber();

  expect(parsed).toEqual(205.88);
});
it("should ignore line-breaks between operator and digit in number", function () {
  // [data, expected value]; a CR+LF pair separates the sign from the digits.
  const cases = [
    ["-\r\n205.88", -205.88],
    ["+\r\n205.88", 205.88],
  ];
  for (const [data, expected] of cases) {
    const lexer = new Lexer(new StringStream(data));
    expect(lexer.getNumber()).toEqual(expected);
  }
});
it("should treat a single decimal point, or minus/plus sign, as zero", function () {
  // Inputs without any digits that are still expected to lex as zero.
  const zeroLike = [".", "-", "+", "-.", "+.", "-\r\n.", "+\r\n."];
  for (const data of zeroLike) {
    const lexer = new Lexer(new StringStream(data));
    expect(lexer.getNumber()).toEqual(0);
  }

  // A decimal point followed by another point or a sign must be rejected.
  const rejected = ["..", ".-", ".+"];
  for (const data of rejected) {
    const lexer = new Lexer(new StringStream(data));
    const parse = function () {
      return lexer.getNumber();
    };
    expect(parse).toThrowError(FormatError, /^Invalid number:\s/);
  }
});
it("should handle glued numbers and operators", function () {
  // "ET" directly follows the digits, without any separating whitespace.
  const lexer = new Lexer(new StringStream("123ET"));
  const parsed = lexer.getNumber();

  expect(parsed).toEqual(123);
  // The 'E' must still be available to the lexer, i.e. it was not consumed.
  expect(lexer.currentChar).toEqual(0x45); // 'E'
});
2013-02-24 02:35:18 +09:00
});
describe("getString", function () {
it("should stop parsing strings at the end of stream", function () {
  const stream = new StringStream("(1$4)");
  const originalGetByte = stream.getByte;
  // Simulate the data ending early (see issue 2766): when the '$' byte is
  // read, report -1 instead.
  stream.getByte = function () {
    const ch = originalGetByte.call(stream);
    return ch === 0x24 /* '$' */ ? -1 : ch;
  };

  const lexer = new Lexer(stream);
  // Only the part of the string before the simulated end is returned.
  expect(lexer.getString()).toEqual("1");
});
2014-03-21 01:50:12 +09:00
it("should ignore escaped CR and LF", function () {
  // The octal escapes \101..\104 are separated by a backslash followed by
  // <CR><LF>, <CR> and <LF> respectively; the result should be 'ABCD'.
  const stream = new StringStream("(\\101\\\r\n\\102\\\r\\103\\\n\\104)");
  const lexer = new Lexer(stream);
  const parsed = lexer.getString();

  expect(parsed).toEqual("ABCD");
});
2014-03-21 01:50:12 +09:00
});
describe("getHexString", function () {
  it("should not throw exception on bad input", function () {
    // The digits are separated by spaces; '7 0 2 15 5 2 2 2 4 3 2 4' should
    // be parsed as the bytes '70 21 55 22 24 32'.
    const lexer = new Lexer(new StringStream("<7 0 2 15 5 2 2 2 4 3 2 4>"));
    const parsed = lexer.getHexString();

    expect(parsed).toEqual('p!U"$2');
  });
});
describe("getName", function () {
  it("should handle Names with invalid usage of NUMBER SIGN (#)", function () {
    // [data, expected name]; in each case the '#' is not followed by two
    // hexadecimal digits and is expected to be kept literally.
    const cases = [
      ["/# 680 0 R", "#"],
      ["/#AQwerty", "#AQwerty"],
      ["/#A<</B", "#A"],
    ];
    for (const [data, expectedName] of cases) {
      const lexer = new Lexer(new StringStream(data));
      expect(lexer.getName()).toEqual(Name.get(expectedName));
    }
  });
});
describe("getObj", function () {
  it(
    "should stop immediately when the start of a command is " +
      "a non-visible ASCII character (issue 13999)",
    function () {
      const lexer = new Lexer(new StringStream("\x14q\nQ"));

      // The control character must become a command of its own, rather than
      // being merged with the "q" that follows it.
      for (const expectedCmd of ["\x14", "q", "Q"]) {
        const obj = lexer.getObj();
        expect(obj instanceof Cmd).toEqual(true);
        expect(obj.cmd).toEqual(expectedCmd);
      }

      // Nothing is left once the three commands have been read.
      expect(lexer.getObj()).toEqual(EOF);
    }
  );
});
2012-10-25 00:45:05 +09:00
});
describe("Linearization", function () {
it("should not find a linearization dictionary", function () {
  // Not an actual linearization dictionary.
  const plainObject = [
    "3 0 obj",
    "<<",
    "/Length 4622",
    "/Filter /FlateDecode",
    ">>",
    "endobj",
  ].join("\n");
  expect(Linearization.create(new StringStream(plainObject))).toEqual(null);

  // Linearization dictionary with invalid version number.
  const invalidVersion = [
    "1 0 obj",
    "<<",
    "/Linearized 0",
    ">>",
    "endobj",
  ].join("\n");
  expect(Linearization.create(new StringStream(invalidVersion))).toEqual(
    null
  );
});
it("should accept a valid linearization dictionary", function () {
  // The joined data is 90 characters long, which is the value given for /L.
  const data = [
    "131 0 obj",
    "<<",
    "/Linearized 1",
    "/O 133",
    "/H [ 1388 863 ]",
    "/L 90",
    "/E 43573",
    "/N 18",
    "/T 193883",
    ">>",
    "endobj",
  ].join("\n");

  const linearization = Linearization.create(new StringStream(data));

  // Each dictionary entry above maps onto one named property; `pageFirst`
  // has no entry in the data and is expected to be 0.
  expect(linearization).toEqual({
    length: 90,
    hints: [1388, 863],
    objectNumberFirst: 133,
    endFirst: 43573,
    numPages: 18,
    mainXRefEntriesOffset: 193883,
    pageFirst: 0,
  });
});
it(
  "should reject a linearization dictionary with invalid " +
    "integer parameters",
  function () {
    // Each case below changes a single entry of an otherwise identical
    // dictionary and checks the exact error that is thrown for it.

    // The /L parameter should be equal to the stream length.
    // prettier-ignore
    const stream1 = new StringStream(
      "1 0 obj\n" +
      "<<\n" +
      "/Linearized 1\n" +
      "/O 133\n" +
      "/H [ 1388 863 ]\n" +
      "/L 196622\n" +
      "/E 43573\n" +
      "/N 18\n" +
      "/T 193883\n" +
      ">>\n" +
      "endobj"
    );
    expect(function () {
      return Linearization.create(stream1);
    }).toThrow(
      new Error(
        'The "L" parameter in the linearization ' +
          "dictionary does not equal the stream length."
      )
    );

    // The /E parameter should not be zero.
    // prettier-ignore
    const stream2 = new StringStream(
      "1 0 obj\n" +
      "<<\n" +
      "/Linearized 1\n" +
      "/O 133\n" +
      "/H [ 1388 863 ]\n" +
      "/L 84\n" +
      "/E 0\n" +
      "/N 18\n" +
      "/T 193883\n" +
      ">>\n" +
      "endobj"
    );
    expect(function () {
      return Linearization.create(stream2);
    }).toThrow(
      new Error(
        'The "E" parameter in the linearization dictionary is invalid.'
      )
    );

    // The /O parameter should be an integer.
    // prettier-ignore
    const stream3 = new StringStream(
      "1 0 obj\n" +
      "<<\n" +
      "/Linearized 1\n" +
      "/O /abc\n" +
      "/H [ 1388 863 ]\n" +
      "/L 89\n" +
      "/E 43573\n" +
      "/N 18\n" +
      "/T 193883\n" +
      ">>\n" +
      "endobj"
    );
    expect(function () {
      return Linearization.create(stream3);
    }).toThrow(
      new Error(
        'The "O" parameter in the linearization dictionary is invalid.'
      )
    );
  }
);
it("should reject a linearization dictionary with invalid hint parameters", function () {
  // Builds a linearization dictionary in which only the /H and /L entries
  // vary; the /L values used below equal the length of the generated data.
  function createStream(hintEntry, lengthEntry) {
    const lines = [
      "1 0 obj",
      "<<",
      "/Linearized 1",
      "/O 133",
      hintEntry,
      lengthEntry,
      "/E 43573",
      "/N 18",
      "/T 193883",
      ">>",
      "endobj",
    ];
    return new StringStream(lines.join("\n"));
  }

  // [/H entry, /L entry, expected error message]
  const cases = [
    // The /H parameter should be an array.
    [
      "/H 1388",
      "/L 80",
      "Hint array in the linearization dictionary is invalid.",
    ],
    // The hint array should contain two, or four, elements.
    [
      "/H [ 1388 ]",
      "/L 84",
      "Hint array in the linearization dictionary is invalid.",
    ],
    // The hint array should not contain zero.
    [
      "/H [ 1388 863 0 234]",
      "/L 93",
      "Hint (2) in the linearization dictionary is invalid.",
    ],
  ];

  for (const [hintEntry, lengthEntry, message] of cases) {
    const stream = createStream(hintEntry, lengthEntry);
    expect(function () {
      return Linearization.create(stream);
    }).toThrow(new Error(message));
  }
});
});
2012-10-25 00:45:05 +09:00
});