pdf.js/src/core/primitives.js

412 lines
9.5 KiB
JavaScript
Raw Normal View History

/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
Slightly simplify the lookup of data in `Dict.{get, getAsync, has}` Note that `Dict.set` will only be called with values returned through `Parser.getObj`, and thus indirectly via `Lexer.getObj`. Since neither of those methods will ever return `undefined`, we can simply assert that that's the case when inserting data into the `Dict` and thus get rid of `in` checks when doing the data lookups. In this case, since `Dict.set` is fairly hot, the patch utilizes an *inline check* and when necessary a direct call to `unreachable` to not affect performance of `gulp server/test` too much (rather than always just calling `assert`). For very large and complex PDF files this will help performance *slightly*, since `Dict.{get, getAsync, has}` is called *a lot* during parsing in the worker. This patch was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file: ``` [ { "id": "issue2618", "file": "../web/pdfs/issue2618.pdf", "md5": "", "rounds": 250, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 250 | 2838 | 2820 | -18 | -0.65 | faster Firefox | Page Request | 250 | 1 | 2 | 0 | 11.92 | slower Firefox | Rendering | 250 | 2837 | 2818 | -19 | -0.65 | faster ```
2020-01-31 19:39:48 +09:00
import { assert, unreachable } from "../shared/util.js";
const EOF = {};
const Name = (function NameClosure() {
let nameCache = Object.create(null);
// eslint-disable-next-line no-shadow
function Name(name) {
this.name = name;
}
Name.prototype = {};
Name.get = function Name_get(name) {
const nameValue = nameCache[name];
// eslint-disable-next-line no-restricted-syntax
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
return nameValue ? nameValue : (nameCache[name] = new Name(name));
};
Name._clearCache = function () {
nameCache = Object.create(null);
};
return Name;
})();
const Cmd = (function CmdClosure() {
let cmdCache = Object.create(null);
// eslint-disable-next-line no-shadow
function Cmd(cmd) {
this.cmd = cmd;
}
Cmd.prototype = {};
Cmd.get = function Cmd_get(cmd) {
const cmdValue = cmdCache[cmd];
// eslint-disable-next-line no-restricted-syntax
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
return cmdValue ? cmdValue : (cmdCache[cmd] = new Cmd(cmd));
};
Cmd._clearCache = function () {
cmdCache = Object.create(null);
};
return Cmd;
})();
const Dict = (function DictClosure() {
const nonSerializable = function nonSerializableClosure() {
return nonSerializable; // creating closure on some variable
};
// xref is optional
// eslint-disable-next-line no-shadow
function Dict(xref) {
// Map should only be used internally, use functions below to access.
this._map = Object.create(null);
this.xref = xref;
this.objId = null;
this.suppressEncryption = false;
this.__nonSerializable__ = nonSerializable; // disable cloning of the Dict
}
Dict.prototype = {
assignXref: function Dict_assignXref(newXref) {
this.xref = newXref;
},
get size() {
return Object.keys(this._map).length;
},
// automatically dereferences Ref objects
get(key1, key2, key3) {
let value = this._map[key1];
Slightly simplify the lookup of data in `Dict.{get, getAsync, has}` Note that `Dict.set` will only be called with values returned through `Parser.getObj`, and thus indirectly via `Lexer.getObj`. Since neither of those methods will ever return `undefined`, we can simply assert that that's the case when inserting data into the `Dict` and thus get rid of `in` checks when doing the data lookups. In this case, since `Dict.set` is fairly hot, the patch utilizes an *inline check* and when necessary a direct call to `unreachable` to not affect performance of `gulp server/test` too much (rather than always just calling `assert`). For very large and complex PDF files this will help performance *slightly*, since `Dict.{get, getAsync, has}` is called *a lot* during parsing in the worker. This patch was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file: ``` [ { "id": "issue2618", "file": "../web/pdfs/issue2618.pdf", "md5": "", "rounds": 250, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 250 | 2838 | 2820 | -18 | -0.65 | faster Firefox | Page Request | 250 | 1 | 2 | 0 | 11.92 | slower Firefox | Rendering | 250 | 2837 | 2818 | -19 | -0.65 | faster ```
2020-01-31 19:39:48 +09:00
if (value === undefined && key2 !== undefined) {
value = this._map[key2];
Slightly simplify the lookup of data in `Dict.{get, getAsync, has}` Note that `Dict.set` will only be called with values returned through `Parser.getObj`, and thus indirectly via `Lexer.getObj`. Since neither of those methods will ever return `undefined`, we can simply assert that that's the case when inserting data into the `Dict` and thus get rid of `in` checks when doing the data lookups. In this case, since `Dict.set` is fairly hot, the patch utilizes an *inline check* and when necessary a direct call to `unreachable` to not affect performance of `gulp server/test` too much (rather than always just calling `assert`). For very large and complex PDF files this will help performance *slightly*, since `Dict.{get, getAsync, has}` is called *a lot* during parsing in the worker. This patch was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file: ``` [ { "id": "issue2618", "file": "../web/pdfs/issue2618.pdf", "md5": "", "rounds": 250, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 250 | 2838 | 2820 | -18 | -0.65 | faster Firefox | Page Request | 250 | 1 | 2 | 0 | 11.92 | slower Firefox | Rendering | 250 | 2837 | 2818 | -19 | -0.65 | faster ```
2020-01-31 19:39:48 +09:00
if (value === undefined && key3 !== undefined) {
value = this._map[key3];
}
}
if (value instanceof Ref && this.xref) {
return this.xref.fetch(value, this.suppressEncryption);
}
return value;
},
// Same as get(), but returns a promise and uses fetchIfRefAsync().
async getAsync(key1, key2, key3) {
let value = this._map[key1];
Slightly simplify the lookup of data in `Dict.{get, getAsync, has}` Note that `Dict.set` will only be called with values returned through `Parser.getObj`, and thus indirectly via `Lexer.getObj`. Since neither of those methods will ever return `undefined`, we can simply assert that that's the case when inserting data into the `Dict` and thus get rid of `in` checks when doing the data lookups. In this case, since `Dict.set` is fairly hot, the patch utilizes an *inline check* and when necessary a direct call to `unreachable` to not affect performance of `gulp server/test` too much (rather than always just calling `assert`). For very large and complex PDF files this will help performance *slightly*, since `Dict.{get, getAsync, has}` is called *a lot* during parsing in the worker. This patch was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file: ``` [ { "id": "issue2618", "file": "../web/pdfs/issue2618.pdf", "md5": "", "rounds": 250, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 250 | 2838 | 2820 | -18 | -0.65 | faster Firefox | Page Request | 250 | 1 | 2 | 0 | 11.92 | slower Firefox | Rendering | 250 | 2837 | 2818 | -19 | -0.65 | faster ```
2020-01-31 19:39:48 +09:00
if (value === undefined && key2 !== undefined) {
value = this._map[key2];
Slightly simplify the lookup of data in `Dict.{get, getAsync, has}` Note that `Dict.set` will only be called with values returned through `Parser.getObj`, and thus indirectly via `Lexer.getObj`. Since neither of those methods will ever return `undefined`, we can simply assert that that's the case when inserting data into the `Dict` and thus get rid of `in` checks when doing the data lookups. In this case, since `Dict.set` is fairly hot, the patch utilizes an *inline check* and when necessary a direct call to `unreachable` to not affect performance of `gulp server/test` too much (rather than always just calling `assert`). For very large and complex PDF files this will help performance *slightly*, since `Dict.{get, getAsync, has}` is called *a lot* during parsing in the worker. This patch was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file: ``` [ { "id": "issue2618", "file": "../web/pdfs/issue2618.pdf", "md5": "", "rounds": 250, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 250 | 2838 | 2820 | -18 | -0.65 | faster Firefox | Page Request | 250 | 1 | 2 | 0 | 11.92 | slower Firefox | Rendering | 250 | 2837 | 2818 | -19 | -0.65 | faster ```
2020-01-31 19:39:48 +09:00
if (value === undefined && key3 !== undefined) {
value = this._map[key3];
}
}
if (value instanceof Ref && this.xref) {
return this.xref.fetchAsync(value, this.suppressEncryption);
}
return value;
},
// Same as get(), but dereferences all elements if the result is an Array.
getArray(key1, key2, key3) {
let value = this.get(key1, key2, key3);
if (!Array.isArray(value) || !this.xref) {
return value;
}
value = value.slice(); // Ensure that we don't modify the Dict data.
for (let i = 0, ii = value.length; i < ii; i++) {
if (!(value[i] instanceof Ref)) {
continue;
}
value[i] = this.xref.fetch(value[i], this.suppressEncryption);
}
return value;
},
// no dereferencing
getRaw: function Dict_getRaw(key) {
return this._map[key];
},
getKeys: function Dict_getKeys() {
return Object.keys(this._map);
},
// no dereferencing
getRawValues: function Dict_getRawValues() {
return Object.values(this._map);
},
set: function Dict_set(key, value) {
Slightly simplify the lookup of data in `Dict.{get, getAsync, has}` Note that `Dict.set` will only be called with values returned through `Parser.getObj`, and thus indirectly via `Lexer.getObj`. Since neither of those methods will ever return `undefined`, we can simply assert that that's the case when inserting data into the `Dict` and thus get rid of `in` checks when doing the data lookups. In this case, since `Dict.set` is fairly hot, the patch utilizes an *inline check* and when necessary a direct call to `unreachable` to not affect performance of `gulp server/test` too much (rather than always just calling `assert`). For very large and complex PDF files this will help performance *slightly*, since `Dict.{get, getAsync, has}` is called *a lot* during parsing in the worker. This patch was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file: ``` [ { "id": "issue2618", "file": "../web/pdfs/issue2618.pdf", "md5": "", "rounds": 250, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 250 | 2838 | 2820 | -18 | -0.65 | faster Firefox | Page Request | 250 | 1 | 2 | 0 | 11.92 | slower Firefox | Rendering | 250 | 2837 | 2818 | -19 | -0.65 | faster ```
2020-01-31 19:39:48 +09:00
if (
(typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING")) &&
value === undefined
) {
unreachable('Dict.set: The "value" cannot be undefined.');
}
this._map[key] = value;
},
has: function Dict_has(key) {
Slightly simplify the lookup of data in `Dict.{get, getAsync, has}` Note that `Dict.set` will only be called with values returned through `Parser.getObj`, and thus indirectly via `Lexer.getObj`. Since neither of those methods will ever return `undefined`, we can simply assert that that's the case when inserting data into the `Dict` and thus get rid of `in` checks when doing the data lookups. In this case, since `Dict.set` is fairly hot, the patch utilizes an *inline check* and when necessary a direct call to `unreachable` to not affect performance of `gulp server/test` too much (rather than always just calling `assert`). For very large and complex PDF files this will help performance *slightly*, since `Dict.{get, getAsync, has}` is called *a lot* during parsing in the worker. This patch was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file: ``` [ { "id": "issue2618", "file": "../web/pdfs/issue2618.pdf", "md5": "", "rounds": 250, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 250 | 2838 | 2820 | -18 | -0.65 | faster Firefox | Page Request | 250 | 1 | 2 | 0 | 11.92 | slower Firefox | Rendering | 250 | 2837 | 2818 | -19 | -0.65 | faster ```
2020-01-31 19:39:48 +09:00
return this._map[key] !== undefined;
},
forEach: function Dict_forEach(callback) {
for (const key in this._map) {
callback(key, this.get(key));
}
Fix inconsistent spacing and trailing commas in objects in `src/core/` files, so we can enable the `comma-dangle` and `object-curly-spacing` ESLint rules later on *Unfortunately this patch is fairly big, even though it only covers the `src/core` folder, but splitting it even further seemed difficult.* http://eslint.org/docs/rules/comma-dangle http://eslint.org/docs/rules/object-curly-spacing Given that we currently have quite inconsistent object formatting, fixing this in *one* big patch probably wouldn't be feasible (since I cannot imagine anyone wanting to review that); hence I've opted to try and do this piecewise instead. Please note: This patch was created automatically, using the ESLint --fix command line option. In a couple of places this caused lines to become too long, and I've fixed those manually; please refer to the interdiff below for the only hand-edits in this patch. ```diff diff --git a/src/core/evaluator.js b/src/core/evaluator.js index abab9027..dcd3594b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2785,7 +2785,8 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { t['Tz'] = { id: OPS.setHScale, numArgs: 1, variableArgs: false, }; t['TL'] = { id: OPS.setLeading, numArgs: 1, variableArgs: false, }; t['Tf'] = { id: OPS.setFont, numArgs: 2, variableArgs: false, }; - t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false, }; + t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, + variableArgs: false, }; t['Ts'] = { id: OPS.setTextRise, numArgs: 1, variableArgs: false, }; t['Td'] = { id: OPS.moveText, numArgs: 2, variableArgs: false, }; t['TD'] = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false, }; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 5a17d482..71671541 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -123,19 +123,22 @@ var Jbig2Image = (function Jbig2ImageClosure() { { x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -2, y: 0, }, { x: -1, y: 0, }], [{ x: -3, y: -1, }, { x: -2, y: -1, }, { x: -1, y: -1, }, { x: 0, y: -1, }, - { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, { x: -1, y: 0, }] + { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, + { x: -1, y: 0, }] ]; var RefinementTemplates = [ { coding: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, - { x: 1, y: 0, }, { x: -1, y: 1, }, { x: 0, y: 1, }, { x: 1, y: 1, }], + reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, + { x: 0, y: 0, }, { x: 1, y: 0, }, { x: -1, y: 1, }, + { x: 0, y: 1, }, { x: 1, y: 1, }], }, { - coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, { x: 1, y: 0, }, - { x: 0, y: 1, }, { x: 1, y: 1, }], + coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, + { x: -1, y: 0, }], + reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, + { x: 1, y: 0, }, { x: 0, y: 1, }, { x: 1, y: 1, }], } ]; ```
2017-06-02 18:16:24 +09:00
},
};
Dict.empty = (function () {
const emptyDict = new Dict(null);
emptyDict.set = (key, value) => {
unreachable("Should not call `set` on the empty dictionary.");
};
return emptyDict;
})();
Dict.merge = function ({ xref, dictArray, mergeSubDicts = false }) {
const mergedDict = new Dict(xref);
if (!mergeSubDicts) {
for (const dict of dictArray) {
if (!(dict instanceof Dict)) {
continue;
}
for (const [key, value] of Object.entries(dict._map)) {
if (mergedDict._map[key] === undefined) {
mergedDict._map[key] = value;
}
}
}
return mergedDict.size > 0 ? mergedDict : Dict.empty;
}
const properties = new Map();
for (const dict of dictArray) {
if (!(dict instanceof Dict)) {
continue;
}
for (const [key, value] of Object.entries(dict._map)) {
let property = properties.get(key);
if (property === undefined) {
property = [];
properties.set(key, property);
}
property.push(value);
}
}
for (const [name, values] of properties) {
if (values.length === 1 || !(values[0] instanceof Dict)) {
mergedDict._map[name] = values[0];
continue;
}
const subDict = new Dict(xref);
for (const dict of values) {
if (!(dict instanceof Dict)) {
continue;
}
for (const [key, value] of Object.entries(dict._map)) {
if (subDict._map[key] === undefined) {
subDict._map[key] = value;
}
}
}
if (subDict.size > 0) {
mergedDict._map[name] = subDict;
}
}
properties.clear();
return mergedDict.size > 0 ? mergedDict : Dict.empty;
};
return Dict;
})();
const Ref = (function RefClosure() {
let refCache = Object.create(null);
// eslint-disable-next-line no-shadow
function Ref(num, gen) {
this.num = num;
this.gen = gen;
}
Ref.prototype = {
toString: function Ref_toString() {
// This function is hot, so we make the string as compact as possible.
// |this.gen| is almost always zero, so we treat that case specially.
if (this.gen === 0) {
return `${this.num}R`;
}
return `${this.num}R${this.gen}`;
Fix inconsistent spacing and trailing commas in objects in `src/core/` files, so we can enable the `comma-dangle` and `object-curly-spacing` ESLint rules later on *Unfortunately this patch is fairly big, even though it only covers the `src/core` folder, but splitting it even further seemed difficult.* http://eslint.org/docs/rules/comma-dangle http://eslint.org/docs/rules/object-curly-spacing Given that we currently have quite inconsistent object formatting, fixing this in *one* big patch probably wouldn't be feasible (since I cannot imagine anyone wanting to review that); hence I've opted to try and do this piecewise instead. Please note: This patch was created automatically, using the ESLint --fix command line option. In a couple of places this caused lines to become too long, and I've fixed those manually; please refer to the interdiff below for the only hand-edits in this patch. ```diff diff --git a/src/core/evaluator.js b/src/core/evaluator.js index abab9027..dcd3594b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2785,7 +2785,8 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { t['Tz'] = { id: OPS.setHScale, numArgs: 1, variableArgs: false, }; t['TL'] = { id: OPS.setLeading, numArgs: 1, variableArgs: false, }; t['Tf'] = { id: OPS.setFont, numArgs: 2, variableArgs: false, }; - t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false, }; + t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, + variableArgs: false, }; t['Ts'] = { id: OPS.setTextRise, numArgs: 1, variableArgs: false, }; t['Td'] = { id: OPS.moveText, numArgs: 2, variableArgs: false, }; t['TD'] = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false, }; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 5a17d482..71671541 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -123,19 +123,22 @@ var Jbig2Image = (function Jbig2ImageClosure() { { x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -2, y: 0, }, { x: -1, y: 0, }], [{ x: -3, y: -1, }, { x: -2, y: -1, }, { x: -1, y: -1, }, { x: 0, y: -1, }, - { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, { x: -1, y: 0, }] + { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, + { x: -1, y: 0, }] ]; var RefinementTemplates = [ { coding: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, - { x: 1, y: 0, }, { x: -1, y: 1, }, { x: 0, y: 1, }, { x: 1, y: 1, }], + reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, + { x: 0, y: 0, }, { x: 1, y: 0, }, { x: -1, y: 1, }, + { x: 0, y: 1, }, { x: 1, y: 1, }], }, { - coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, { x: 1, y: 0, }, - { x: 0, y: 1, }, { x: 1, y: 1, }], + coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, + { x: -1, y: 0, }], + reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, + { x: 1, y: 0, }, { x: 0, y: 1, }, { x: 1, y: 1, }], } ]; ```
2017-06-02 18:16:24 +09:00
},
};
Ref.get = function (num, gen) {
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
const key = gen === 0 ? `${num}R` : `${num}R${gen}`;
const refValue = refCache[key];
// eslint-disable-next-line no-restricted-syntax
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
return refValue ? refValue : (refCache[key] = new Ref(num, gen));
};
Ref._clearCache = function () {
refCache = Object.create(null);
};
return Ref;
})();
// The reference is identified by number and generation.
// This structure stores only one instance of the reference.
class RefSet {
Add global caching, for /Resources without blend modes, and use it to reduce repeated fetching/parsing in `PartialEvaluator.hasBlendModes` The `PartialEvaluator.hasBlendModes` method is necessary to determine if there's any blend modes on a page, which unfortunately requires *synchronous* parsing of the /Resources of each page before its rendering can start (see the "StartRenderPage"-message). In practice it's not uncommon for certain /Resources-entries to be found on more than one page (referenced via the XRef-table), which thus leads to unnecessary re-fetching/re-parsing of data in `PartialEvaluator.hasBlendModes`. To improve performance, especially in pathological cases, we can cache /Resources-entries when it's absolutely clear that they do not contain *any* blend modes at all[1]. This way, subsequent `PartialEvaluator.hasBlendModes` calls can be made significantly more efficient. This patch was tested using the PDF file from issue 6961, i.e. https://github.com/mozilla/pdf.js/files/121712/test.pdf: ``` [ { "id": "issue6961", "file": "../web/pdfs/issue6961.pdf", "md5": "a80e4357a8fda758d96c2c76f2980b03", "rounds": 100, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, page, stat -- browser | page | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ---- | ------------ | ----- | ------------ | ----------- | ---- | ------ | ------------- firefox | 0 | Overall | 100 | 1034 | 555 | -480 | -46.39 | faster firefox | 0 | Page Request | 100 | 489 | 7 | -482 | -98.67 | faster firefox | 0 | Rendering | 100 | 545 | 548 | 2 | 0.45 | firefox | 1 | Overall | 100 | 912 | 428 | -484 | -53.06 | faster firefox | 1 | Page Request | 100 | 487 | 1 | -486 | -99.77 | faster firefox | 1 | Rendering | 100 | 425 | 427 | 2 | 0.51 | ``` --- [1] In the case where blend modes *are* found, it becomes a lot more difficult to know if it's generally safe to skip /Resources-entries. Hence we don't cache anything in that case, however note that most document/pages do not utilize blend modes anyway.
2020-11-05 21:35:33 +09:00
constructor(parent = null) {
if (
(typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING")) &&
parent &&
!(parent instanceof RefSet)
) {
unreachable('RefSet: Invalid "parent" value.');
}
this._set = new Set(parent && parent._set);
}
has(ref) {
return this._set.has(ref.toString());
}
put(ref) {
this._set.add(ref.toString());
}
remove(ref) {
this._set.delete(ref.toString());
}
Add global caching, for /Resources without blend modes, and use it to reduce repeated fetching/parsing in `PartialEvaluator.hasBlendModes` The `PartialEvaluator.hasBlendModes` method is necessary to determine if there's any blend modes on a page, which unfortunately requires *synchronous* parsing of the /Resources of each page before its rendering can start (see the "StartRenderPage"-message). In practice it's not uncommon for certain /Resources-entries to be found on more than one page (referenced via the XRef-table), which thus leads to unnecessary re-fetching/re-parsing of data in `PartialEvaluator.hasBlendModes`. To improve performance, especially in pathological cases, we can cache /Resources-entries when it's absolutely clear that they do not contain *any* blend modes at all[1]. This way, subsequent `PartialEvaluator.hasBlendModes` calls can be made significantly more efficient. This patch was tested using the PDF file from issue 6961, i.e. https://github.com/mozilla/pdf.js/files/121712/test.pdf: ``` [ { "id": "issue6961", "file": "../web/pdfs/issue6961.pdf", "md5": "a80e4357a8fda758d96c2c76f2980b03", "rounds": 100, "type": "eq" } ] ``` which gave the following results when comparing this patch against the `master` branch: ``` -- Grouped By browser, page, stat -- browser | page | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ---- | ------------ | ----- | ------------ | ----------- | ---- | ------ | ------------- firefox | 0 | Overall | 100 | 1034 | 555 | -480 | -46.39 | faster firefox | 0 | Page Request | 100 | 489 | 7 | -482 | -98.67 | faster firefox | 0 | Rendering | 100 | 545 | 548 | 2 | 0.45 | firefox | 1 | Overall | 100 | 912 | 428 | -484 | -53.06 | faster firefox | 1 | Page Request | 100 | 487 | 1 | -486 | -99.77 | faster firefox | 1 | Rendering | 100 | 425 | 427 | 2 | 0.51 | ``` --- [1] In the case where blend modes *are* found, it becomes a lot more difficult to know if it's generally safe to skip /Resources-entries. Hence we don't cache anything in that case, however note that most document/pages do not utilize blend modes anyway.
2020-11-05 21:35:33 +09:00
forEach(callback) {
for (const ref of this._set.values()) {
callback(ref);
}
}
clear() {
this._set.clear();
}
}
class RefSetCache {
constructor() {
this._map = new Map();
}
get size() {
return this._map.size;
}
get(ref) {
return this._map.get(ref.toString());
}
has(ref) {
return this._map.has(ref.toString());
}
put(ref, obj) {
this._map.set(ref.toString(), obj);
}
putAlias(ref, aliasRef) {
this._map.set(ref.toString(), this.get(aliasRef));
}
forEach(callback) {
for (const value of this._map.values()) {
callback(value);
}
}
clear() {
this._map.clear();
}
}
function isEOF(v) {
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
return v === EOF;
}
function isName(v, name) {
return v instanceof Name && (name === undefined || v.name === name);
}
function isCmd(v, cmd) {
return v instanceof Cmd && (cmd === undefined || v.cmd === cmd);
}
function isDict(v, type) {
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
return (
v instanceof Dict && (type === undefined || isName(v.get("Type"), type))
);
}
function isRef(v) {
return v instanceof Ref;
}
function isRefsEqual(v1, v2) {
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
if (
typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING")
) {
assert(
v1 instanceof Ref && v2 instanceof Ref,
"isRefsEqual: Both parameters should be `Ref`s."
);
}
return v1.num === v2.num && v1.gen === v2.gen;
}
function isStream(v) {
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
return typeof v === "object" && v !== null && v.getBytes !== undefined;
}
function clearPrimitiveCaches() {
Cmd._clearCache();
Name._clearCache();
Ref._clearCache();
}
export {
clearPrimitiveCaches,
Cmd,
Dict,
EOF,
isCmd,
isDict,
isEOF,
isName,
isRef,
isRefsEqual,
isStream,
Name,
Ref,
RefSet,
RefSetCache,
};