Replace the XRef.cache Array with a Map instead

Given that the different types of `Stream`s are never cached, the `XRef.cache` Array will *always* be more-or-less sparse.
Generally speaking, the longer the document, the sparser the `XRef.cache` becomes. For example, looking at the `pdf.pdf` file from the test-suite: the length of the `XRef.cache` Array will be a few hundred thousand elements, with approximately 95% of them being empty.

Hence an Array clearly isn't the best data-structure for this kind of cache, and this patch changes it to a Map instead.

This patch-series was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file:
```
[
    {  "id": "issue2618",
       "file": "../web/pdfs/issue2618.pdf",
       "md5": "",
       "rounds": 200,
       "type": "eq"
    }
]
```

which gave the following results when comparing this patch-series against the `master` branch:
```
-- Grouped By browser, stat --
browser | stat         | Count | Baseline(ms) | Current(ms) | +/- |    %  | Result(P<.05)
------- | ------------ | ----- | ------------ | ----------- | --- | ----- | -------------
Firefox | Overall      |   200 |         2736 |        2736 |   1 |  0.02 |
Firefox | Page Request |   200 |            2 |           2 |   0 | -8.26 |        faster
Firefox | Rendering    |   200 |         2733 |        2734 |   1 |  0.03 |
```
This commit is contained in:
Jonas Jenwald 2019-08-17 11:44:51 +02:00
parent 34a53b9f5d
commit 1cd9a28c81

View File

@ -1043,8 +1043,7 @@ var XRef = (function XRefClosure() {
this.pdfManager = pdfManager;
this.entries = [];
this.xrefstms = Object.create(null);
// prepare the XRef cache
this.cache = [];
this._cacheMap = new Map(); // Prepare the XRef cache.
this.stats = {
streamTypes: Object.create(null),
fontTypes: Object.create(null),
@ -1636,9 +1635,10 @@ var XRef = (function XRefClosure() {
if (!(ref instanceof Ref)) {
throw new Error('ref object is not a reference');
}
var num = ref.num;
if (num in this.cache) {
var cacheEntry = this.cache[num];
const num = ref.num;
if (this._cacheMap.has(num)) {
const cacheEntry = this._cacheMap.get(num);
// In documents with Object Streams, it's possible that cached `Dict`s
// have not been assigned an `objId` yet (see e.g. issue3115r.pdf).
if (cacheEntry instanceof Dict && !cacheEntry.objId) {
@ -1646,12 +1646,11 @@ var XRef = (function XRefClosure() {
}
return cacheEntry;
}
let xrefEntry = this.getEntry(num);
var xrefEntry = this.getEntry(num);
// the referenced entry can be free
if (xrefEntry === null) {
return (this.cache[num] = null);
if (xrefEntry === null) { // The referenced entry can be free.
this._cacheMap.set(num, xrefEntry);
return xrefEntry;
}
if (xrefEntry.uncompressed) {
@ -1709,7 +1708,7 @@ var XRef = (function XRefClosure() {
xrefEntry = parser.getObj();
}
if (!isStream(xrefEntry)) {
this.cache[num] = xrefEntry;
this._cacheMap.set(num, xrefEntry);
}
return xrefEntry;
},
@ -1757,7 +1756,7 @@ var XRef = (function XRefClosure() {
num = nums[i];
var entry = this.entries[num];
if (entry && entry.offset === tableOffset && entry.gen === i) {
this.cache[num] = entries[i];
this._cacheMap.set(num, entries[i]);
}
}
xrefEntry = entries[xrefEntry.gen];