From 34a53b9f5df4909d7e306a9f070994093f392cd3 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 17 Aug 2019 11:06:27 +0200 Subject: [PATCH 1/2] Inline the `isRef` checks in the various `XRef.fetch` related methods The relevant methods are usually not hot enough for these changes to have an easily measurable effect, however there's been a lot of other cases where similiar inlining has helped performance. (And these changes may help offset the changes made in the next patch.) --- src/core/obj.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/obj.js b/src/core/obj.js index 4de709ced..3446f8821 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -1626,14 +1626,14 @@ var XRef = (function XRefClosure() { }, fetchIfRef: function XRef_fetchIfRef(obj, suppressEncryption) { - if (!isRef(obj)) { - return obj; + if (obj instanceof Ref) { + return this.fetch(obj, suppressEncryption); } - return this.fetch(obj, suppressEncryption); + return obj; }, fetch: function XRef_fetch(ref, suppressEncryption) { - if (!isRef(ref)) { + if (!(ref instanceof Ref)) { throw new Error('ref object is not a reference'); } var num = ref.num; @@ -1768,10 +1768,10 @@ var XRef = (function XRefClosure() { }, async fetchIfRefAsync(obj, suppressEncryption) { - if (!isRef(obj)) { - return obj; + if (obj instanceof Ref) { + return this.fetchAsync(obj, suppressEncryption); } - return this.fetchAsync(obj, suppressEncryption); + return obj; }, async fetchAsync(ref, suppressEncryption) { From 1cd9a28c8126e8461b7fe8c8a253d6d7ee328236 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 17 Aug 2019 11:44:51 +0200 Subject: [PATCH 2/2] Replace the `XRef.cache` Array with a Map instead Given that the different types of `Stream`s will never be cached, this thus implies that the `XRef.cache` Array will *always* be more-or-less sparse. Generally speaking, the longer the document the more sparse the `XRef.cache` will thus become. For example, looking at the `pdf.pdf` file from the test-suite: The length of the `XRef.cache` Array will be a few hundred thousand elements, with approximately 95% of them being empty. Hence it seems pretty clear that an Array isn't really the best data-structure for this kind of cache, and this patch thus changes it to a Map instead. This patch-series was tested using the PDF file from issue 2618, i.e. http://bugzilla-attachments.gnome.org/attachment.cgi?id=226471, with the following manifest file: ``` [ { "id": "issue2618", "file": "../web/pdfs/issue2618.pdf", "md5": "", "rounds": 200, "type": "eq" } ] ``` which gave the following results when comparing this patch-series against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 200 | 2736 | 2736 | 1 | 0.02 | Firefox | Page Request | 200 | 2 | 2 | 0 | -8.26 | faster Firefox | Rendering | 200 | 2733 | 2734 | 1 | 0.03 | ``` --- src/core/obj.js | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/core/obj.js b/src/core/obj.js index 3446f8821..9b74c7b1b 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -1043,8 +1043,7 @@ var XRef = (function XRefClosure() { this.pdfManager = pdfManager; this.entries = []; this.xrefstms = Object.create(null); - // prepare the XRef cache - this.cache = []; + this._cacheMap = new Map(); // Prepare the XRef cache. this.stats = { streamTypes: Object.create(null), fontTypes: Object.create(null), @@ -1636,9 +1635,10 @@ var XRef = (function XRefClosure() { if (!(ref instanceof Ref)) { throw new Error('ref object is not a reference'); } - var num = ref.num; - if (num in this.cache) { - var cacheEntry = this.cache[num]; + const num = ref.num; + + if (this._cacheMap.has(num)) { + const cacheEntry = this._cacheMap.get(num); // In documents with Object Streams, it's possible that cached `Dict`s // have not been assigned an `objId` yet (see e.g. issue3115r.pdf). if (cacheEntry instanceof Dict && !cacheEntry.objId) { @@ -1646,12 +1646,11 @@ var XRef = (function XRefClosure() { } return cacheEntry; } + let xrefEntry = this.getEntry(num); - var xrefEntry = this.getEntry(num); - - // the referenced entry can be free - if (xrefEntry === null) { - return (this.cache[num] = null); + if (xrefEntry === null) { // The referenced entry can be free. + this._cacheMap.set(num, xrefEntry); + return xrefEntry; } if (xrefEntry.uncompressed) { @@ -1709,7 +1708,7 @@ var XRef = (function XRefClosure() { xrefEntry = parser.getObj(); } if (!isStream(xrefEntry)) { - this.cache[num] = xrefEntry; + this._cacheMap.set(num, xrefEntry); } return xrefEntry; }, @@ -1757,7 +1756,7 @@ var XRef = (function XRefClosure() { num = nums[i]; var entry = this.entries[num]; if (entry && entry.offset === tableOffset && entry.gen === i) { - this.cache[num] = entries[i]; + this._cacheMap.set(num, entries[i]); } } xrefEntry = entries[xrefEntry.gen];