From 06412a557b1ffc5a1341680d125bba9493ec2c99 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Thu, 28 Nov 2019 16:13:55 +0100
Subject: [PATCH 1/2] Slightly re-factor `XRef.fetchCompressed`

 - Change all occurrences of `var` to `let`/`const`.

 - Initialize the (temporary) Arrays with the correct sizes upfront.

 - Inline the `isCmd` check. Obviously this won't make a huge difference, but given that the check is only relevant for corrupt documents it cannot hurt.
---
 src/core/obj.js | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/core/obj.js b/src/core/obj.js
index 42ae9cf00..6a6c8ff2e 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -1707,13 +1707,13 @@ var XRef = (function XRefClosure() {
     },
 
     fetchCompressed(ref, xrefEntry, suppressEncryption = false) {
-      var tableOffset = xrefEntry.offset;
-      var stream = this.fetch(Ref.get(tableOffset, 0));
+      const tableOffset = xrefEntry.offset;
+      const stream = this.fetch(Ref.get(tableOffset, 0));
       if (!isStream(stream)) {
         throw new FormatError('bad ObjStm stream');
       }
-      var first = stream.dict.get('First');
-      var n = stream.dict.get('N');
+      const first = stream.dict.get('First');
+      const n = stream.dict.get('N');
       if (!Number.isInteger(first) || !Number.isInteger(n)) {
         throw new FormatError(
           'invalid first and n parameters for ObjStm stream');
@@ -1723,33 +1723,34 @@ var XRef = (function XRefClosure() {
         xref: this,
         allowStreams: true,
       });
-      var i, entries = [], num, nums = [];
+      const nums = new Array(n);
       // read the object numbers to populate cache
-      for (i = 0; i < n; ++i) {
-        num = parser.getObj();
+      for (let i = 0; i < n; ++i) {
+        const num = parser.getObj();
         if (!Number.isInteger(num)) {
           throw new FormatError(
             `invalid object number in the ObjStm stream: ${num}`);
         }
-        nums.push(num);
-        var offset = parser.getObj();
+        const offset = parser.getObj();
         if (!Number.isInteger(offset)) {
           throw new FormatError(
             `invalid object offset in the ObjStm stream: ${offset}`);
         }
+        nums[i] = num;
       }
+      const entries = new Array(n);
       // read stream objects for cache
-      for (i = 0; i < n; ++i) {
-        entries.push(parser.getObj());
+      for (let i = 0; i < n; ++i) {
+        const obj = parser.getObj();
+        entries[i] = obj;
         // The ObjStm should not contain 'endobj'. If it's present, skip over it
         // to support corrupt PDFs (fixes issue 5241, bug 898610, bug 1037816).
-        if (isCmd(parser.buf1, 'endobj')) {
+        if ((parser.buf1 instanceof Cmd) && parser.buf1.cmd === 'endobj') {
           parser.shift();
         }
-        num = nums[i];
-        var entry = this.entries[num];
+        const num = nums[i], entry = this.entries[num];
         if (entry && entry.offset === tableOffset && entry.gen === i) {
-          this._cacheMap.set(num, entries[i]);
+          this._cacheMap.set(num, obj);
         }
       }
       xrefEntry = entries[xrefEntry.gen];

From 168c6aecae30243732f23949216dd4e46ca43f3d Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Thu, 28 Nov 2019 16:16:04 +0100
Subject: [PATCH 2/2] Stop caching Streams in `XRef.fetchCompressed`

I'm slightly surprised that this hasn't actually caused any (known) bugs, but that may be more luck than anything else since it fortunately doesn't seem common for Streams to be defined inside of an 'ObjStm'.[1]

Note that in the `XRef.fetchUncompressed` method we're *not* caching Streams, and that for very good reasons too.

 - Streams, especially the `DecodeStream` ones, can become *very* large once read. Hence caching them really isn't a good idea simply because of the (potential) memory impact of doing so.

 - Attempting to read from the *same* Stream more than once won't work, unless it's `reset` in between, since using any method such as e.g. `getBytes` always starts at the current data position.

 - Given that even the `src/core/` code is now fairly asynchronous, see e.g. the `PartialEvaluator`, it's generally impossible to assert that any one Stream isn't being accessed "concurrently" by e.g. different `getOperatorList` calls. Hence `reset`-ing a cached Stream isn't going to work in the general case.

All in all, I cannot understand why it'd ever be correct to cache Streams in the `XRef.fetchCompressed` method.

---
[1] One example where that happens is the `issue3115r.pdf` file in the test-suite, where the streams in question are not actually used for anything within the PDF.js code.
---
 src/core/obj.js | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/core/obj.js b/src/core/obj.js
index 6a6c8ff2e..253a48666 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -1748,6 +1748,9 @@ var XRef = (function XRefClosure() {
         if ((parser.buf1 instanceof Cmd) && parser.buf1.cmd === 'endobj') {
           parser.shift();
         }
+        if (isStream(obj)) {
+          continue;
+        }
         const num = nums[i], entry = this.entries[num];
         if (entry && entry.offset === tableOffset && entry.gen === i) {
           this._cacheMap.set(num, obj);