From ca8d2bdce4252b72bec83ef1f00a467510f5a94b Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Thu, 25 Nov 2021 13:28:24 +0100
Subject: [PATCH] Abort parsing when the XRef /W-array contains bogus entries
 (issue 14303)

For this particular PDF document, we have `/W [1 2 166666666666666666666666666]` which obviously makes no sense.

While this patch makes no attempt at actually validating the entries in the /W-array, we'll now simply abort all processing when the end of the PDF document has been reached (thus preventing the browser from hanging).
Please note that this patch doesn't enable the PDF document to be loaded/rendered, but at least it fails "correctly" now.

Fixes one of the issues listed in issue 14303, namely the `REDHAT-1531897-0.pdf` document.
---
 src/core/xref.js               |  18 +++++++++++++++---
 test/pdfs/.gitignore           |   1 +
 test/pdfs/REDHAT-1531897-0.pdf | Bin 0 -> 871 bytes
 test/unit/api_spec.js          |  20 ++++++++++++++++++++
 4 files changed, 36 insertions(+), 3 deletions(-)
 create mode 100644 test/pdfs/REDHAT-1531897-0.pdf
diff --git a/src/core/xref.js b/src/core/xref.js
index 37256085e..568c44cad 100644
--- a/src/core/xref.js
+++ b/src/core/xref.js
@@ -323,17 +323,29 @@ class XRef {
           offset = 0,
           generation = 0;
         for (j = 0; j < typeFieldWidth; ++j) {
-          type = (type << 8) | stream.getByte();
+          const typeByte = stream.getByte();
+          if (typeByte === -1) {
+            throw new FormatError("Invalid XRef byteWidths 'type'.");
+          }
+          type = (type << 8) | typeByte;
         }
         // if type field is absent, its default value is 1
         if (typeFieldWidth === 0) {
           type = 1;
         }
         for (j = 0; j < offsetFieldWidth; ++j) {
-          offset = (offset << 8) | stream.getByte();
+          const offsetByte = stream.getByte();
+          if (offsetByte === -1) {
+            throw new FormatError("Invalid XRef byteWidths 'offset'.");
+          }
+          offset = (offset << 8) | offsetByte;
         }
         for (j = 0; j < generationFieldWidth; ++j) {
-          generation = (generation << 8) | stream.getByte();
+          const generationByte = stream.getByte();
+          if (generationByte === -1) {
+            throw new FormatError("Invalid XRef byteWidths 'generation'.");
+          }
+          generation = (generation << 8) | generationByte;
         }
         const entry = {};
         entry.offset = offset;
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 1b7bd6cee..1e0488f11 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -488,3 +488,4 @@
 !rc_annotation.pdf
 !issue14267.pdf
 !PDFBOX-4352-0.pdf
+!REDHAT-1531897-0.pdf
diff --git a/test/pdfs/REDHAT-1531897-0.pdf b/test/pdfs/REDHAT-1531897-0.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..8978e307c506d65172a1614122592e5a18fd0a98
GIT binary patch
literal 871
zcmY!laB<T$)HCK)ef0SJWnM0G1p|frq%2+=8-1V5ywt>^%&OEB1w(xw1#?RiQ+<C0
zOMO=bV^bp&eLtXRh=RG9shPfqLbQU3siA_Qv3abW9dBx03djfr?0}cc&=_HfOKNg{
zN@_r2QEoBNO6UBX(%iga1rz;%qSTbk<dXa%1w$h{JAJpzoRZWceYc#%l2nKmeNUHY
z8xt32OG8IP3v*LPGZ%9Mb4OQaBLib6Cj%oxXG>#813McxGiOU9Cu1`UM+-+oV-o`l
z6H{|XLl<L9Q%7TSS0_umSbfjDl+=o7a|I(q5IZej!3^lUAbp?Iy!4U`1yduCTgpHl
zHP;Wy&o5E1080l014hBfLO-OkAXPsiC^bz#JlasfNWstyCjf>`aY<2XVlFQ*<Urc|
zld^(Ka^a#}8TV{XALKe@AmDm2e&$@>MRSj6g?cTFxWlQ(a($!cONSQ@My`LVnY;RT
z*BCHx_y(7C`^a=3jO5A>ows<qlg;<Anv3&!p2?mGTzoNN{nj|eb#?34tTS=(v*EI_
zuv)Qw#VPIG_SbgvWbZYfdM|gwuSd}-+j;E7VvqT2UkDcBI_SP)8;7!c#f3LNriN6i
zbMD*WKSd$ku|{aeS8)l=imTh?8QB=p)2&l@xqvYY^$9etO$f!PF@0kcJr<2I<IWr-
z>I@JO1dNKLq=bZoq$jqAjvTO`+S$0Uy|Zy*Yv;#}Dih9f2qZ9QMKX(E35ep7#G;al
VqSQ2CGB-5i<x*93^>^dt0suU#5@!Ga

literal 0
HcmV?d00001

diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index 9a5428bb1..f61d19a35 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -458,6 +458,26 @@ describe("api", function () {
 
       await loadingTask.destroy();
     });
+
+    it("creates pdf doc from PDF file with bad XRef byteWidths", async function () {
+      // A corrupt PDF file, where the XRef /W-array have (some) bogus entries.
+      const loadingTask = getDocument(
+        buildGetDocumentParams("REDHAT-1531897-0.pdf")
+      );
+      expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
+
+      try {
+        await loadingTask.promise;
+
+        // Shouldn't get here.
+        expect(false).toEqual(true);
+      } catch (reason) {
+        expect(reason instanceof InvalidPDFException).toEqual(true);
+        expect(reason.message).toEqual("Invalid PDF structure.");
+      }
+
+      await loadingTask.destroy();
+    });
   });
 
   describe("PDFWorker", function () {