From d6c095038910178a5218b385ebd26a6520c58e86 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 27 Jul 2023 17:41:08 +0200 Subject: [PATCH] Avoid eagerly matching "trailer"-strings when searching for incomplete objects in `XRef.indexObjects` (issue 16759, PR 15854 follow-up, bug 1845762) When searching for "endobj"-operators, make sure that we don't accidentally match a "trailer"-string in /Content-streams without /Filter-entries (i.e. streams that contain "raw" and thus human-readable data). --- src/core/xref.js | 2 +- test/pdfs/issue16759.pdf.link | 1 + test/test_manifest.json | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 test/pdfs/issue16759.pdf.link diff --git a/src/core/xref.js b/src/core/xref.js index 5c9946c96..6c32f6ee4 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -431,7 +431,7 @@ class XRef { } return skipped; } - const gEndobjRegExp = /\b(endobj|\d+\s+\d+\s+obj|xref|trailer)\b/g; + const gEndobjRegExp = /\b(endobj|\d+\s+\d+\s+obj|xref|trailer\s*<<)\b/g; const gStartxrefRegExp = /\b(startxref|\d+\s+\d+\s+obj)\b/g; const objRegExp = /^(\d+)\s+(\d+)\s+obj\b/; diff --git a/test/pdfs/issue16759.pdf.link b/test/pdfs/issue16759.pdf.link new file mode 100644 index 000000000..de4f14f8f --- /dev/null +++ b/test/pdfs/issue16759.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/12185373/issue16759.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 24d440ba9..fe03910e5 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1844,6 +1844,14 @@ "type": "eq", "annotations": true }, + { "id": "issue16759", + "file": "pdfs/issue16759.pdf", + "md5": "07d97ae84f3d757e7ed20c628b94ecd5", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "eq" + }, { "id": "issue9105_other", "file": "pdfs/issue9105_other.pdf", "md5": "4c8b9c2cceb9c5d621e1d50b3dc38efc",