From ffc847eaa55dd5ce239402c24522fac146665677 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 11 Oct 2019 20:39:02 +0200 Subject: [PATCH] Allow over-writing entries, in `XRef.indexObjects`, only when the generation number matches (issues 11230, 11139, 9552, 9129, 7303) This patch is making me somewhat worried about future regressions, since it's certainly easy to imagine this completely breaking certain kinds of corrupt/edited PDF documents while fixing others.[1] Obviously it passes all existing reference tests (and even improves one), however compared to many other patches there's no telling how much it could break. The only reason that I'm even submitting this patch, is because of the number of open issues that it would address. Generally speaking though, the best course of action would probably be if `XRef.indexObjects` was re-written to be much more robust (since it currently feels somewhat hand-wavy in parts). E.g. by actually checking/validating more of the objects before committing to them. --- [1] Especially given that it's reverting part of PR 5910, however in the case of issue 5909 it seems that other (more recent) changes have actually made that PR redundant. --- src/core/obj.js | 2 +- test/pdfs/issue11139.pdf.link | 1 + test/pdfs/issue11230.pdf.link | 1 + test/pdfs/issue7303.pdf.link | 1 + test/pdfs/issue9129.pdf.link | 1 + test/pdfs/issue9552.pdf.link | 1 + test/test_manifest.json | 43 +++++++++++++++++++++++++++++++++++ 7 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 test/pdfs/issue11139.pdf.link create mode 100644 test/pdfs/issue11230.pdf.link create mode 100644 test/pdfs/issue7303.pdf.link create mode 100644 test/pdfs/issue9129.pdf.link create mode 100644 test/pdfs/issue9552.pdf.link diff --git a/src/core/obj.js b/src/core/obj.js index e854e1e1f..1f3053db0 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -1415,7 +1415,7 @@ var XRef = (function XRefClosure() { position += skipUntil(buffer, position, startxrefBytes); } else if ((m = objRegExp.exec(token))) { const num = m[1] | 0, gen = m[2] | 0; - if (typeof this.entries[num] === 'undefined') { + if (!this.entries[num] || this.entries[num].gen === gen) { this.entries[num] = { offset: position - stream.start, gen, diff --git a/test/pdfs/issue11139.pdf.link b/test/pdfs/issue11139.pdf.link new file mode 100644 index 000000000..aa66e87a7 --- /dev/null +++ b/test/pdfs/issue11139.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/3602024/FR.Cekada.SedesUnCum.pdf diff --git a/test/pdfs/issue11230.pdf.link b/test/pdfs/issue11230.pdf.link new file mode 100644 index 000000000..a45cc1a3c --- /dev/null +++ b/test/pdfs/issue11230.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/3717821/Y._Tsividis_A_First_Lab_in_Circuits_and_Electronics.pdf diff --git a/test/pdfs/issue7303.pdf.link b/test/pdfs/issue7303.pdf.link new file mode 100644 index 000000000..eae35adb6 --- /dev/null +++ b/test/pdfs/issue7303.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/254710/ws_protectyourwork_e.pdf diff --git a/test/pdfs/issue9129.pdf.link b/test/pdfs/issue9129.pdf.link new file mode 100644 index 000000000..35b73744c --- /dev/null +++ b/test/pdfs/issue9129.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/3721643/issue9129.pdf diff --git a/test/pdfs/issue9552.pdf.link b/test/pdfs/issue9552.pdf.link new file mode 100644 index 000000000..a8e41b6dc --- /dev/null +++ b/test/pdfs/issue9552.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/3721640/issue9552.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 2fd3a1d93..d1c582360 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1378,14 +1378,40 @@ "rounds": 1, "type": "eq" }, + { "id": "issue7303", + "file": "pdfs/issue7303.pdf", + "md5": "3a5a4ab6755d6c3b0c490996b83d69d2", + "link": true, + "rounds": 1, + "lastPage": 2, + "type": "eq" + }, { "id": "issue7496", "file": "pdfs/issue7496.pdf", "md5": "b422981ae781166e75c0fb4c3634ed96", "link": true, "rounds": 1, "lastPage": 2, + "type": "eq", + "annotations": true + }, + { "id": "issue9129", + "file": "pdfs/issue9129.pdf", + "md5": "939ffc8d6d29b1d74e9d0f98b227b97f", + "link": true, + "rounds": 1, + "lastPage": 1, "type": "eq" }, + { "id": "issue9552", + "file": "pdfs/issue9552.pdf", + "md5": "7f80fd5b426926f88fd2a9fdc02cd3bd", + "link": true, + "rounds": 1, + "lastPage": 1, + "type": "eq", + "annotations": true + }, { "id": "issue10326", "file": "pdfs/issue10326.pdf", "md5": "015c13b09ef735ea1204f38992c60487", @@ -1394,6 +1420,23 @@ "lastPage": 1, "type": "eq" }, + { "id": "issue11139", + "file": "pdfs/issue11139.pdf", + "md5": "006dd4f4bb1878bc14a12072d81a4524", + "link": true, + "rounds": 1, + "lastPage": 1, + "type": "eq" + }, + { "id": "issue11230", + "file": "pdfs/issue11230.pdf", + "md5": "db0a1464d8f9f3ce079b52e0cacdccd3", + "link": true, + "rounds": 1, + "firstPage": 100, + "lastPage": 100, + "type": "eq" + }, { "id": "issue7544", "file": "pdfs/issue7544.pdf", "md5": "87e3a9fc7d6a6c1bd5b53af6926ce48e",