From a807ffe907a417f27a26e93a0348fe6465aacad3 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 26 Nov 2021 14:11:39 +0100 Subject: [PATCH] Prevent circular references in XRef tables from hanging the worker-thread (issue 14303) *Please note:* While this patch on its own is sufficient to prevent the worker-thread from hanging, however in combination with PR 14311 these PDF documents will both load *and* render correctly. Rather than focusing on the particular structure of these PDF documents, it seemed (at least to me) to make sense to try and prevent all circular references when fetching/looking-up data using the XRef table. To avoid a solution that required tracking the references manually everywhere, the implementation settled on here instead handles that internally in the `XRef.fetch`-method. This should work, since that method *and* the `Parser`/`Lexer`-implementations are completely synchronous. Note also that the existing `XRef`-caching, used for all data-types *except* Streams, should hopefully help to lessen the performance impact of these changes. One *potential* problem with these changes could be certain *browser* exceptions, since those are generally not catchable in JavaScript code, however those would most likely "stop" worker-thread parsing anyway (at least I hope so). Finally, note that I settled on returning dummy-data rather than throwing an exception. This was done to allow parsing, for the rest of the document, to continue such that *one* bad reference doesn't prevent an entire document from loading. Fixes two of the issues listed in issue 14303, namely the `poppler-91414-0.zip-2.gz-53.pdf` and `poppler-91414-0.zip-2.gz-54.pdf` documents. --- src/core/parser.js | 2 +- src/core/primitives.js | 2 + src/core/xref.js | 26 ++++++++--- test/pdfs/.gitignore | 2 + test/pdfs/poppler-91414-0-53.pdf | 70 +++++++++++++++++++++++++++++ test/pdfs/poppler-91414-0-54.pdf | 77 ++++++++++++++++++++++++++++++++ test/unit/api_spec.js | 34 ++++++++++++++ 7 files changed, 207 insertions(+), 6 deletions(-) create mode 100644 test/pdfs/poppler-91414-0-53.pdf create mode 100644 test/pdfs/poppler-91414-0-54.pdf diff --git a/src/core/parser.js b/src/core/parser.js index 4d9a4267c..c5e141f9c 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -632,7 +632,7 @@ class Parser { // Get the length. let length = dict.get("Length"); if (!Number.isInteger(length)) { - info(`Bad length "${length}" in stream`); + info(`Bad length "${length && length.toString()}" in stream.`); length = 0; } diff --git a/src/core/primitives.js b/src/core/primitives.js index cc37a5f68..4b5c53221 100644 --- a/src/core/primitives.js +++ b/src/core/primitives.js @@ -16,6 +16,7 @@ import { assert, shadow, unreachable } from "../shared/util.js"; import { BaseStream } from "./base_stream.js"; +const CIRCULAR_REF = Symbol("CIRCULAR_REF"); const EOF = Symbol("EOF"); const Name = (function NameClosure() { @@ -422,6 +423,7 @@ function clearPrimitiveCaches() { } export { + CIRCULAR_REF, clearPrimitiveCaches, Cmd, Dict, diff --git a/src/core/xref.js b/src/core/xref.js index 5473c335c..7c2c2172a 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -21,7 +21,7 @@ import { InvalidPDFException, warn, } from "../shared/util.js"; -import { Cmd, Dict, isCmd, Ref } from "./primitives.js"; +import { CIRCULAR_REF, Cmd, Dict, isCmd, Ref, RefSet } from "./primitives.js"; import { DocStats, MissingDataException, @@ -40,6 +40,7 @@ class XRef { this.entries = []; this.xrefstms = Object.create(null); this._cacheMap = new Map(); // Prepare the XRef cache. + this._pendingRefs = new RefSet(); this.stats = new DocStats(pdfManager.msgHandler); this._newRefNum = null; } @@ -733,11 +734,26 @@ class XRef { this._cacheMap.set(num, xrefEntry); return xrefEntry; } + // Prevent circular references, in corrupt PDF documents, from hanging the + // worker-thread. This relies, implicitly, on the parsing being synchronous. + if (this._pendingRefs.has(ref)) { + this._pendingRefs.remove(ref); - if (xrefEntry.uncompressed) { - xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); - } else { - xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); + warn(`Ignoring circular reference: ${ref}.`); + return CIRCULAR_REF; + } + this._pendingRefs.put(ref); + + try { + if (xrefEntry.uncompressed) { + xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); + } else { + xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); + } + this._pendingRefs.remove(ref); + } catch (ex) { + this._pendingRefs.remove(ref); + throw ex; } if (xrefEntry instanceof Dict) { xrefEntry.objId = ref.toString(); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index c4fcf9d30..d8681a1e7 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -492,3 +492,5 @@ !xfa_issue14315.pdf !poppler-67295-0.pdf !poppler-85140-0.pdf +!poppler-91414-0-53.pdf +!poppler-91414-0-54.pdf diff --git a/test/pdfs/poppler-91414-0-53.pdf b/test/pdfs/poppler-91414-0-53.pdf new file mode 100644 index 000000000..3d9305e76 --- /dev/null +++ b/test/pdfs/poppler-91414-0-53.pdf @@ -0,0 +1,70 @@ +%PDF-1.5 +%€€€€ +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj +<< + /Count 6 0 R + /Kids [3 0 R] + /Type /Pages +>> +endobj +3 0 obj +<< + /Resources << + /Font << + /F1 5 0 R + >> + >> + /MediaBox [0 0 795 842] + /Parent 2 0 R + /Contents 4 0 R + /Type /Page +>> +endobj +4 0 obj +<< /Length 43 >> +stream +BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET +endstream +endobj +5 0 obj +<< + /Name /F1 + /BaseFont /Helvetica + /Type /Font + /Subtype /Type1 +>> +endobj +6 0 obj +<< /Length 6 0 R >> +stream +2 +endstream +endobj +7 0 obj +<<>> +endobj +xref +0 8 +0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000130 00000 n +0000000269 00000 n +0000000362 00000 n +0000000446 00000 n +0000000500 00000 n +trailer +<< + /Size 8 + /Root 1 0 R + /Info 7 0 R +>> +startxref +520 +%%EOF \ No newline at end of file diff --git a/test/pdfs/poppler-91414-0-54.pdf b/test/pdfs/poppler-91414-0-54.pdf new file mode 100644 index 000000000..c6ef3a691 --- /dev/null +++ b/test/pdfs/poppler-91414-0-54.pdf @@ -0,0 +1,77 @@ +%PDF-1.5 +%€€€€ +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj +<< + /Count 6 0 R + /Kids [3 0 R] + /Type /Pages +>> +endobj +3 0 obj +<< + /Resources << + /Font << + /F1 5 0 R + >> + >> + /MediaBox [0 0 795 842] + /Parent 2 0 R + /Contents 4 0 R + /Type /Page +>> +endobj +4 0 obj +<< /Length 43 >> +stream +BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET +endstream +endobj +5 0 obj +<< + /Name /F1 + /BaseFont /Helvetica + /Type /Font + /Subtype /Type1 +>> +endobj +6 0 obj +<< /Length 7 0 R >> +stream + foobar +endstream +endobj +7 0 obj +<< /Length 6 0 R >> +stream + foobar +endstream +endobj +8 0 obj +<<>> +endobj +xref +0 9 +0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000130 00000 n +0000000269 00000 n +0000000362 00000 n +0000000446 00000 n +0000000506 00000 n +0000000566 00000 n +trailer +<< + /Size 9 + /Root 1 0 R + /Info 8 0 R +>> +startxref +586 +%%EOF \ No newline at end of file diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index c8d98acf3..6b8c1c4f2 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -511,6 +511,40 @@ describe("api", function () { await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); }); + + it("creates pdf doc from PDF files, with circular references", async function () { + const loadingTask1 = getDocument( + buildGetDocumentParams("poppler-91414-0-53.pdf") + ); + const loadingTask2 = getDocument( + buildGetDocumentParams("poppler-91414-0-54.pdf") + ); + expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true); + expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true); + + const pdfDocument1 = await loadingTask1.promise; + const pdfDocument2 = await loadingTask2.promise; + + expect(pdfDocument1.numPages).toEqual(1); + expect(pdfDocument2.numPages).toEqual(1); + + const pageA = await pdfDocument1.getPage(1); + const pageB = await pdfDocument2.getPage(1); + + expect(pageA instanceof PDFPageProxy).toEqual(true); + expect(pageB instanceof PDFPageProxy).toEqual(true); + + for (const opList of [ + await pageA.getOperatorList(), + await pageB.getOperatorList(), + ]) { + expect(opList.fnArray.length).toBeGreaterThan(5); + expect(opList.argsArray.length).toBeGreaterThan(5); + expect(opList.lastChunk).toEqual(true); + } + + await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); + }); }); describe("PDFWorker", function () {