diff --git a/src/core/parser.js b/src/core/parser.js index 4d9a4267c..c5e141f9c 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -632,7 +632,7 @@ class Parser { // Get the length. let length = dict.get("Length"); if (!Number.isInteger(length)) { - info(`Bad length "${length}" in stream`); + info(`Bad length "${length && length.toString()}" in stream.`); length = 0; } diff --git a/src/core/primitives.js b/src/core/primitives.js index cc37a5f68..4b5c53221 100644 --- a/src/core/primitives.js +++ b/src/core/primitives.js @@ -16,6 +16,7 @@ import { assert, shadow, unreachable } from "../shared/util.js"; import { BaseStream } from "./base_stream.js"; +const CIRCULAR_REF = Symbol("CIRCULAR_REF"); const EOF = Symbol("EOF"); const Name = (function NameClosure() { @@ -422,6 +423,7 @@ function clearPrimitiveCaches() { } export { + CIRCULAR_REF, clearPrimitiveCaches, Cmd, Dict, diff --git a/src/core/xref.js b/src/core/xref.js index 5473c335c..7c2c2172a 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -21,7 +21,7 @@ import { InvalidPDFException, warn, } from "../shared/util.js"; -import { Cmd, Dict, isCmd, Ref } from "./primitives.js"; +import { CIRCULAR_REF, Cmd, Dict, isCmd, Ref, RefSet } from "./primitives.js"; import { DocStats, MissingDataException, @@ -40,6 +40,7 @@ class XRef { this.entries = []; this.xrefstms = Object.create(null); this._cacheMap = new Map(); // Prepare the XRef cache. + this._pendingRefs = new RefSet(); this.stats = new DocStats(pdfManager.msgHandler); this._newRefNum = null; } @@ -733,11 +734,26 @@ class XRef { this._cacheMap.set(num, xrefEntry); return xrefEntry; } + // Prevent circular references, in corrupt PDF documents, from hanging the + // worker-thread. This relies, implicitly, on the parsing being synchronous. + if (this._pendingRefs.has(ref)) { + this._pendingRefs.remove(ref); - if (xrefEntry.uncompressed) { - xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); - } else { - xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); + warn(`Ignoring circular reference: ${ref}.`); + return CIRCULAR_REF; + } + this._pendingRefs.put(ref); + + try { + if (xrefEntry.uncompressed) { + xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); + } else { + xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption); + } + this._pendingRefs.remove(ref); + } catch (ex) { + this._pendingRefs.remove(ref); + throw ex; } if (xrefEntry instanceof Dict) { xrefEntry.objId = ref.toString(); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index c4fcf9d30..d8681a1e7 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -492,3 +492,5 @@ !xfa_issue14315.pdf !poppler-67295-0.pdf !poppler-85140-0.pdf +!poppler-91414-0-53.pdf +!poppler-91414-0-54.pdf diff --git a/test/pdfs/poppler-91414-0-53.pdf b/test/pdfs/poppler-91414-0-53.pdf new file mode 100644 index 000000000..3d9305e76 --- /dev/null +++ b/test/pdfs/poppler-91414-0-53.pdf @@ -0,0 +1,70 @@ +%PDF-1.5 +%€€€€ +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj +<< + /Count 6 0 R + /Kids [3 0 R] + /Type /Pages +>> +endobj +3 0 obj +<< + /Resources << + /Font << + /F1 5 0 R + >> + >> + /MediaBox [0 0 795 842] + /Parent 2 0 R + /Contents 4 0 R + /Type /Page +>> +endobj +4 0 obj +<< /Length 43 >> +stream +BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET +endstream +endobj +5 0 obj +<< + /Name /F1 + /BaseFont /Helvetica + /Type /Font + /Subtype /Type1 +>> +endobj +6 0 obj +<< /Length 6 0 R >> +stream +2 +endstream +endobj +7 0 obj +<<>> +endobj +xref +0 8 +0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000130 00000 n +0000000269 00000 n +0000000362 00000 n +0000000446 00000 n +0000000500 00000 n +trailer +<< + /Size 8 + /Root 1 0 R + /Info 7 0 R +>> +startxref +520 +%%EOF \ No newline at end of file diff --git a/test/pdfs/poppler-91414-0-54.pdf b/test/pdfs/poppler-91414-0-54.pdf new file mode 100644 index 000000000..c6ef3a691 --- /dev/null +++ b/test/pdfs/poppler-91414-0-54.pdf @@ -0,0 +1,77 @@ +%PDF-1.5 +%€€€€ +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj +<< + /Count 6 0 R + /Kids [3 0 R] + /Type /Pages +>> +endobj +3 0 obj +<< + /Resources << + /Font << + /F1 5 0 R + >> + >> + /MediaBox [0 0 795 842] + /Parent 2 0 R + /Contents 4 0 R + /Type /Page +>> +endobj +4 0 obj +<< /Length 43 >> +stream +BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET +endstream +endobj +5 0 obj +<< + /Name /F1 + /BaseFont /Helvetica + /Type /Font + /Subtype /Type1 +>> +endobj +6 0 obj +<< /Length 7 0 R >> +stream + foobar +endstream +endobj +7 0 obj +<< /Length 6 0 R >> +stream + foobar +endstream +endobj +8 0 obj +<<>> +endobj +xref +0 9 +0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000130 00000 n +0000000269 00000 n +0000000362 00000 n +0000000446 00000 n +0000000506 00000 n +0000000566 00000 n +trailer +<< + /Size 9 + /Root 1 0 R + /Info 8 0 R +>> +startxref +586 +%%EOF \ No newline at end of file diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index c8d98acf3..6b8c1c4f2 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -511,6 +511,40 @@ describe("api", function () { await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); }); + + it("creates pdf doc from PDF files, with circular references", async function () { + const loadingTask1 = getDocument( + buildGetDocumentParams("poppler-91414-0-53.pdf") + ); + const loadingTask2 = getDocument( + buildGetDocumentParams("poppler-91414-0-54.pdf") + ); + expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true); + expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true); + + const pdfDocument1 = await loadingTask1.promise; + const pdfDocument2 = await loadingTask2.promise; + + expect(pdfDocument1.numPages).toEqual(1); + expect(pdfDocument2.numPages).toEqual(1); + + const pageA = await pdfDocument1.getPage(1); + const pageB = await pdfDocument2.getPage(1); + + expect(pageA instanceof PDFPageProxy).toEqual(true); + expect(pageB instanceof PDFPageProxy).toEqual(true); + + for (const opList of [ + await pageA.getOperatorList(), + await pageB.getOperatorList(), + ]) { + expect(opList.fnArray.length).toBeGreaterThan(5); + expect(opList.argsArray.length).toBeGreaterThan(5); + expect(opList.lastChunk).toEqual(true); + } + + await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); + }); }); describe("PDFWorker", function () {