Prevent circular references in XRef tables from hanging the worker-thread (issue 14303)

*Please note:* While this patch on its own is sufficient to prevent the worker-thread from hanging, however in combination with PR 14311 these PDF documents will both load *and* render correctly.

Rather than focusing on the particular structure of these PDF documents, it seemed (at least to me) to make sense to try and prevent all circular references when fetching/looking-up data using the XRef table.
To avoid a solution that required tracking the references manually everywhere, the implementation settled on here instead handles that internally in the `XRef.fetch`-method. This should work, since that method *and* the `Parser`/`Lexer`-implementations are completely synchronous.

Note also that the existing `XRef`-caching, used for all data-types *except* Streams, should hopefully help to lessen the performance impact of these changes.
One *potential* problem with these changes could be certain *browser* exceptions, since those are generally not catchable in JavaScript code, however those would most likely "stop" worker-thread parsing anyway (at least I hope so).

Finally, note that I settled on returning dummy-data rather than throwing an exception. This was done to allow parsing, for the rest of the document, to continue such that *one* bad reference doesn't prevent an entire document from loading.

Fixes two of the issues listed in issue 14303, namely the `poppler-91414-0.zip-2.gz-53.pdf` and `poppler-91414-0.zip-2.gz-54.pdf` documents.
This commit is contained in:
Jonas Jenwald 2021-11-26 14:11:39 +01:00
parent a669fce762
commit a807ffe907
7 changed files with 207 additions and 6 deletions

View File

@ -632,7 +632,7 @@ class Parser {
// Get the length.
let length = dict.get("Length");
if (!Number.isInteger(length)) {
info(`Bad length "${length}" in stream`);
info(`Bad length "${length && length.toString()}" in stream.`);
length = 0;
}

View File

@ -16,6 +16,7 @@
import { assert, shadow, unreachable } from "../shared/util.js";
import { BaseStream } from "./base_stream.js";
const CIRCULAR_REF = Symbol("CIRCULAR_REF");
const EOF = Symbol("EOF");
const Name = (function NameClosure() {
@ -422,6 +423,7 @@ function clearPrimitiveCaches() {
}
export {
CIRCULAR_REF,
clearPrimitiveCaches,
Cmd,
Dict,

View File

@ -21,7 +21,7 @@ import {
InvalidPDFException,
warn,
} from "../shared/util.js";
import { Cmd, Dict, isCmd, Ref } from "./primitives.js";
import { CIRCULAR_REF, Cmd, Dict, isCmd, Ref, RefSet } from "./primitives.js";
import {
DocStats,
MissingDataException,
@ -40,6 +40,7 @@ class XRef {
this.entries = [];
this.xrefstms = Object.create(null);
this._cacheMap = new Map(); // Prepare the XRef cache.
this._pendingRefs = new RefSet();
this.stats = new DocStats(pdfManager.msgHandler);
this._newRefNum = null;
}
@ -733,11 +734,26 @@ class XRef {
this._cacheMap.set(num, xrefEntry);
return xrefEntry;
}
// Prevent circular references, in corrupt PDF documents, from hanging the
// worker-thread. This relies, implicitly, on the parsing being synchronous.
if (this._pendingRefs.has(ref)) {
this._pendingRefs.remove(ref);
if (xrefEntry.uncompressed) {
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
} else {
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
warn(`Ignoring circular reference: ${ref}.`);
return CIRCULAR_REF;
}
this._pendingRefs.put(ref);
try {
if (xrefEntry.uncompressed) {
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
} else {
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
}
this._pendingRefs.remove(ref);
} catch (ex) {
this._pendingRefs.remove(ref);
throw ex;
}
if (xrefEntry instanceof Dict) {
xrefEntry.objId = ref.toString();

View File

@ -492,3 +492,5 @@
!xfa_issue14315.pdf
!poppler-67295-0.pdf
!poppler-85140-0.pdf
!poppler-91414-0-53.pdf
!poppler-91414-0-54.pdf

View File

@ -0,0 +1,70 @@
%PDF-1.5
%€€€€
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Count 6 0 R
/Kids [3 0 R]
/Type /Pages
>>
endobj
3 0 obj
<<
/Resources <<
/Font <<
/F1 5 0 R
>>
>>
/MediaBox [0 0 795 842]
/Parent 2 0 R
/Contents 4 0 R
/Type /Page
>>
endobj
4 0 obj
<< /Length 43 >>
stream
BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET
endstream
endobj
5 0 obj
<<
/Name /F1
/BaseFont /Helvetica
/Type /Font
/Subtype /Type1
>>
endobj
6 0 obj
<< /Length 6 0 R >>
stream
2
endstream
endobj
7 0 obj
<<>>
endobj
xref
0 8
0000000000 65535 f
0000000015 00000 n
0000000066 00000 n
0000000130 00000 n
0000000269 00000 n
0000000362 00000 n
0000000446 00000 n
0000000500 00000 n
trailer
<<
/Size 8
/Root 1 0 R
/Info 7 0 R
>>
startxref
520
%%EOF

View File

@ -0,0 +1,77 @@
%PDF-1.5
%€€€€
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Count 6 0 R
/Kids [3 0 R]
/Type /Pages
>>
endobj
3 0 obj
<<
/Resources <<
/Font <<
/F1 5 0 R
>>
>>
/MediaBox [0 0 795 842]
/Parent 2 0 R
/Contents 4 0 R
/Type /Page
>>
endobj
4 0 obj
<< /Length 43 >>
stream
BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET
endstream
endobj
5 0 obj
<<
/Name /F1
/BaseFont /Helvetica
/Type /Font
/Subtype /Type1
>>
endobj
6 0 obj
<< /Length 7 0 R >>
stream
foobar
endstream
endobj
7 0 obj
<< /Length 6 0 R >>
stream
foobar
endstream
endobj
8 0 obj
<<>>
endobj
xref
0 9
0000000000 65535 f
0000000015 00000 n
0000000066 00000 n
0000000130 00000 n
0000000269 00000 n
0000000362 00000 n
0000000446 00000 n
0000000506 00000 n
0000000566 00000 n
trailer
<<
/Size 9
/Root 1 0 R
/Info 8 0 R
>>
startxref
586
%%EOF

View File

@ -511,6 +511,40 @@ describe("api", function () {
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});
it("creates pdf doc from PDF files, with circular references", async function () {
const loadingTask1 = getDocument(
buildGetDocumentParams("poppler-91414-0-53.pdf")
);
const loadingTask2 = getDocument(
buildGetDocumentParams("poppler-91414-0-54.pdf")
);
expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true);
const pdfDocument1 = await loadingTask1.promise;
const pdfDocument2 = await loadingTask2.promise;
expect(pdfDocument1.numPages).toEqual(1);
expect(pdfDocument2.numPages).toEqual(1);
const pageA = await pdfDocument1.getPage(1);
const pageB = await pdfDocument2.getPage(1);
expect(pageA instanceof PDFPageProxy).toEqual(true);
expect(pageB instanceof PDFPageProxy).toEqual(true);
for (const opList of [
await pageA.getOperatorList(),
await pageB.getOperatorList(),
]) {
expect(opList.fnArray.length).toBeGreaterThan(5);
expect(opList.argsArray.length).toBeGreaterThan(5);
expect(opList.lastChunk).toEqual(true);
}
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});
});
describe("PDFWorker", function () {