Merge pull request #14312 from Snuffleupagus/XRef-circular-reference

Prevent circular references in XRef tables from hanging the worker-thread (issue 14303)
This commit is contained in:
Tim van der Meij 2021-11-28 14:07:02 +01:00 committed by GitHub
commit c14552874b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 227 additions and 37 deletions

View File

@ -632,7 +632,7 @@ class Parser {
// Get the length.
let length = dict.get("Length");
if (!Number.isInteger(length)) {
info(`Bad length "${length}" in stream`);
info(`Bad length "${length && length.toString()}" in stream.`);
length = 0;
}

View File

@ -16,6 +16,7 @@
import { assert, shadow, unreachable } from "../shared/util.js";
import { BaseStream } from "./base_stream.js";
const CIRCULAR_REF = Symbol("CIRCULAR_REF");
const EOF = Symbol("EOF");
const Name = (function NameClosure() {
@ -422,6 +423,7 @@ function clearPrimitiveCaches() {
}
export {
CIRCULAR_REF,
clearPrimitiveCaches,
Cmd,
Dict,

View File

@ -21,15 +21,7 @@ import {
InvalidPDFException,
warn,
} from "../shared/util.js";
import {
Cmd,
Dict,
isCmd,
isDict,
isRef,
isStream,
Ref,
} from "./primitives.js";
import { CIRCULAR_REF, Cmd, Dict, isCmd, Ref, RefSet } from "./primitives.js";
import {
DocStats,
MissingDataException,
@ -38,6 +30,7 @@ import {
XRefParseException,
} from "./core_utils.js";
import { Lexer, Parser } from "./parser.js";
import { BaseStream } from "./base_stream.js";
import { CipherTransformFactory } from "./crypto.js";
class XRef {
@ -47,6 +40,7 @@ class XRef {
this.entries = [];
this.xrefstms = Object.create(null);
this._cacheMap = new Map(); // Prepare the XRef cache.
this._pendingRefs = new RefSet();
this.stats = new DocStats(pdfManager.msgHandler);
this._newRefNum = null;
}
@ -88,7 +82,7 @@ class XRef {
}
warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`);
}
if (isDict(encrypt)) {
if (encrypt instanceof Dict) {
const ids = trailerDict.get("ID");
const fileId = ids && ids.length ? ids[0] : "";
// The 'Encrypt' dictionary itself should not be encrypted, and by
@ -113,7 +107,7 @@ class XRef {
}
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
}
if (isDict(root) && root.has("Pages")) {
if (root instanceof Dict && root.has("Pages")) {
this.root = root;
} else {
if (!recoveryMode) {
@ -155,10 +149,10 @@ class XRef {
let dict = parser.getObj();
// The pdflib PDF generator can generate a nested trailer dictionary
if (!isDict(dict) && dict.dict) {
if (!(dict instanceof Dict) && dict.dict) {
dict = dict.dict;
}
if (!isDict(dict)) {
if (!(dict instanceof Dict)) {
throw new FormatError(
"Invalid XRef table: could not parse trailer dictionary"
);
@ -289,19 +283,15 @@ class XRef {
}
readXRefStream(stream) {
let i, j;
const streamState = this.streamState;
stream.pos = streamState.streamPos;
const byteWidths = streamState.byteWidths;
const typeFieldWidth = byteWidths[0];
const offsetFieldWidth = byteWidths[1];
const generationFieldWidth = byteWidths[2];
const [typeFieldWidth, offsetFieldWidth, generationFieldWidth] =
streamState.byteWidths;
const entryRanges = streamState.entryRanges;
while (entryRanges.length > 0) {
const first = entryRanges[0];
const n = entryRanges[1];
const [first, n] = entryRanges;
if (!Number.isInteger(first) || !Number.isInteger(n)) {
throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`);
@ -315,14 +305,14 @@ class XRef {
`Invalid XRef entry fields length: ${first}, ${n}`
);
}
for (i = streamState.entryNum; i < n; ++i) {
for (let i = streamState.entryNum; i < n; ++i) {
streamState.entryNum = i;
streamState.streamPos = stream.pos;
let type = 0,
offset = 0,
generation = 0;
for (j = 0; j < typeFieldWidth; ++j) {
for (let j = 0; j < typeFieldWidth; ++j) {
const typeByte = stream.getByte();
if (typeByte === -1) {
throw new FormatError("Invalid XRef byteWidths 'type'.");
@ -333,14 +323,14 @@ class XRef {
if (typeFieldWidth === 0) {
type = 1;
}
for (j = 0; j < offsetFieldWidth; ++j) {
for (let j = 0; j < offsetFieldWidth; ++j) {
const offsetByte = stream.getByte();
if (offsetByte === -1) {
throw new FormatError("Invalid XRef byteWidths 'offset'.");
}
offset = (offset << 8) | offsetByte;
}
for (j = 0; j < generationFieldWidth; ++j) {
for (let j = 0; j < generationFieldWidth; ++j) {
const generationByte = stream.getByte();
if (generationByte === -1) {
throw new FormatError("Invalid XRef byteWidths 'generation'.");
@ -568,7 +558,7 @@ class XRef {
}
// read the trailer dictionary
const dict = parser.getObj();
if (!isDict(dict)) {
if (!(dict instanceof Dict)) {
continue;
}
// Do some basic validation of the trailer/root dictionary candidate.
@ -660,7 +650,7 @@ class XRef {
if (
!Number.isInteger(parser.getObj()) ||
!isCmd(parser.getObj(), "obj") ||
!isStream((obj = parser.getObj()))
!((obj = parser.getObj()) instanceof BaseStream)
) {
throw new FormatError("Invalid XRef stream");
}
@ -679,7 +669,7 @@ class XRef {
obj = dict.get("Prev");
if (Number.isInteger(obj)) {
this.startXRefQueue.push(obj);
} else if (isRef(obj)) {
} else if (obj instanceof Ref) {
// The spec says Prev must not be a reference, i.e. "/Prev NNN"
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
this.startXRefQueue.push(obj.num);
@ -744,15 +734,30 @@ class XRef {
this._cacheMap.set(num, xrefEntry);
return xrefEntry;
}
// Prevent circular references, in corrupt PDF documents, from hanging the
// worker-thread. This relies, implicitly, on the parsing being synchronous.
if (this._pendingRefs.has(ref)) {
this._pendingRefs.remove(ref);
if (xrefEntry.uncompressed) {
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
} else {
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
warn(`Ignoring circular reference: ${ref}.`);
return CIRCULAR_REF;
}
if (isDict(xrefEntry)) {
this._pendingRefs.put(ref);
try {
if (xrefEntry.uncompressed) {
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
} else {
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
}
this._pendingRefs.remove(ref);
} catch (ex) {
this._pendingRefs.remove(ref);
throw ex;
}
if (xrefEntry instanceof Dict) {
xrefEntry.objId = ref.toString();
} else if (isStream(xrefEntry)) {
} else if (xrefEntry instanceof BaseStream) {
xrefEntry.dict.objId = ref.toString();
}
return xrefEntry;
@ -794,7 +799,7 @@ class XRef {
} else {
xrefEntry = parser.getObj();
}
if (!isStream(xrefEntry)) {
if (!(xrefEntry instanceof BaseStream)) {
if (
typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING")
@ -812,7 +817,7 @@ class XRef {
fetchCompressed(ref, xrefEntry, suppressEncryption = false) {
const tableOffset = xrefEntry.offset;
const stream = this.fetch(Ref.get(tableOffset, 0));
if (!isStream(stream)) {
if (!(stream instanceof BaseStream)) {
throw new FormatError("bad ObjStm stream");
}
const first = stream.dict.get("First");
@ -863,7 +868,7 @@ class XRef {
const obj = parser.getObj();
entries[i] = obj;
if (isStream(obj)) {
if (obj instanceof BaseStream) {
continue;
}
const num = nums[i],

View File

@ -492,3 +492,5 @@
!xfa_issue14315.pdf
!poppler-67295-0.pdf
!poppler-85140-0.pdf
!poppler-91414-0-53.pdf
!poppler-91414-0-54.pdf

View File

@ -0,0 +1,70 @@
%PDF-1.5
%€€€€
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Count 6 0 R
/Kids [3 0 R]
/Type /Pages
>>
endobj
3 0 obj
<<
/Resources <<
/Font <<
/F1 5 0 R
>>
>>
/MediaBox [0 0 795 842]
/Parent 2 0 R
/Contents 4 0 R
/Type /Page
>>
endobj
4 0 obj
<< /Length 43 >>
stream
BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET
endstream
endobj
5 0 obj
<<
/Name /F1
/BaseFont /Helvetica
/Type /Font
/Subtype /Type1
>>
endobj
6 0 obj
<< /Length 6 0 R >>
stream
2
endstream
endobj
7 0 obj
<<>>
endobj
xref
0 8
0000000000 65535 f
0000000015 00000 n
0000000066 00000 n
0000000130 00000 n
0000000269 00000 n
0000000362 00000 n
0000000446 00000 n
0000000500 00000 n
trailer
<<
/Size 8
/Root 1 0 R
/Info 7 0 R
>>
startxref
520
%%EOF

View File

@ -0,0 +1,77 @@
%PDF-1.5
%€€€€
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Count 6 0 R
/Kids [3 0 R]
/Type /Pages
>>
endobj
3 0 obj
<<
/Resources <<
/Font <<
/F1 5 0 R
>>
>>
/MediaBox [0 0 795 842]
/Parent 2 0 R
/Contents 4 0 R
/Type /Page
>>
endobj
4 0 obj
<< /Length 43 >>
stream
BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET
endstream
endobj
5 0 obj
<<
/Name /F1
/BaseFont /Helvetica
/Type /Font
/Subtype /Type1
>>
endobj
6 0 obj
<< /Length 7 0 R >>
stream
foobar
endstream
endobj
7 0 obj
<< /Length 6 0 R >>
stream
foobar
endstream
endobj
8 0 obj
<<>>
endobj
xref
0 9
0000000000 65535 f
0000000015 00000 n
0000000066 00000 n
0000000130 00000 n
0000000269 00000 n
0000000362 00000 n
0000000446 00000 n
0000000506 00000 n
0000000566 00000 n
trailer
<<
/Size 9
/Root 1 0 R
/Info 8 0 R
>>
startxref
586
%%EOF

View File

@ -511,6 +511,40 @@ describe("api", function () {
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});
it("creates pdf doc from PDF files, with circular references", async function () {
  // Start loading both fixture documents before awaiting either of them.
  const loadingTasks = [
    getDocument(buildGetDocumentParams("poppler-91414-0-53.pdf")),
    getDocument(buildGetDocumentParams("poppler-91414-0-54.pdf")),
  ];
  for (const loadingTask of loadingTasks) {
    expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
  }

  // Resolve the documents one after the other, in declaration order.
  const pdfDocuments = [];
  for (const loadingTask of loadingTasks) {
    pdfDocuments.push(await loadingTask.promise);
  }
  for (const pdfDocument of pdfDocuments) {
    expect(pdfDocument.numPages).toEqual(1);
  }

  // Each document is expected to expose its single page through a proxy.
  const pages = [];
  for (const pdfDocument of pdfDocuments) {
    pages.push(await pdfDocument.getPage(1));
  }
  for (const page of pages) {
    expect(page instanceof PDFPageProxy).toEqual(true);
  }

  // Collect every operator list first, then check that each one was fully
  // produced (final chunk reached) and contains more than five operators.
  const opLists = [];
  for (const page of pages) {
    opLists.push(await page.getOperatorList());
  }
  for (const { fnArray, argsArray, lastChunk } of opLists) {
    expect(fnArray.length).toBeGreaterThan(5);
    expect(argsArray.length).toBeGreaterThan(5);
    expect(lastChunk).toEqual(true);
  }

  await Promise.all(loadingTasks.map(loadingTask => loadingTask.destroy()));
});
});
describe("PDFWorker", function () {