Merge pull request #14312 from Snuffleupagus/XRef-circular-reference

Prevent circular references in XRef tables from hanging the worker-thread (issue 14303)
This commit is contained in:
Tim van der Meij 2021-11-28 14:07:02 +01:00 committed by GitHub
commit c14552874b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 227 additions and 37 deletions

View File

@ -632,7 +632,7 @@ class Parser {
// Get the length. // Get the length.
let length = dict.get("Length"); let length = dict.get("Length");
if (!Number.isInteger(length)) { if (!Number.isInteger(length)) {
info(`Bad length "${length}" in stream`); info(`Bad length "${length && length.toString()}" in stream.`);
length = 0; length = 0;
} }

View File

@ -16,6 +16,7 @@
import { assert, shadow, unreachable } from "../shared/util.js"; import { assert, shadow, unreachable } from "../shared/util.js";
import { BaseStream } from "./base_stream.js"; import { BaseStream } from "./base_stream.js";
const CIRCULAR_REF = Symbol("CIRCULAR_REF");
const EOF = Symbol("EOF"); const EOF = Symbol("EOF");
const Name = (function NameClosure() { const Name = (function NameClosure() {
@ -422,6 +423,7 @@ function clearPrimitiveCaches() {
} }
export { export {
CIRCULAR_REF,
clearPrimitiveCaches, clearPrimitiveCaches,
Cmd, Cmd,
Dict, Dict,

View File

@ -21,15 +21,7 @@ import {
InvalidPDFException, InvalidPDFException,
warn, warn,
} from "../shared/util.js"; } from "../shared/util.js";
import { import { CIRCULAR_REF, Cmd, Dict, isCmd, Ref, RefSet } from "./primitives.js";
Cmd,
Dict,
isCmd,
isDict,
isRef,
isStream,
Ref,
} from "./primitives.js";
import { import {
DocStats, DocStats,
MissingDataException, MissingDataException,
@ -38,6 +30,7 @@ import {
XRefParseException, XRefParseException,
} from "./core_utils.js"; } from "./core_utils.js";
import { Lexer, Parser } from "./parser.js"; import { Lexer, Parser } from "./parser.js";
import { BaseStream } from "./base_stream.js";
import { CipherTransformFactory } from "./crypto.js"; import { CipherTransformFactory } from "./crypto.js";
class XRef { class XRef {
@ -47,6 +40,7 @@ class XRef {
this.entries = []; this.entries = [];
this.xrefstms = Object.create(null); this.xrefstms = Object.create(null);
this._cacheMap = new Map(); // Prepare the XRef cache. this._cacheMap = new Map(); // Prepare the XRef cache.
this._pendingRefs = new RefSet();
this.stats = new DocStats(pdfManager.msgHandler); this.stats = new DocStats(pdfManager.msgHandler);
this._newRefNum = null; this._newRefNum = null;
} }
@ -88,7 +82,7 @@ class XRef {
} }
warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`); warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`);
} }
if (isDict(encrypt)) { if (encrypt instanceof Dict) {
const ids = trailerDict.get("ID"); const ids = trailerDict.get("ID");
const fileId = ids && ids.length ? ids[0] : ""; const fileId = ids && ids.length ? ids[0] : "";
// The 'Encrypt' dictionary itself should not be encrypted, and by // The 'Encrypt' dictionary itself should not be encrypted, and by
@ -113,7 +107,7 @@ class XRef {
} }
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`); warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
} }
if (isDict(root) && root.has("Pages")) { if (root instanceof Dict && root.has("Pages")) {
this.root = root; this.root = root;
} else { } else {
if (!recoveryMode) { if (!recoveryMode) {
@ -155,10 +149,10 @@ class XRef {
let dict = parser.getObj(); let dict = parser.getObj();
// The pdflib PDF generator can generate a nested trailer dictionary // The pdflib PDF generator can generate a nested trailer dictionary
if (!isDict(dict) && dict.dict) { if (!(dict instanceof Dict) && dict.dict) {
dict = dict.dict; dict = dict.dict;
} }
if (!isDict(dict)) { if (!(dict instanceof Dict)) {
throw new FormatError( throw new FormatError(
"Invalid XRef table: could not parse trailer dictionary" "Invalid XRef table: could not parse trailer dictionary"
); );
@ -289,19 +283,15 @@ class XRef {
} }
readXRefStream(stream) { readXRefStream(stream) {
let i, j;
const streamState = this.streamState; const streamState = this.streamState;
stream.pos = streamState.streamPos; stream.pos = streamState.streamPos;
const byteWidths = streamState.byteWidths; const [typeFieldWidth, offsetFieldWidth, generationFieldWidth] =
const typeFieldWidth = byteWidths[0]; streamState.byteWidths;
const offsetFieldWidth = byteWidths[1];
const generationFieldWidth = byteWidths[2];
const entryRanges = streamState.entryRanges; const entryRanges = streamState.entryRanges;
while (entryRanges.length > 0) { while (entryRanges.length > 0) {
const first = entryRanges[0]; const [first, n] = entryRanges;
const n = entryRanges[1];
if (!Number.isInteger(first) || !Number.isInteger(n)) { if (!Number.isInteger(first) || !Number.isInteger(n)) {
throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`); throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`);
@ -315,14 +305,14 @@ class XRef {
`Invalid XRef entry fields length: ${first}, ${n}` `Invalid XRef entry fields length: ${first}, ${n}`
); );
} }
for (i = streamState.entryNum; i < n; ++i) { for (let i = streamState.entryNum; i < n; ++i) {
streamState.entryNum = i; streamState.entryNum = i;
streamState.streamPos = stream.pos; streamState.streamPos = stream.pos;
let type = 0, let type = 0,
offset = 0, offset = 0,
generation = 0; generation = 0;
for (j = 0; j < typeFieldWidth; ++j) { for (let j = 0; j < typeFieldWidth; ++j) {
const typeByte = stream.getByte(); const typeByte = stream.getByte();
if (typeByte === -1) { if (typeByte === -1) {
throw new FormatError("Invalid XRef byteWidths 'type'."); throw new FormatError("Invalid XRef byteWidths 'type'.");
@ -333,14 +323,14 @@ class XRef {
if (typeFieldWidth === 0) { if (typeFieldWidth === 0) {
type = 1; type = 1;
} }
for (j = 0; j < offsetFieldWidth; ++j) { for (let j = 0; j < offsetFieldWidth; ++j) {
const offsetByte = stream.getByte(); const offsetByte = stream.getByte();
if (offsetByte === -1) { if (offsetByte === -1) {
throw new FormatError("Invalid XRef byteWidths 'offset'."); throw new FormatError("Invalid XRef byteWidths 'offset'.");
} }
offset = (offset << 8) | offsetByte; offset = (offset << 8) | offsetByte;
} }
for (j = 0; j < generationFieldWidth; ++j) { for (let j = 0; j < generationFieldWidth; ++j) {
const generationByte = stream.getByte(); const generationByte = stream.getByte();
if (generationByte === -1) { if (generationByte === -1) {
throw new FormatError("Invalid XRef byteWidths 'generation'."); throw new FormatError("Invalid XRef byteWidths 'generation'.");
@ -568,7 +558,7 @@ class XRef {
} }
// read the trailer dictionary // read the trailer dictionary
const dict = parser.getObj(); const dict = parser.getObj();
if (!isDict(dict)) { if (!(dict instanceof Dict)) {
continue; continue;
} }
// Do some basic validation of the trailer/root dictionary candidate. // Do some basic validation of the trailer/root dictionary candidate.
@ -660,7 +650,7 @@ class XRef {
if ( if (
!Number.isInteger(parser.getObj()) || !Number.isInteger(parser.getObj()) ||
!isCmd(parser.getObj(), "obj") || !isCmd(parser.getObj(), "obj") ||
!isStream((obj = parser.getObj())) !((obj = parser.getObj()) instanceof BaseStream)
) { ) {
throw new FormatError("Invalid XRef stream"); throw new FormatError("Invalid XRef stream");
} }
@ -679,7 +669,7 @@ class XRef {
obj = dict.get("Prev"); obj = dict.get("Prev");
if (Number.isInteger(obj)) { if (Number.isInteger(obj)) {
this.startXRefQueue.push(obj); this.startXRefQueue.push(obj);
} else if (isRef(obj)) { } else if (obj instanceof Ref) {
// The spec says Prev must not be a reference, i.e. "/Prev NNN" // The spec says Prev must not be a reference, i.e. "/Prev NNN"
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R" // This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
this.startXRefQueue.push(obj.num); this.startXRefQueue.push(obj.num);
@ -744,15 +734,30 @@ class XRef {
this._cacheMap.set(num, xrefEntry); this._cacheMap.set(num, xrefEntry);
return xrefEntry; return xrefEntry;
} }
// Prevent circular references, in corrupt PDF documents, from hanging the
// worker-thread. This relies, implicitly, on the parsing being synchronous.
if (this._pendingRefs.has(ref)) {
this._pendingRefs.remove(ref);
if (xrefEntry.uncompressed) { warn(`Ignoring circular reference: ${ref}.`);
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption); return CIRCULAR_REF;
} else {
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
} }
if (isDict(xrefEntry)) { this._pendingRefs.put(ref);
try {
if (xrefEntry.uncompressed) {
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
} else {
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
}
this._pendingRefs.remove(ref);
} catch (ex) {
this._pendingRefs.remove(ref);
throw ex;
}
if (xrefEntry instanceof Dict) {
xrefEntry.objId = ref.toString(); xrefEntry.objId = ref.toString();
} else if (isStream(xrefEntry)) { } else if (xrefEntry instanceof BaseStream) {
xrefEntry.dict.objId = ref.toString(); xrefEntry.dict.objId = ref.toString();
} }
return xrefEntry; return xrefEntry;
@ -794,7 +799,7 @@ class XRef {
} else { } else {
xrefEntry = parser.getObj(); xrefEntry = parser.getObj();
} }
if (!isStream(xrefEntry)) { if (!(xrefEntry instanceof BaseStream)) {
if ( if (
typeof PDFJSDev === "undefined" || typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING") PDFJSDev.test("!PRODUCTION || TESTING")
@ -812,7 +817,7 @@ class XRef {
fetchCompressed(ref, xrefEntry, suppressEncryption = false) { fetchCompressed(ref, xrefEntry, suppressEncryption = false) {
const tableOffset = xrefEntry.offset; const tableOffset = xrefEntry.offset;
const stream = this.fetch(Ref.get(tableOffset, 0)); const stream = this.fetch(Ref.get(tableOffset, 0));
if (!isStream(stream)) { if (!(stream instanceof BaseStream)) {
throw new FormatError("bad ObjStm stream"); throw new FormatError("bad ObjStm stream");
} }
const first = stream.dict.get("First"); const first = stream.dict.get("First");
@ -863,7 +868,7 @@ class XRef {
const obj = parser.getObj(); const obj = parser.getObj();
entries[i] = obj; entries[i] = obj;
if (isStream(obj)) { if (obj instanceof BaseStream) {
continue; continue;
} }
const num = nums[i], const num = nums[i],

View File

@ -492,3 +492,5 @@
!xfa_issue14315.pdf !xfa_issue14315.pdf
!poppler-67295-0.pdf !poppler-67295-0.pdf
!poppler-85140-0.pdf !poppler-85140-0.pdf
!poppler-91414-0-53.pdf
!poppler-91414-0-54.pdf

View File

@ -0,0 +1,70 @@
%PDF-1.5
%€€€€
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Count 6 0 R
/Kids [3 0 R]
/Type /Pages
>>
endobj
3 0 obj
<<
/Resources <<
/Font <<
/F1 5 0 R
>>
>>
/MediaBox [0 0 795 842]
/Parent 2 0 R
/Contents 4 0 R
/Type /Page
>>
endobj
4 0 obj
<< /Length 43 >>
stream
BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET
endstream
endobj
5 0 obj
<<
/Name /F1
/BaseFont /Helvetica
/Type /Font
/Subtype /Type1
>>
endobj
6 0 obj
<< /Length 6 0 R >>
stream
2
endstream
endobj
7 0 obj
<<>>
endobj
xref
0 8
0000000000 65535 f
0000000015 00000 n
0000000066 00000 n
0000000130 00000 n
0000000269 00000 n
0000000362 00000 n
0000000446 00000 n
0000000500 00000 n
trailer
<<
/Size 8
/Root 1 0 R
/Info 7 0 R
>>
startxref
520
%%EOF

View File

@ -0,0 +1,77 @@
%PDF-1.5
%€€€€
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Count 6 0 R
/Kids [3 0 R]
/Type /Pages
>>
endobj
3 0 obj
<<
/Resources <<
/Font <<
/F1 5 0 R
>>
>>
/MediaBox [0 0 795 842]
/Parent 2 0 R
/Contents 4 0 R
/Type /Page
>>
endobj
4 0 obj
<< /Length 43 >>
stream
BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET
endstream
endobj
5 0 obj
<<
/Name /F1
/BaseFont /Helvetica
/Type /Font
/Subtype /Type1
>>
endobj
6 0 obj
<< /Length 7 0 R >>
stream
foobar
endstream
endobj
7 0 obj
<< /Length 6 0 R >>
stream
foobar
endstream
endobj
8 0 obj
<<>>
endobj
xref
0 9
0000000000 65535 f
0000000015 00000 n
0000000066 00000 n
0000000130 00000 n
0000000269 00000 n
0000000362 00000 n
0000000446 00000 n
0000000506 00000 n
0000000566 00000 n
trailer
<<
/Size 9
/Root 1 0 R
/Info 8 0 R
>>
startxref
586
%%EOF

View File

@ -511,6 +511,40 @@ describe("api", function () {
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
}); });
// Regression test: documents containing circular references must still load
// and render instead of never completing.
// NOTE(review): presumably each fixture contains a stream whose /Length entry
// refers, directly or via a second stream, back to itself — confirm against
// the two PDF files named below.
it("creates pdf doc from PDF files, with circular references", async function () {
// Start loading both fixtures before awaiting either of them.
const loadingTask1 = getDocument(
buildGetDocumentParams("poppler-91414-0-53.pdf")
);
const loadingTask2 = getDocument(
buildGetDocumentParams("poppler-91414-0-54.pdf")
);
expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true);
// Both loading promises must settle; a promise that never resolves would
// stall the test here.
const pdfDocument1 = await loadingTask1.promise;
const pdfDocument2 = await loadingTask2.promise;
// Each document is expected to report exactly one page.
expect(pdfDocument1.numPages).toEqual(1);
expect(pdfDocument2.numPages).toEqual(1);
const pageA = await pdfDocument1.getPage(1);
const pageB = await pdfDocument2.getPage(1);
expect(pageA instanceof PDFPageProxy).toEqual(true);
expect(pageB instanceof PDFPageProxy).toEqual(true);
// The operator lists are fetched one after the other, while the array
// literal is being built, and then checked with identical expectations.
for (const opList of [
await pageA.getOperatorList(),
await pageB.getOperatorList(),
]) {
// More than five entries in each array, and the final chunk was received.
expect(opList.fnArray.length).toBeGreaterThan(5);
expect(opList.argsArray.length).toBeGreaterThan(5);
expect(opList.lastChunk).toEqual(true);
}
// Tear down both loading tasks before the test finishes.
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});
}); });
describe("PDFWorker", function () { describe("PDFWorker", function () {