Merge pull request #14312 from Snuffleupagus/XRef-circular-reference
Prevent circular references in XRef tables from hanging the worker-thread (issue 14303)
This commit is contained in:
commit
c14552874b
@ -632,7 +632,7 @@ class Parser {
|
||||
// Get the length.
|
||||
let length = dict.get("Length");
|
||||
if (!Number.isInteger(length)) {
|
||||
info(`Bad length "${length}" in stream`);
|
||||
info(`Bad length "${length && length.toString()}" in stream.`);
|
||||
length = 0;
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
import { assert, shadow, unreachable } from "../shared/util.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
|
||||
const CIRCULAR_REF = Symbol("CIRCULAR_REF");
|
||||
const EOF = Symbol("EOF");
|
||||
|
||||
const Name = (function NameClosure() {
|
||||
@ -422,6 +423,7 @@ function clearPrimitiveCaches() {
|
||||
}
|
||||
|
||||
export {
|
||||
CIRCULAR_REF,
|
||||
clearPrimitiveCaches,
|
||||
Cmd,
|
||||
Dict,
|
||||
|
@ -21,15 +21,7 @@ import {
|
||||
InvalidPDFException,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
import {
|
||||
Cmd,
|
||||
Dict,
|
||||
isCmd,
|
||||
isDict,
|
||||
isRef,
|
||||
isStream,
|
||||
Ref,
|
||||
} from "./primitives.js";
|
||||
import { CIRCULAR_REF, Cmd, Dict, isCmd, Ref, RefSet } from "./primitives.js";
|
||||
import {
|
||||
DocStats,
|
||||
MissingDataException,
|
||||
@ -38,6 +30,7 @@ import {
|
||||
XRefParseException,
|
||||
} from "./core_utils.js";
|
||||
import { Lexer, Parser } from "./parser.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { CipherTransformFactory } from "./crypto.js";
|
||||
|
||||
class XRef {
|
||||
@ -47,6 +40,7 @@ class XRef {
|
||||
this.entries = [];
|
||||
this.xrefstms = Object.create(null);
|
||||
this._cacheMap = new Map(); // Prepare the XRef cache.
|
||||
this._pendingRefs = new RefSet();
|
||||
this.stats = new DocStats(pdfManager.msgHandler);
|
||||
this._newRefNum = null;
|
||||
}
|
||||
@ -88,7 +82,7 @@ class XRef {
|
||||
}
|
||||
warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`);
|
||||
}
|
||||
if (isDict(encrypt)) {
|
||||
if (encrypt instanceof Dict) {
|
||||
const ids = trailerDict.get("ID");
|
||||
const fileId = ids && ids.length ? ids[0] : "";
|
||||
// The 'Encrypt' dictionary itself should not be encrypted, and by
|
||||
@ -113,7 +107,7 @@ class XRef {
|
||||
}
|
||||
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
|
||||
}
|
||||
if (isDict(root) && root.has("Pages")) {
|
||||
if (root instanceof Dict && root.has("Pages")) {
|
||||
this.root = root;
|
||||
} else {
|
||||
if (!recoveryMode) {
|
||||
@ -155,10 +149,10 @@ class XRef {
|
||||
let dict = parser.getObj();
|
||||
|
||||
// The pdflib PDF generator can generate a nested trailer dictionary
|
||||
if (!isDict(dict) && dict.dict) {
|
||||
if (!(dict instanceof Dict) && dict.dict) {
|
||||
dict = dict.dict;
|
||||
}
|
||||
if (!isDict(dict)) {
|
||||
if (!(dict instanceof Dict)) {
|
||||
throw new FormatError(
|
||||
"Invalid XRef table: could not parse trailer dictionary"
|
||||
);
|
||||
@ -289,19 +283,15 @@ class XRef {
|
||||
}
|
||||
|
||||
readXRefStream(stream) {
|
||||
let i, j;
|
||||
const streamState = this.streamState;
|
||||
stream.pos = streamState.streamPos;
|
||||
|
||||
const byteWidths = streamState.byteWidths;
|
||||
const typeFieldWidth = byteWidths[0];
|
||||
const offsetFieldWidth = byteWidths[1];
|
||||
const generationFieldWidth = byteWidths[2];
|
||||
const [typeFieldWidth, offsetFieldWidth, generationFieldWidth] =
|
||||
streamState.byteWidths;
|
||||
|
||||
const entryRanges = streamState.entryRanges;
|
||||
while (entryRanges.length > 0) {
|
||||
const first = entryRanges[0];
|
||||
const n = entryRanges[1];
|
||||
const [first, n] = entryRanges;
|
||||
|
||||
if (!Number.isInteger(first) || !Number.isInteger(n)) {
|
||||
throw new FormatError(`Invalid XRef range fields: ${first}, ${n}`);
|
||||
@ -315,14 +305,14 @@ class XRef {
|
||||
`Invalid XRef entry fields length: ${first}, ${n}`
|
||||
);
|
||||
}
|
||||
for (i = streamState.entryNum; i < n; ++i) {
|
||||
for (let i = streamState.entryNum; i < n; ++i) {
|
||||
streamState.entryNum = i;
|
||||
streamState.streamPos = stream.pos;
|
||||
|
||||
let type = 0,
|
||||
offset = 0,
|
||||
generation = 0;
|
||||
for (j = 0; j < typeFieldWidth; ++j) {
|
||||
for (let j = 0; j < typeFieldWidth; ++j) {
|
||||
const typeByte = stream.getByte();
|
||||
if (typeByte === -1) {
|
||||
throw new FormatError("Invalid XRef byteWidths 'type'.");
|
||||
@ -333,14 +323,14 @@ class XRef {
|
||||
if (typeFieldWidth === 0) {
|
||||
type = 1;
|
||||
}
|
||||
for (j = 0; j < offsetFieldWidth; ++j) {
|
||||
for (let j = 0; j < offsetFieldWidth; ++j) {
|
||||
const offsetByte = stream.getByte();
|
||||
if (offsetByte === -1) {
|
||||
throw new FormatError("Invalid XRef byteWidths 'offset'.");
|
||||
}
|
||||
offset = (offset << 8) | offsetByte;
|
||||
}
|
||||
for (j = 0; j < generationFieldWidth; ++j) {
|
||||
for (let j = 0; j < generationFieldWidth; ++j) {
|
||||
const generationByte = stream.getByte();
|
||||
if (generationByte === -1) {
|
||||
throw new FormatError("Invalid XRef byteWidths 'generation'.");
|
||||
@ -568,7 +558,7 @@ class XRef {
|
||||
}
|
||||
// read the trailer dictionary
|
||||
const dict = parser.getObj();
|
||||
if (!isDict(dict)) {
|
||||
if (!(dict instanceof Dict)) {
|
||||
continue;
|
||||
}
|
||||
// Do some basic validation of the trailer/root dictionary candidate.
|
||||
@ -660,7 +650,7 @@ class XRef {
|
||||
if (
|
||||
!Number.isInteger(parser.getObj()) ||
|
||||
!isCmd(parser.getObj(), "obj") ||
|
||||
!isStream((obj = parser.getObj()))
|
||||
!((obj = parser.getObj()) instanceof BaseStream)
|
||||
) {
|
||||
throw new FormatError("Invalid XRef stream");
|
||||
}
|
||||
@ -679,7 +669,7 @@ class XRef {
|
||||
obj = dict.get("Prev");
|
||||
if (Number.isInteger(obj)) {
|
||||
this.startXRefQueue.push(obj);
|
||||
} else if (isRef(obj)) {
|
||||
} else if (obj instanceof Ref) {
|
||||
// The spec says Prev must not be a reference, i.e. "/Prev NNN"
|
||||
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
|
||||
this.startXRefQueue.push(obj.num);
|
||||
@ -744,15 +734,30 @@ class XRef {
|
||||
this._cacheMap.set(num, xrefEntry);
|
||||
return xrefEntry;
|
||||
}
|
||||
// Prevent circular references, in corrupt PDF documents, from hanging the
|
||||
// worker-thread. This relies, implicitly, on the parsing being synchronous.
|
||||
if (this._pendingRefs.has(ref)) {
|
||||
this._pendingRefs.remove(ref);
|
||||
|
||||
if (xrefEntry.uncompressed) {
|
||||
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
|
||||
} else {
|
||||
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
|
||||
warn(`Ignoring circular reference: ${ref}.`);
|
||||
return CIRCULAR_REF;
|
||||
}
|
||||
if (isDict(xrefEntry)) {
|
||||
this._pendingRefs.put(ref);
|
||||
|
||||
try {
|
||||
if (xrefEntry.uncompressed) {
|
||||
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
|
||||
} else {
|
||||
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
|
||||
}
|
||||
this._pendingRefs.remove(ref);
|
||||
} catch (ex) {
|
||||
this._pendingRefs.remove(ref);
|
||||
throw ex;
|
||||
}
|
||||
if (xrefEntry instanceof Dict) {
|
||||
xrefEntry.objId = ref.toString();
|
||||
} else if (isStream(xrefEntry)) {
|
||||
} else if (xrefEntry instanceof BaseStream) {
|
||||
xrefEntry.dict.objId = ref.toString();
|
||||
}
|
||||
return xrefEntry;
|
||||
@ -794,7 +799,7 @@ class XRef {
|
||||
} else {
|
||||
xrefEntry = parser.getObj();
|
||||
}
|
||||
if (!isStream(xrefEntry)) {
|
||||
if (!(xrefEntry instanceof BaseStream)) {
|
||||
if (
|
||||
typeof PDFJSDev === "undefined" ||
|
||||
PDFJSDev.test("!PRODUCTION || TESTING")
|
||||
@ -812,7 +817,7 @@ class XRef {
|
||||
fetchCompressed(ref, xrefEntry, suppressEncryption = false) {
|
||||
const tableOffset = xrefEntry.offset;
|
||||
const stream = this.fetch(Ref.get(tableOffset, 0));
|
||||
if (!isStream(stream)) {
|
||||
if (!(stream instanceof BaseStream)) {
|
||||
throw new FormatError("bad ObjStm stream");
|
||||
}
|
||||
const first = stream.dict.get("First");
|
||||
@ -863,7 +868,7 @@ class XRef {
|
||||
|
||||
const obj = parser.getObj();
|
||||
entries[i] = obj;
|
||||
if (isStream(obj)) {
|
||||
if (obj instanceof BaseStream) {
|
||||
continue;
|
||||
}
|
||||
const num = nums[i],
|
||||
|
2
test/pdfs/.gitignore
vendored
2
test/pdfs/.gitignore
vendored
@ -492,3 +492,5 @@
|
||||
!xfa_issue14315.pdf
|
||||
!poppler-67295-0.pdf
|
||||
!poppler-85140-0.pdf
|
||||
!poppler-91414-0-53.pdf
|
||||
!poppler-91414-0-54.pdf
|
||||
|
70
test/pdfs/poppler-91414-0-53.pdf
Normal file
70
test/pdfs/poppler-91414-0-53.pdf
Normal file
@ -0,0 +1,70 @@
|
||||
%PDF-1.5
|
||||
%€€€€
|
||||
1 0 obj
|
||||
<<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<<
|
||||
/Count 6 0 R
|
||||
/Kids [3 0 R]
|
||||
/Type /Pages
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 5 0 R
|
||||
>>
|
||||
>>
|
||||
/MediaBox [0 0 795 842]
|
||||
/Parent 2 0 R
|
||||
/Contents 4 0 R
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<< /Length 43 >>
|
||||
stream
|
||||
BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET
|
||||
endstream
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/Name /F1
|
||||
/BaseFont /Helvetica
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
>>
|
||||
endobj
|
||||
6 0 obj
|
||||
<< /Length 6 0 R >>
|
||||
stream
|
||||
2
|
||||
endstream
|
||||
endobj
|
||||
7 0 obj
|
||||
<<>>
|
||||
endobj
|
||||
xref
|
||||
0 8
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000066 00000 n
|
||||
0000000130 00000 n
|
||||
0000000269 00000 n
|
||||
0000000362 00000 n
|
||||
0000000446 00000 n
|
||||
0000000500 00000 n
|
||||
trailer
|
||||
<<
|
||||
/Size 8
|
||||
/Root 1 0 R
|
||||
/Info 7 0 R
|
||||
>>
|
||||
startxref
|
||||
520
|
||||
%%EOF
|
77
test/pdfs/poppler-91414-0-54.pdf
Normal file
77
test/pdfs/poppler-91414-0-54.pdf
Normal file
@ -0,0 +1,77 @@
|
||||
%PDF-1.5
|
||||
%€€€€
|
||||
1 0 obj
|
||||
<<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<<
|
||||
/Count 6 0 R
|
||||
/Kids [3 0 R]
|
||||
/Type /Pages
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 5 0 R
|
||||
>>
|
||||
>>
|
||||
/MediaBox [0 0 795 842]
|
||||
/Parent 2 0 R
|
||||
/Contents 4 0 R
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<< /Length 43 >>
|
||||
stream
|
||||
BT 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET
|
||||
endstream
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/Name /F1
|
||||
/BaseFont /Helvetica
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
>>
|
||||
endobj
|
||||
6 0 obj
|
||||
<< /Length 7 0 R >>
|
||||
stream
|
||||
foobar
|
||||
endstream
|
||||
endobj
|
||||
7 0 obj
|
||||
<< /Length 6 0 R >>
|
||||
stream
|
||||
foobar
|
||||
endstream
|
||||
endobj
|
||||
8 0 obj
|
||||
<<>>
|
||||
endobj
|
||||
xref
|
||||
0 9
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000066 00000 n
|
||||
0000000130 00000 n
|
||||
0000000269 00000 n
|
||||
0000000362 00000 n
|
||||
0000000446 00000 n
|
||||
0000000506 00000 n
|
||||
0000000566 00000 n
|
||||
trailer
|
||||
<<
|
||||
/Size 9
|
||||
/Root 1 0 R
|
||||
/Info 8 0 R
|
||||
>>
|
||||
startxref
|
||||
586
|
||||
%%EOF
|
@ -511,6 +511,40 @@ describe("api", function () {
|
||||
|
||||
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
|
||||
});
|
||||
|
||||
it("creates pdf doc from PDF files, with circular references", async function () {
  // Regression fixtures: in each file a stream's `/Length` entry points back
  // at the stream itself, either directly (-53) or through a second stream
  // object (-54).
  const fixtureNames = ["poppler-91414-0-53.pdf", "poppler-91414-0-54.pdf"];

  // Kick off both loads before awaiting anything.
  const loadingTasks = fixtureNames.map(name =>
    getDocument(buildGetDocumentParams(name))
  );
  for (const task of loadingTasks) {
    expect(task instanceof PDFDocumentLoadingTask).toEqual(true);
  }

  // Resolve the documents one after the other, in fixture order.
  const pdfDocuments = [];
  for (const task of loadingTasks) {
    pdfDocuments.push(await task.promise);
  }
  for (const pdfDocument of pdfDocuments) {
    expect(pdfDocument.numPages).toEqual(1);
  }

  // Fetch the single page of each document, again in order.
  const pages = [];
  for (const pdfDocument of pdfDocuments) {
    pages.push(await pdfDocument.getPage(1));
  }
  for (const page of pages) {
    expect(page instanceof PDFPageProxy).toEqual(true);
  }

  // The operator lists must be complete and non-trivial for both pages.
  const operatorLists = [];
  for (const page of pages) {
    operatorLists.push(await page.getOperatorList());
  }
  for (const { fnArray, argsArray, lastChunk } of operatorLists) {
    expect(fnArray.length).toBeGreaterThan(5);
    expect(argsArray.length).toBeGreaterThan(5);
    expect(lastChunk).toEqual(true);
  }

  await Promise.all(loadingTasks.map(task => task.destroy()));
});
|
||||
});
|
||||
|
||||
describe("PDFWorker", function () {
|
||||
|
Loading…
x
Reference in New Issue
Block a user