Merge pull request #14338 from Snuffleupagus/XRef-more-Pages-validation

[api-minor] Clear all caches in `XRef.indexObjects`, and improve /Root dictionary validation in `XRef.parse` (issue 14303)
This commit is contained in:
Tim van der Meij 2021-12-04 13:23:40 +01:00 committed by GitHub
commit 335c4c8a43
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 66 additions and 43 deletions

View File

@ -107,14 +107,26 @@ class XRef {
} }
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`); warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
} }
if (root instanceof Dict && root.has("Pages")) { if (root instanceof Dict) {
this.root = root; try {
} else { const pages = root.get("Pages");
if (!recoveryMode) { if (pages instanceof Dict) {
throw new XRefParseException(); this.root = root;
return;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`XRef.parse - Invalid "Pages" reference: "${ex}".`);
} }
throw new FormatError("Invalid root reference");
} }
if (!recoveryMode) {
throw new XRefParseException();
}
// Even recovery failed, there's nothing more we can do here.
throw new InvalidPDFException("Invalid Root reference.");
} }
processXRefTable(parser) { processXRefTable(parser) {
@ -417,6 +429,7 @@ class XRef {
// Clear out any existing entries, since they may be bogus. // Clear out any existing entries, since they may be bogus.
this.entries.length = 0; this.entries.length = 0;
this._cacheMap.clear();
const stream = this.stream; const stream = this.stream;
stream.pos = 0; stream.pos = 0;

View File

@ -445,7 +445,7 @@ describe("api", function () {
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
}); });
it("creates pdf doc from PDF file with bad XRef table", async function () { it("creates pdf doc from PDF file with bad XRef entry", async function () {
// A corrupt PDF file, where the XRef table have (some) bogus entries. // A corrupt PDF file, where the XRef table have (some) bogus entries.
const loadingTask = getDocument( const loadingTask = getDocument(
buildGetDocumentParams("PDFBOX-4352-0.pdf", { buildGetDocumentParams("PDFBOX-4352-0.pdf", {
@ -468,6 +468,26 @@ describe("api", function () {
await loadingTask.destroy(); await loadingTask.destroy();
}); });
it("creates pdf doc from PDF file with bad XRef header", async function () {
// Loads a fuzzed fixture (see the file name) and checks that the document
// is still usable: one page, which renders to an empty operator list.
const loadingTask = getDocument(
buildGetDocumentParams("GHOSTSCRIPT-698804-1-fuzzed.pdf")
);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
// Loading itself is expected to succeed and to report exactly one page.
const pdfDocument = await loadingTask.promise;
expect(pdfDocument.numPages).toEqual(1);
const page = await pdfDocument.getPage(1);
expect(page instanceof PDFPageProxy).toEqual(true);
// The operator list is expected to be empty (`fnArray` and `argsArray` both
// have length 0) and to be flagged as the last chunk.
const opList = await page.getOperatorList();
expect(opList.fnArray.length).toEqual(0);
expect(opList.argsArray.length).toEqual(0);
expect(opList.lastChunk).toEqual(true);
// Always tear down the loading task at the end of the test.
await loadingTask.destroy();
});
it("creates pdf doc from PDF file with bad XRef byteWidths", async function () { it("creates pdf doc from PDF file with bad XRef byteWidths", async function () {
// A corrupt PDF file, where the XRef /W-array have (some) bogus entries. // A corrupt PDF file, where the XRef /W-array have (some) bogus entries.
const loadingTask = getDocument( const loadingTask = getDocument(
@ -488,6 +508,25 @@ describe("api", function () {
await loadingTask.destroy(); await loadingTask.destroy();
}); });
it("creates pdf doc from PDF file with inaccessible /Pages tree", async function () {
// NOTE: despite the "creates pdf doc" title, this test asserts that loading
// the fuzzed fixture is *rejected* with an `InvalidPDFException`.
const loadingTask = getDocument(
buildGetDocumentParams("poppler-395-0-fuzzed.pdf")
);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
try {
await loadingTask.promise;
// Shouldn't get here.
// Deliberately failing expectation: reaching this line means the promise
// resolved instead of rejecting.
expect(false).toEqual(true);
} catch (reason) {
// Both the exception type and its exact message are pinned.
expect(reason instanceof InvalidPDFException).toEqual(true);
expect(reason.message).toEqual("Invalid Root reference.");
}
// The loading task is destroyed even though loading failed.
await loadingTask.destroy();
});
it("creates pdf doc from PDF files, with bad /Pages tree /Count", async function () { it("creates pdf doc from PDF files, with bad /Pages tree /Count", async function () {
const loadingTask1 = getDocument( const loadingTask1 = getDocument(
buildGetDocumentParams("poppler-67295-0.pdf") buildGetDocumentParams("poppler-67295-0.pdf")
@ -495,30 +534,23 @@ describe("api", function () {
const loadingTask2 = getDocument( const loadingTask2 = getDocument(
buildGetDocumentParams("poppler-85140-0.pdf") buildGetDocumentParams("poppler-85140-0.pdf")
); );
const loadingTask3 = getDocument(
buildGetDocumentParams("poppler-395-0-fuzzed.pdf")
);
const loadingTask4 = getDocument(
buildGetDocumentParams("GHOSTSCRIPT-698804-1-fuzzed.pdf")
);
expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true); expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true); expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask3 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask4 instanceof PDFDocumentLoadingTask).toEqual(true);
const pdfDocument1 = await loadingTask1.promise; const pdfDocument1 = await loadingTask1.promise;
const pdfDocument2 = await loadingTask2.promise; const pdfDocument2 = await loadingTask2.promise;
const pdfDocument3 = await loadingTask3.promise;
const pdfDocument4 = await loadingTask4.promise;
expect(pdfDocument1.numPages).toEqual(1); expect(pdfDocument1.numPages).toEqual(1);
expect(pdfDocument2.numPages).toEqual(1); expect(pdfDocument2.numPages).toEqual(1);
expect(pdfDocument3.numPages).toEqual(1);
expect(pdfDocument4.numPages).toEqual(1);
const pageA = await pdfDocument1.getPage(1); const page = await pdfDocument1.getPage(1);
expect(pageA instanceof PDFPageProxy).toEqual(true); expect(page instanceof PDFPageProxy).toEqual(true);
const opList = await page.getOperatorList();
expect(opList.fnArray.length).toBeGreaterThan(5);
expect(opList.argsArray.length).toBeGreaterThan(5);
expect(opList.lastChunk).toEqual(true);
try { try {
await pdfDocument2.getPage(1); await pdfDocument2.getPage(1);
@ -529,28 +561,6 @@ describe("api", function () {
expect(reason instanceof UnknownErrorException).toEqual(true); expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual("Bad (uncompressed) XRef entry: 3R"); expect(reason.message).toEqual("Bad (uncompressed) XRef entry: 3R");
} }
try {
await pdfDocument3.getPage(1);
// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual(
"Page dictionary kid reference points to wrong type of object."
);
}
try {
await pdfDocument4.getPage(1);
// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual(
"Page dictionary kid reference points to wrong type of object."
);
}
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
}); });