Handle errors correctly when data lookup fails during /Pages-tree parsing (issue 14303)

This only applies to severely corrupt documents, where it's possible that the `Parser` throws when we try to access e.g. a /Kids-entry in the /Pages-tree.

Fixes two of the cases listed in issue 14303, namely the `poppler-742-0.pdf...` and `poppler-937-0.pdf...` documents.
This commit is contained in:
Jonas Jenwald 2021-12-01 19:35:02 +01:00
parent 700eaecddd
commit 63be23f05b
5 changed files with 71 additions and 6 deletions

View File

@ -1091,15 +1091,14 @@ class Catalog {
const visitedNodes = new RefSet();
const xref = this.xref,
pageKidsCountCache = this.pageKidsCountCache;
let count,
currentPageIndex = 0;
let currentPageIndex = 0;
function next() {
while (nodesToVisit.length) {
const currentNode = nodesToVisit.pop();
if (isRef(currentNode)) {
count = pageKidsCountCache.get(currentNode);
const count = pageKidsCountCache.get(currentNode);
// Skip nodes where the page can't be.
if (count > 0 && currentPageIndex + count < pageIndex) {
currentPageIndex += count;
@ -1146,7 +1145,14 @@ class Catalog {
return;
}
count = currentNode.get("Count");
let count;
try {
count = currentNode.get("Count");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
}
if (Number.isInteger(count) && count >= 0 && !skipCount) {
// Cache the Kids count, since it can reduce redundant lookups in
// documents where all nodes are found at *one* level of the tree.
@ -1161,13 +1167,28 @@ class Catalog {
}
}
const kids = currentNode.get("Kids");
let kids;
try {
kids = currentNode.get("Kids");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
}
if (!Array.isArray(kids)) {
// Prevent errors in corrupt PDF documents that violate the
// specification by *inlining* Page dicts directly in the Kids
// array, rather than using indirect objects (fixes issue9540.pdf).
let type;
try {
type = currentNode.get("Type");
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
}
if (
isName(currentNode.get("Type"), "Page") ||
isName(type, "Page") ||
(!currentNode.has("Type") && currentNode.has("Contents"))
) {
if (currentPageIndex === pageIndex) {

View File

@ -494,3 +494,5 @@
!poppler-85140-0.pdf
!poppler-91414-0-53.pdf
!poppler-91414-0-54.pdf
!poppler-742-0-fuzzed.pdf
!poppler-937-0-fuzzed.pdf

Binary file not shown.

Binary file not shown.

View File

@ -545,6 +545,48 @@ describe("api", function () {
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});
it("creates pdf doc from PDF files, with bad /Pages tree /Kids entries", async function () {
  // Both fixtures are expected to load and report a single page, while
  // fetching that page must reject because its /Kids entry is unusable.
  const fileNames = ["poppler-742-0-fuzzed.pdf", "poppler-937-0-fuzzed.pdf"];
  const expectedMessage = "Page dictionary kids object is not an array.";

  const loadingTasks = fileNames.map(fileName =>
    getDocument(buildGetDocumentParams(fileName))
  );
  for (const loadingTask of loadingTasks) {
    expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
  }

  // Resolve the documents one after the other, in fixture order.
  const pdfDocuments = [];
  for (const loadingTask of loadingTasks) {
    pdfDocuments.push(await loadingTask.promise);
  }
  for (const pdfDocument of pdfDocuments) {
    expect(pdfDocument.numPages).toEqual(1);
  }

  for (const pdfDocument of pdfDocuments) {
    try {
      await pdfDocument.getPage(1);

      // Shouldn't get here.
      expect(false).toEqual(true);
    } catch (reason) {
      expect(reason instanceof UnknownErrorException).toEqual(true);
      expect(reason.message).toEqual(expectedMessage);
    }
  }

  await Promise.all(loadingTasks.map(loadingTask => loadingTask.destroy()));
});
});
describe("PDFWorker", function () {