[api-minor] Let Catalog.getAllPageDicts
return an *empty* dictionary when loading the first /Page fails (issue 15590)
In order to support opening certain corrupt PDF documents, particularly hand-edited ones, this patch lets the `Catalog.getAllPageDicts` method fall back to returning an *empty* dictionary to replace (only) the first /Page of the document. Given that the viewer cannot initialize/load without access to the first page, this fallback allows e.g. document-level scripting to run as expected. Note that by effectively replacing a corrupt or missing first /Page in this way[1], we'll now render nothing but a *blank* page for certain cases of broken/corrupt PDF documents, which may look weird. *Please note:* This functionality is controlled via the existing `stopAtErrors` option, which can be passed to `getDocument` (setting it to `true` disables the fallback), since it's easy to imagine use-cases where this sort of fallback behaviour isn't desirable. --- [1] Currently we still require that a /Pages-dictionary is found; however, it *may* be possible to relax even that assumption if that becomes absolutely necessary for future corrupt documents.
This commit is contained in:
parent
2516ffa78e
commit
23930a249e
@ -1191,6 +1191,8 @@ class Catalog {
|
||||
* @returns {Promise<Map>}
|
||||
*/
|
||||
async getAllPageDicts(recoveryMode = false) {
|
||||
const { ignoreErrors } = this.pdfManager.evaluatorOptions;
|
||||
|
||||
const queue = [{ currentNode: this.toplevelPagesDict, posInKids: 0 }];
|
||||
const visitedNodes = new RefSet();
|
||||
|
||||
@ -1215,6 +1217,11 @@ class Catalog {
|
||||
if (error instanceof XRefEntryException && !recoveryMode) {
|
||||
throw error;
|
||||
}
|
||||
if (recoveryMode && ignoreErrors && pageIndex === 0) {
|
||||
// Ensure that the viewer will always load (fixes issue15590.pdf).
|
||||
warn(`getAllPageDicts - Skipping invalid first page: "${error}".`);
|
||||
error = Dict.empty;
|
||||
}
|
||||
|
||||
map.set(pageIndex++, [error, null]);
|
||||
}
|
||||
|
@ -602,27 +602,42 @@ describe("api", function () {
|
||||
const loadingTask2 = getDocument(
|
||||
buildGetDocumentParams("poppler-85140-0.pdf")
|
||||
);
|
||||
const loadingTask3 = getDocument(
|
||||
buildGetDocumentParams("poppler-85140-0.pdf", { stopAtErrors: true })
|
||||
);
|
||||
|
||||
expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||
expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||
expect(loadingTask3 instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||
|
||||
const pdfDocument1 = await loadingTask1.promise;
|
||||
const pdfDocument2 = await loadingTask2.promise;
|
||||
const pdfDocument3 = await loadingTask3.promise;
|
||||
|
||||
expect(pdfDocument1.numPages).toEqual(1);
|
||||
expect(pdfDocument2.numPages).toEqual(1);
|
||||
expect(pdfDocument3.numPages).toEqual(1);
|
||||
|
||||
const page = await pdfDocument1.getPage(1);
|
||||
expect(page instanceof PDFPageProxy).toEqual(true);
|
||||
const pageA = await pdfDocument1.getPage(1);
|
||||
expect(pageA instanceof PDFPageProxy).toEqual(true);
|
||||
|
||||
const opList = await page.getOperatorList();
|
||||
expect(opList.fnArray.length).toBeGreaterThan(5);
|
||||
expect(opList.argsArray.length).toBeGreaterThan(5);
|
||||
expect(opList.lastChunk).toEqual(true);
|
||||
expect(opList.separateAnnots).toEqual(null);
|
||||
const opListA = await pageA.getOperatorList();
|
||||
expect(opListA.fnArray.length).toBeGreaterThan(5);
|
||||
expect(opListA.argsArray.length).toBeGreaterThan(5);
|
||||
expect(opListA.lastChunk).toEqual(true);
|
||||
expect(opListA.separateAnnots).toEqual(null);
|
||||
|
||||
const pageB = await pdfDocument2.getPage(1);
|
||||
expect(pageB instanceof PDFPageProxy).toEqual(true);
|
||||
|
||||
const opListB = await pageB.getOperatorList();
|
||||
expect(opListB.fnArray.length).toBe(0);
|
||||
expect(opListB.argsArray.length).toBe(0);
|
||||
expect(opListB.lastChunk).toEqual(true);
|
||||
expect(opListB.separateAnnots).toEqual(null);
|
||||
|
||||
try {
|
||||
await pdfDocument2.getPage(1);
|
||||
await pdfDocument3.getPage(1);
|
||||
|
||||
// Shouldn't get here.
|
||||
expect(false).toEqual(true);
|
||||
@ -631,7 +646,11 @@ describe("api", function () {
|
||||
expect(reason.message).toEqual("Bad (uncompressed) XRef entry: 3R");
|
||||
}
|
||||
|
||||
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
|
||||
await Promise.all([
|
||||
loadingTask1.destroy(),
|
||||
loadingTask2.destroy(),
|
||||
loadingTask3.destroy(),
|
||||
]);
|
||||
});
|
||||
|
||||
it("creates pdf doc from PDF files, with circular references", async function () {
|
||||
@ -751,17 +770,8 @@ describe("api", function () {
|
||||
OpenAction: ["func=function(){app.alert(1)};func();"],
|
||||
});
|
||||
|
||||
try {
|
||||
await pdfDocument.getPage(1);
|
||||
|
||||
// Shouldn't get here.
|
||||
expect(false).toEqual(true);
|
||||
} catch (reason) {
|
||||
expect(reason instanceof UnknownErrorException).toEqual(true);
|
||||
expect(reason.message).toEqual(
|
||||
"Page dictionary kids object is not an array."
|
||||
);
|
||||
}
|
||||
const page = await pdfDocument.getPage(1);
|
||||
expect(page instanceof PDFPageProxy).toEqual(true);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user