Abort parsing when the XRef /W-array contain bogus entries (issue 14303)

For this particular PDF document, we have `/W [1 2 166666666666666666666666666]` which obviously makes no sense.

While this patch makes no attempt at actually validating the entries in the /W-array, we'll now simply abort all processing when the end of the PDF document has been reached (thus preventing hanging the browser).
Please note that this patch doesn't enable the PDF document to be loaded/rendered, but at least it fails "correctly" now.

Fixes one of the issues listed in issue 14303, namely the `REDHAT-1531897-0.pdf`document.
This commit is contained in:
Jonas Jenwald 2021-11-25 13:28:24 +01:00
parent a2c380ccb3
commit ca8d2bdce4
4 changed files with 36 additions and 3 deletions

View File

@ -323,17 +323,29 @@ class XRef {
offset = 0,
generation = 0;
for (j = 0; j < typeFieldWidth; ++j) {
type = (type << 8) | stream.getByte();
const typeByte = stream.getByte();
if (typeByte === -1) {
throw new FormatError("Invalid XRef byteWidths 'type'.");
}
type = (type << 8) | typeByte;
}
// if type field is absent, its default value is 1
if (typeFieldWidth === 0) {
type = 1;
}
for (j = 0; j < offsetFieldWidth; ++j) {
offset = (offset << 8) | stream.getByte();
const offsetByte = stream.getByte();
if (offsetByte === -1) {
throw new FormatError("Invalid XRef byteWidths 'offset'.");
}
offset = (offset << 8) | offsetByte;
}
for (j = 0; j < generationFieldWidth; ++j) {
generation = (generation << 8) | stream.getByte();
const generationByte = stream.getByte();
if (generationByte === -1) {
throw new FormatError("Invalid XRef byteWidths 'generation'.");
}
generation = (generation << 8) | generationByte;
}
const entry = {};
entry.offset = offset;

View File

@ -488,3 +488,4 @@
!rc_annotation.pdf
!issue14267.pdf
!PDFBOX-4352-0.pdf
!REDHAT-1531897-0.pdf

Binary file not shown.

View File

@ -458,6 +458,26 @@ describe("api", function () {
await loadingTask.destroy();
});
it("creates pdf doc from PDF file with bad XRef byteWidths", async function () {
// A corrupt PDF file, where the XRef /W-array have (some) bogus entries.
const loadingTask = getDocument(
buildGetDocumentParams("REDHAT-1531897-0.pdf")
);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
try {
await loadingTask.promise;
// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof InvalidPDFException).toEqual(true);
expect(reason.message).toEqual("Invalid PDF structure.");
}
await loadingTask.destroy();
});
});
describe("PDFWorker", function () {