Ensure that ChunkedStream won't attempt to request data *beyond* the document size (issue 14303)

This bug was surprisingly difficult to track down, since it didn't just depend on range-requests being used but also on how quickly the document was loaded. To even be able to reproduce this locally, I had to use a very small `rangeChunkSize`-value (note the unit-test).

The cause of this bug is a bogus entry in the XRef-table, which causes us to attempt to request data from *beyond* the actual document size and thus get into an infinite loop.

Fixes *one* of the issues listed in issue 14303, namely the `PDFBOX-4352-0.pdf` document.
This commit is contained in:
Jonas Jenwald 2021-11-24 18:55:28 +01:00
parent 5e2aec7dd7
commit ae4f1ae3e7
4 changed files with 28 additions and 4 deletions

View File

@ -107,6 +107,9 @@ class ChunkedStream extends Stream {
}
const chunk = Math.floor(pos / this.chunkSize);
if (chunk > this.numChunks) {
return;
}
if (chunk === this.lastSuccessfulEnsureByteChunk) {
return;
}
@ -125,9 +128,14 @@ class ChunkedStream extends Stream {
return;
}
const chunkSize = this.chunkSize;
const beginChunk = Math.floor(begin / chunkSize);
const endChunk = Math.floor((end - 1) / chunkSize) + 1;
const beginChunk = Math.floor(begin / this.chunkSize);
if (beginChunk > this.numChunks) {
return;
}
const endChunk = Math.min(
Math.floor((end - 1) / this.chunkSize) + 1,
this.numChunks
);
for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
if (!this._loadedChunks.has(chunk)) {
throw new MissingDataException(begin, end);

View File

@ -486,4 +486,5 @@
!pr12828.pdf
!secHandler.pdf
!rc_annotation.pdf
!issue14267.pdf
!issue14267.pdf
!PDFBOX-4352-0.pdf

BIN
test/pdfs/PDFBOX-4352-0.pdf Normal file

Binary file not shown.

View File

@ -443,6 +443,21 @@ describe("api", function () {
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});
it("creates pdf doc from PDF file with bad XRef table", async function () {
// A corrupt PDF file, where the XRef table has (some) bogus entries
// (see issue 14303).
const loadingTask = getDocument(
buildGetDocumentParams("PDFBOX-4352-0.pdf", {
// Use a very small chunk size; reproducing the bug locally required a
// very small `rangeChunkSize`-value.
rangeChunkSize: 100,
})
);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
const pdfDocument = await loadingTask.promise;
expect(pdfDocument.numPages).toEqual(1);
await loadingTask.destroy();
});
});
describe("PDFWorker", function () {