Fall back to finding the first "obj" occurrence when the trailer-dictionary is incomplete (issue 15590)

Note that the "trailer"-case is already a fallback, since normally we're able to use the "xref"-operator even in corrupt documents. However, when a "trailer"-operator is found we still expect "startxref" to exist and be usable in order to advance the stream position. When that's not the case, as happens in the referenced issue, we use a simple fallback to find the first "obj" occurrence instead.

This *partially* fixes issue 15590, since without this patch we fail to find any objects at all during `XRef.indexObjects`. However, note that the PDF document is still corrupt and won't render, since there's no actual /Pages-dictionary: the /Pages-entry of the /Root-dictionary simply points to the /OpenAction-dictionary instead.
This commit is contained in:
Jonas Jenwald 2022-10-20 16:24:17 +02:00
parent 2ae90f9615
commit 2516ffa78e
4 changed files with 75 additions and 2 deletions

View File

@ -503,7 +503,7 @@ class XRef {
// Find the next "obj" string, rather than "endobj", to ensure that
// we won't skip over a new 'obj' operator in corrupt files where
// 'endobj' operators are missing (fixes issue9105_reduced.pdf).
while (startPos < buffer.length) {
while (startPos < length) {
const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4;
contentLength = endPos - position;
@ -545,7 +545,29 @@ class XRef {
(token.length === 7 || /\s/.test(token[7]))
) {
trailers.push(position);
position += skipUntil(buffer, position, startxrefBytes);
const contentLength = skipUntil(buffer, position, startxrefBytes);
// Attempt to handle (some) corrupt documents, where no 'startxref'
// operators are present (fixes issue15590.pdf).
if (position + contentLength >= length) {
const endPos = position + skipUntil(buffer, position, objBytes) + 4;
const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, position);
const tokenStr = bytesToString(buffer.subarray(checkPos, endPos));
// Find the first "obj" occurrence after the 'trailer' operator.
const objToken = nestedObjRegExp.exec(tokenStr);
if (objToken && objToken[1]) {
warn(
'indexObjects: Found first "obj" after "trailer", ' +
'caused by missing "startxref" -- trying to recover.'
);
position = endPos - objToken[1].length;
continue;
}
}
position += contentLength;
} else {
position += token.length + 1;
}

View File

@ -351,6 +351,7 @@
!issue9534_reduced.pdf
!attachment.pdf
!basicapi.pdf
!issue15590.pdf
!issue15594_reduced.pdf
!issue2884_reduced.pdf
!mixedfonts.pdf

23
test/pdfs/issue15590.pdf Normal file
View File

@ -0,0 +1,23 @@
%PDF-1.7
trailer
<<
/Root 1 0 R
>>
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
/OpenAction 2 0 R
>>
endobj
2 0 obj
<<
/S /JavaScript
/JS(func=function(){app.alert(1)};func();)
>>
endobj
%%EOF

View File

@ -738,6 +738,33 @@ describe("api", function () {
await loadingTask.destroy();
});
// Regression test for issue15590.pdf, a corrupt file whose "trailer" is not
// followed by any "startxref" operator. The document is still expected to
// load and expose its JavaScript OpenAction, while fetching a page from it
// is expected to fail.
it("creates pdf doc from PDF file, with incomplete trailer", async function () {
const loadingTask = getDocument(buildGetDocumentParams("issue15590.pdf"));
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
// Loading itself must succeed, i.e. the promise resolves with a document.
const pdfDocument = await loadingTask.promise;
// NOTE(review): one page is reported even though the file contains no valid
// page tree -- presumably a fallback value; confirm against the page-count
// logic.
expect(pdfDocument.numPages).toEqual(1);
// The document-level JavaScript action must be readable from the recovered
// objects.
const jsActions = await pdfDocument.getJSActions();
expect(jsActions).toEqual({
OpenAction: ["func=function(){app.alert(1)};func();"],
});
// In the test file the catalog's /Pages entry refers to the JavaScript
// action dictionary, so requesting the first page is expected to reject.
try {
await pdfDocument.getPage(1);
// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
// The rejection must be the specific page-tree error, not an arbitrary
// failure.
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual(
"Page dictionary kids object is not an array."
);
}
// Release the resources held by the loading task.
await loadingTask.destroy();
});
});
describe("PDFWorker", function () {