Fall back to finding the first "obj" occurrence when the trailer-dictionary is incomplete (issue 15590)
Note that the "trailer"-case is already a fallback, since normally we're able to use the "xref"-operator even in corrupt documents. However, when a "trailer"-operator is found we still expect "startxref" to exist and be usable in order to advance the stream position. When that's not the case, as happens in the referenced issue, we use a simple fallback to find the first "obj" occurrence instead. This *partially* fixes issue 15590, since without this patch we fail to find any objects at all during `XRef.indexObjects`. However, note that the PDF document is still corrupt and won't render, since there's no actual /Pages-dictionary: the /Pages-entry of the /Root-dictionary (the Catalog) simply points to the /OpenAction-dictionary instead.
This commit is contained in:
parent
2ae90f9615
commit
2516ffa78e
@ -503,7 +503,7 @@ class XRef {
|
|||||||
// Find the next "obj" string, rather than "endobj", to ensure that
|
// Find the next "obj" string, rather than "endobj", to ensure that
|
||||||
// we won't skip over a new 'obj' operator in corrupt files where
|
// we won't skip over a new 'obj' operator in corrupt files where
|
||||||
// 'endobj' operators are missing (fixes issue9105_reduced.pdf).
|
// 'endobj' operators are missing (fixes issue9105_reduced.pdf).
|
||||||
while (startPos < buffer.length) {
|
while (startPos < length) {
|
||||||
const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4;
|
const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4;
|
||||||
contentLength = endPos - position;
|
contentLength = endPos - position;
|
||||||
|
|
||||||
@ -545,7 +545,29 @@ class XRef {
|
|||||||
(token.length === 7 || /\s/.test(token[7]))
|
(token.length === 7 || /\s/.test(token[7]))
|
||||||
) {
|
) {
|
||||||
trailers.push(position);
|
trailers.push(position);
|
||||||
position += skipUntil(buffer, position, startxrefBytes);
|
|
||||||
|
const contentLength = skipUntil(buffer, position, startxrefBytes);
|
||||||
|
// Attempt to handle (some) corrupt documents, where no 'startxref'
|
||||||
|
// operators are present (fixes issue15590.pdf).
|
||||||
|
if (position + contentLength >= length) {
|
||||||
|
const endPos = position + skipUntil(buffer, position, objBytes) + 4;
|
||||||
|
|
||||||
|
const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, position);
|
||||||
|
const tokenStr = bytesToString(buffer.subarray(checkPos, endPos));
|
||||||
|
|
||||||
|
// Find the first "obj" occurrence after the 'trailer' operator.
|
||||||
|
const objToken = nestedObjRegExp.exec(tokenStr);
|
||||||
|
|
||||||
|
if (objToken && objToken[1]) {
|
||||||
|
warn(
|
||||||
|
'indexObjects: Found first "obj" after "trailer", ' +
|
||||||
|
'caused by missing "startxref" -- trying to recover.'
|
||||||
|
);
|
||||||
|
position = endPos - objToken[1].length;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
position += contentLength;
|
||||||
} else {
|
} else {
|
||||||
position += token.length + 1;
|
position += token.length + 1;
|
||||||
}
|
}
|
||||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -351,6 +351,7 @@
|
|||||||
!issue9534_reduced.pdf
|
!issue9534_reduced.pdf
|
||||||
!attachment.pdf
|
!attachment.pdf
|
||||||
!basicapi.pdf
|
!basicapi.pdf
|
||||||
|
!issue15590.pdf
|
||||||
!issue15594_reduced.pdf
|
!issue15594_reduced.pdf
|
||||||
!issue2884_reduced.pdf
|
!issue2884_reduced.pdf
|
||||||
!mixedfonts.pdf
|
!mixedfonts.pdf
|
||||||
|
23
test/pdfs/issue15590.pdf
Normal file
23
test/pdfs/issue15590.pdf
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
%PDF-1.7
|
||||||
|
|
||||||
|
trailer
|
||||||
|
<<
|
||||||
|
/Root 1 0 R
|
||||||
|
>>
|
||||||
|
|
||||||
|
1 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Catalog
|
||||||
|
/Pages 2 0 R
|
||||||
|
/OpenAction 2 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
2 0 obj
|
||||||
|
<<
|
||||||
|
/S /JavaScript
|
||||||
|
/JS(func=function(){app.alert(1)};func();)
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
%%EOF
|
@ -738,6 +738,33 @@ describe("api", function () {
|
|||||||
|
|
||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("creates pdf doc from PDF file, with incomplete trailer", async function () {
|
||||||
|
const loadingTask = getDocument(buildGetDocumentParams("issue15590.pdf"));
|
||||||
|
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
|
||||||
|
|
||||||
|
const pdfDocument = await loadingTask.promise;
|
||||||
|
expect(pdfDocument.numPages).toEqual(1);
|
||||||
|
|
||||||
|
const jsActions = await pdfDocument.getJSActions();
|
||||||
|
expect(jsActions).toEqual({
|
||||||
|
OpenAction: ["func=function(){app.alert(1)};func();"],
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
await pdfDocument.getPage(1);
|
||||||
|
|
||||||
|
// Shouldn't get here.
|
||||||
|
expect(false).toEqual(true);
|
||||||
|
} catch (reason) {
|
||||||
|
expect(reason instanceof UnknownErrorException).toEqual(true);
|
||||||
|
expect(reason.message).toEqual(
|
||||||
|
"Page dictionary kids object is not an array."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("PDFWorker", function () {
|
describe("PDFWorker", function () {
|
||||||
|
Loading…
Reference in New Issue
Block a user