diff --git a/src/core/xref.js b/src/core/xref.js index e273eeeba..0f1e957ea 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -503,7 +503,7 @@ class XRef { // Find the next "obj" string, rather than "endobj", to ensure that // we won't skip over a new 'obj' operator in corrupt files where // 'endobj' operators are missing (fixes issue9105_reduced.pdf). - while (startPos < buffer.length) { + while (startPos < length) { const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4; contentLength = endPos - position; @@ -545,7 +545,29 @@ class XRef { (token.length === 7 || /\s/.test(token[7])) ) { trailers.push(position); - position += skipUntil(buffer, position, startxrefBytes); + + const contentLength = skipUntil(buffer, position, startxrefBytes); + // Attempt to handle (some) corrupt documents, where no 'startxref' + // operators are present (fixes issue15590.pdf). + if (position + contentLength >= length) { + const endPos = position + skipUntil(buffer, position, objBytes) + 4; + + const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, position); + const tokenStr = bytesToString(buffer.subarray(checkPos, endPos)); + + // Find the first "obj" occurrence after the 'trailer' operator. + const objToken = nestedObjRegExp.exec(tokenStr); + + if (objToken && objToken[1]) { + warn( + 'indexObjects: Found first "obj" after "trailer", ' + + 'caused by missing "startxref" -- trying to recover.' + ); + position = endPos - objToken[1].length; + continue; + } + } + position += contentLength; } else { position += token.length + 1; } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 87d4c0278..c0ef6527d 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -351,6 +351,7 @@ !issue9534_reduced.pdf !attachment.pdf !basicapi.pdf +!issue15590.pdf !issue15594_reduced.pdf !issue2884_reduced.pdf !mixedfonts.pdf diff --git a/test/pdfs/issue15590.pdf b/test/pdfs/issue15590.pdf new file mode 100644 index 000000000..7af8ce482 --- /dev/null +++ b/test/pdfs/issue15590.pdf @@ -0,0 +1,23 @@ +%PDF-1.7 + +trailer +<< +/Root 1 0 R +>> + +1 0 obj +<< +/Type /Catalog +/Pages 2 0 R +/OpenAction 2 0 R +>> +endobj + +2 0 obj +<< +/S /JavaScript +/JS(func=function(){app.alert(1)};func();) +>> +endobj + +%%EOF diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index cea08e235..1cb6f10cb 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -738,6 +738,33 @@ describe("api", function () { await loadingTask.destroy(); }); + + it("creates pdf doc from PDF file, with incomplete trailer", async function () { + const loadingTask = getDocument(buildGetDocumentParams("issue15590.pdf")); + expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); + + const pdfDocument = await loadingTask.promise; + expect(pdfDocument.numPages).toEqual(1); + + const jsActions = await pdfDocument.getJSActions(); + expect(jsActions).toEqual({ + OpenAction: ["func=function(){app.alert(1)};func();"], + }); + + try { + await pdfDocument.getPage(1); + + // Shouldn't get here. + expect(false).toEqual(true); + } catch (reason) { + expect(reason instanceof UnknownErrorException).toEqual(true); + expect(reason.message).toEqual( + "Page dictionary kids object is not an array." + ); + } + + await loadingTask.destroy(); + }); }); describe("PDFWorker", function () {