From 2516ffa78e829c1cf80b564860e3b47862eb4e42 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 20 Oct 2022 16:24:17 +0200 Subject: [PATCH] Fall back to finding the first "obj" occurrence, when the trailer-dictionary is incomplete (issue 15590) Note that the "trailer"-case is already a fallback, since normally we're able to use the "xref"-operator even in corrupt documents. However, when a "trailer"-operator is found we still expect "startxref" to exist and be usable in order to advance the stream position. When that's not the case, as happens in the referenced issue, we use a simple fallback to find the first "obj" occurrence instead. This *partially* fixes issue 15590, since without this patch we fail to find any objects at all during `XRef.indexObjects`. However, note that the PDF document is still corrupt and won't render since there's no actual /Pages-dictionary and the /Root-entry simply points to the /OpenAction-dictionary instead. --- src/core/xref.js | 26 ++++++++++++++++++++++++-- test/pdfs/.gitignore | 1 + test/pdfs/issue15590.pdf | 23 +++++++++++++++++++++++ test/unit/api_spec.js | 27 +++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 test/pdfs/issue15590.pdf diff --git a/src/core/xref.js b/src/core/xref.js index e273eeeba..0f1e957ea 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -503,7 +503,7 @@ class XRef { // Find the next "obj" string, rather than "endobj", to ensure that // we won't skip over a new 'obj' operator in corrupt files where // 'endobj' operators are missing (fixes issue9105_reduced.pdf). 
- while (startPos < buffer.length) { + while (startPos < length) { const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4; contentLength = endPos - position; @@ -545,7 +545,29 @@ class XRef { (token.length === 7 || /\s/.test(token[7])) ) { trailers.push(position); - position += skipUntil(buffer, position, startxrefBytes); + + const contentLength = skipUntil(buffer, position, startxrefBytes); + // Attempt to handle (some) corrupt documents, where no 'startxref' + // operators are present (fixes issue15590.pdf). + if (position + contentLength >= length) { + const endPos = position + skipUntil(buffer, position, objBytes) + 4; + + const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, position); + const tokenStr = bytesToString(buffer.subarray(checkPos, endPos)); + + // Find the first "obj" occurrence after the 'trailer' operator. + const objToken = nestedObjRegExp.exec(tokenStr); + + if (objToken && objToken[1]) { + warn( + 'indexObjects: Found first "obj" after "trailer", ' + + 'caused by missing "startxref" -- trying to recover.' 
+ ); + position = endPos - objToken[1].length; + continue; + } + } + position += contentLength; } else { position += token.length + 1; } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 87d4c0278..c0ef6527d 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -351,6 +351,7 @@ !issue9534_reduced.pdf !attachment.pdf !basicapi.pdf +!issue15590.pdf !issue15594_reduced.pdf !issue2884_reduced.pdf !mixedfonts.pdf diff --git a/test/pdfs/issue15590.pdf b/test/pdfs/issue15590.pdf new file mode 100644 index 000000000..7af8ce482 --- /dev/null +++ b/test/pdfs/issue15590.pdf @@ -0,0 +1,23 @@ +%PDF-1.7 + +trailer +<< +/Root 1 0 R +>> + +1 0 obj +<< +/Type /Catalog +/Pages 2 0 R +/OpenAction 2 0 R +>> +endobj + +2 0 obj +<< +/S /JavaScript +/JS(func=function(){app.alert(1)};func();) +>> +endobj + +%%EOF diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index cea08e235..1cb6f10cb 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -738,6 +738,33 @@ describe("api", function () { await loadingTask.destroy(); }); + + it("creates pdf doc from PDF file, with incomplete trailer", async function () { + const loadingTask = getDocument(buildGetDocumentParams("issue15590.pdf")); + expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); + + const pdfDocument = await loadingTask.promise; + expect(pdfDocument.numPages).toEqual(1); + + const jsActions = await pdfDocument.getJSActions(); + expect(jsActions).toEqual({ + OpenAction: ["func=function(){app.alert(1)};func();"], + }); + + try { + await pdfDocument.getPage(1); + + // Shouldn't get here. + expect(false).toEqual(true); + } catch (reason) { + expect(reason instanceof UnknownErrorException).toEqual(true); + expect(reason.message).toEqual( + "Page dictionary kids object is not an array." + ); + } + + await loadingTask.destroy(); + }); }); describe("PDFWorker", function () {