diff --git a/src/core/document.js b/src/core/document.js
index 9069a0cd7..bac2e281b 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -16,7 +16,7 @@
 import {
   assert, FormatError, getInheritableProperty, info, isArrayBuffer, isBool,
   isNum, isSpace, isString, MissingDataException, OPS, shadow, stringToBytes,
-  stringToPDFString, Util, warn
+  stringToPDFString, Util, warn, XRefEntryException, XRefParseException
 } from '../shared/util';
 import { Catalog, ObjectLoader, XRef } from './obj';
 import { Dict, isDict, isName, isStream, Ref } from './primitives';
@@ -649,6 +649,26 @@ var PDFDocument = (function PDFDocumentClosure() {
       });
     },
 
+    /**
+     * Sanity-checks the XRef table by attempting to load the first page.
+     * @returns {Promise} Rejects with an `XRefParseException` when
+     *   `getPage(0)` failed with an `XRefEntryException`. Any other
+     *   `getPage(0)` rejection is swallowed and resolves with `undefined`.
+     */
+    checkFirstPage() {
+      return this.getPage(0).catch((reason) => {
+        if (reason instanceof XRefEntryException) {
+          // Clear out the various caches to ensure that we haven't stored any
+          // inconsistent and/or incorrect state, since that could easily break
+          // subsequent `this.getPage` calls.
+          this._pagePromises.length = 0;
+          this.cleanup();
+
+          throw new XRefParseException();
+        }
+      });
+    },
+
     cleanup: function PDFDocument_cleanup() {
       return this.catalog.cleanup();
     },
diff --git a/src/core/obj.js b/src/core/obj.js
index 700a4ca35..c47d90621 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -17,7 +17,7 @@ import {
   bytesToString, createPromiseCapability, createValidAbsoluteUrl, FormatError,
   info, InvalidPDFException, isBool, isNum, isString, MissingDataException,
   PermissionFlag, shadow, stringToPDFString, stringToUTF8String,
-  toRomanNumerals, unreachable, warn, XRefParseException
+  toRomanNumerals, unreachable, warn, XRefEntryException, XRefParseException
 } from '../shared/util';
 import {
   Dict, isCmd, isDict, isName, isRef, isRefsEqual, isStream, Ref, RefSet,
@@ -1473,7 +1473,7 @@ var XRef = (function XRefClosure() {
       if (xrefEntry.uncompressed) {
         xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
       } else {
-        xrefEntry = this.fetchCompressed(xrefEntry, suppressEncryption);
+        xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
       }
       if (isDict(xrefEntry)) {
         xrefEntry.objId = ref.toString();
@@ -1483,12 +1483,11 @@ var XRef = (function XRefClosure() {
       return xrefEntry;
     },
 
-    fetchUncompressed: function XRef_fetchUncompressed(ref, xrefEntry,
-                                                       suppressEncryption) {
+    fetchUncompressed(ref, xrefEntry, suppressEncryption = false) {
       var gen = ref.gen;
       var num = ref.num;
       if (xrefEntry.gen !== gen) {
-        throw new FormatError('inconsistent generation in XRef');
+        throw new XRefEntryException(`Inconsistent generation in XRef: ${ref}`);
       }
       var stream = this.stream.makeSubStream(xrefEntry.offset +
                                              this.stream.start);
@@ -1504,7 +1503,7 @@ var XRef = (function XRefClosure() {
         obj2 = parseInt(obj2, 10);
       }
       if (obj1 !== num || obj2 !== gen || !isCmd(obj3)) {
-        throw new FormatError('bad XRef entry');
+        throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`);
       }
       if (obj3.cmd !== 'obj') {
         // some bad PDFs use "obj1234" and really mean 1234
@@ -1514,7 +1513,7 @@ var XRef = (function XRefClosure() {
             return num;
           }
         }
-        throw new FormatError('bad XRef entry');
+        throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`);
       }
       if (this.encrypt && !suppressEncryption) {
         xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen));
@@ -1527,8 +1526,7 @@ var XRef = (function XRefClosure() {
       return xrefEntry;
     },
 
-    fetchCompressed: function XRef_fetchCompressed(xrefEntry,
-                                                   suppressEncryption) {
+    fetchCompressed(ref, xrefEntry, suppressEncryption = false) {
       var tableOffset = xrefEntry.offset;
       var stream = this.fetch(new Ref(tableOffset, 0));
       if (!isStream(stream)) {
@@ -1573,7 +1571,7 @@ var XRef = (function XRefClosure() {
       }
       xrefEntry = entries[xrefEntry.gen];
       if (xrefEntry === undefined) {
-        throw new FormatError('bad XRef entry for compressed object');
+        throw new XRefEntryException(`Bad (compressed) XRef entry: ${ref}`);
       }
       return xrefEntry;
     },
diff --git a/src/core/worker.js b/src/core/worker.js
index 83496d86f..910d2692f 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -258,33 +258,29 @@ var WorkerMessageHandler = {
       WorkerTasks.splice(i, 1);
     }
 
-    function loadDocument(recoveryMode) {
-      var loadDocumentCapability = createPromiseCapability();
+    /**
+     * Runs `checkHeader`, `parseStartXRef` and `parse` on the document, then
+     * collects its basic properties.
+     * @param {boolean} recoveryMode - Passed through to `parse`; the first
+     *   page check is skipped when this is truthy.
+     * @returns {Promise} Resolves with `{ numPages, fingerprint }`.
+     */
+    async function loadDocument(recoveryMode) {
+      await pdfManager.ensureDoc('checkHeader');
+      await pdfManager.ensureDoc('parseStartXRef');
+      await pdfManager.ensureDoc('parse', [recoveryMode]);
 
-      var parseSuccess = function parseSuccess() {
-        Promise.all([
-          pdfManager.ensureDoc('numPages'),
-          pdfManager.ensureDoc('fingerprint'),
-        ]).then(function([numPages, fingerprint]) {
-          loadDocumentCapability.resolve({
-            numPages,
-            fingerprint,
-          });
-        }, parseFailure);
-      };
+      if (!recoveryMode) {
+        // Check that at least the first page can be successfully loaded,
+        // since otherwise the XRef table is definitely not valid.
+        await pdfManager.ensureDoc('checkFirstPage');
+      }
 
-      var parseFailure = function parseFailure(e) {
-        loadDocumentCapability.reject(e);
-      };
-
-      pdfManager.ensureDoc('checkHeader', []).then(function() {
-        pdfManager.ensureDoc('parseStartXRef', []).then(function() {
-          pdfManager.ensureDoc('parse', [recoveryMode]).then(
-            parseSuccess, parseFailure);
-        }, parseFailure);
-      }, parseFailure);
-
-      return loadDocumentCapability.promise;
+      const [numPages, fingerprint] = await Promise.all([
+        pdfManager.ensureDoc('numPages'),
+        pdfManager.ensureDoc('fingerprint'),
+      ]);
+      return { numPages, fingerprint, };
     }
 
     function getPdfManager(data, evaluatorOptions) {
diff --git a/src/shared/util.js b/src/shared/util.js
index 48ed88be7..daac2f9c3 100644
--- a/src/shared/util.js
+++ b/src/shared/util.js
@@ -472,6 +472,24 @@ var MissingDataException = (function MissingDataExceptionClosure() {
   return MissingDataException;
 })();
 
+/**
+ * Error type for a bad or inconsistent XRef entry; `msg` is stored as the
+ * error's `message`.
+ */
+const XRefEntryException = (function XRefEntryExceptionClosure() {
+  function XRefEntryException(msg) {
+    this.message = msg;
+  }
+
+  XRefEntryException.prototype = new Error();
+  XRefEntryException.prototype.name = 'XRefEntryException';
+  // The prototype was replaced above, so `constructor` has to be restored on
+  // the prototype (not on the function) for instances to report it.
+  XRefEntryException.prototype.constructor = XRefEntryException;
+
+  return XRefEntryException;
+})();
+
 var XRefParseException = (function XRefParseExceptionClosure() {
   function XRefParseException(msg) {
     this.message = msg;
@@ -1033,6 +1051,7 @@ export {
   UnknownErrorException,
   Util,
   toRomanNumerals,
+  XRefEntryException,
   XRefParseException,
   FormatError,
   arrayByteLength,
diff --git a/test/pdfs/issue10326.pdf.link b/test/pdfs/issue10326.pdf.link
new file mode 100644
index 000000000..43094fad9
--- /dev/null
+++ b/test/pdfs/issue10326.pdf.link
@@ -0,0 +1 @@
+https://github.com/mozilla/pdf.js/files/2643238/test.1.pdf
diff --git a/test/pdfs/issue7496.pdf.link b/test/pdfs/issue7496.pdf.link
new file mode 100644
index 000000000..3786d9161
--- /dev/null
+++ b/test/pdfs/issue7496.pdf.link
@@ -0,0 +1 @@
+https://github.com/mozilla/pdf.js/files/369694/repro-pdf.pdf
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 31000b690..e6f1253bf 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -1275,6 +1275,22 @@
        "rounds": 1,
        "type": "eq"
     },
+    {  "id": "issue7496",
+       "file": "pdfs/issue7496.pdf",
+       "md5": "b422981ae781166e75c0fb4c3634ed96",
+       "link": true,
+       "rounds": 1,
+       "lastPage": 1,
+       "type": "load"
+    },
+    {  "id": "issue10326",
+       "file": "pdfs/issue10326.pdf",
+       "md5": "015c13b09ef735ea1204f38992c60487",
+       "link": true,
+       "rounds": 1,
+       "lastPage": 1,
+       "type": "load"
+    },
     {  "id": "issue7544",
        "file": "pdfs/issue7544.pdf",
        "md5": "87e3a9fc7d6a6c1bd5b53af6926ce48e",