Merge pull request #10392 from Snuffleupagus/checkFirstPage

Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326)
This commit is contained in:
Tim van der Meij 2018-12-29 15:13:19 +01:00 committed by GitHub
commit e53877f372
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 68 additions and 36 deletions

View File

@ -16,7 +16,7 @@
import {
assert, FormatError, getInheritableProperty, info, isArrayBuffer, isBool,
isNum, isSpace, isString, MissingDataException, OPS, shadow, stringToBytes,
stringToPDFString, Util, warn
stringToPDFString, Util, warn, XRefEntryException, XRefParseException
} from '../shared/util';
import { Catalog, ObjectLoader, XRef } from './obj';
import { Dict, isDict, isName, isStream, Ref } from './primitives';
@ -649,6 +649,20 @@ var PDFDocument = (function PDFDocumentClosure() {
});
},
checkFirstPage() {
return this.getPage(0).catch((reason) => {
if (reason instanceof XRefEntryException) {
// Clear out the various caches to ensure that we haven't stored any
// inconsistent and/or incorrect state, since that could easily break
// subsequent `this.getPage` calls.
this._pagePromises.length = 0;
this.cleanup();
throw new XRefParseException();
}
});
},
cleanup: function PDFDocument_cleanup() {
return this.catalog.cleanup();
},

View File

@ -17,7 +17,7 @@ import {
bytesToString, createPromiseCapability, createValidAbsoluteUrl, FormatError,
info, InvalidPDFException, isBool, isNum, isString, MissingDataException,
PermissionFlag, shadow, stringToPDFString, stringToUTF8String,
toRomanNumerals, unreachable, warn, XRefParseException
toRomanNumerals, unreachable, warn, XRefEntryException, XRefParseException
} from '../shared/util';
import {
Dict, isCmd, isDict, isName, isRef, isRefsEqual, isStream, Ref, RefSet,
@ -1473,7 +1473,7 @@ var XRef = (function XRefClosure() {
if (xrefEntry.uncompressed) {
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
} else {
xrefEntry = this.fetchCompressed(xrefEntry, suppressEncryption);
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
}
if (isDict(xrefEntry)) {
xrefEntry.objId = ref.toString();
@ -1483,12 +1483,11 @@ var XRef = (function XRefClosure() {
return xrefEntry;
},
fetchUncompressed: function XRef_fetchUncompressed(ref, xrefEntry,
suppressEncryption) {
fetchUncompressed(ref, xrefEntry, suppressEncryption = false) {
var gen = ref.gen;
var num = ref.num;
if (xrefEntry.gen !== gen) {
throw new FormatError('inconsistent generation in XRef');
throw new XRefEntryException(`Inconsistent generation in XRef: ${ref}`);
}
var stream = this.stream.makeSubStream(xrefEntry.offset +
this.stream.start);
@ -1504,7 +1503,7 @@ var XRef = (function XRefClosure() {
obj2 = parseInt(obj2, 10);
}
if (obj1 !== num || obj2 !== gen || !isCmd(obj3)) {
throw new FormatError('bad XRef entry');
throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`);
}
if (obj3.cmd !== 'obj') {
// some bad PDFs use "obj1234" and really mean 1234
@ -1514,7 +1513,7 @@ var XRef = (function XRefClosure() {
return num;
}
}
throw new FormatError('bad XRef entry');
throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`);
}
if (this.encrypt && !suppressEncryption) {
xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen));
@ -1527,8 +1526,7 @@ var XRef = (function XRefClosure() {
return xrefEntry;
},
fetchCompressed: function XRef_fetchCompressed(xrefEntry,
suppressEncryption) {
fetchCompressed(ref, xrefEntry, suppressEncryption = false) {
var tableOffset = xrefEntry.offset;
var stream = this.fetch(new Ref(tableOffset, 0));
if (!isStream(stream)) {
@ -1573,7 +1571,7 @@ var XRef = (function XRefClosure() {
}
xrefEntry = entries[xrefEntry.gen];
if (xrefEntry === undefined) {
throw new FormatError('bad XRef entry for compressed object');
throw new XRefEntryException(`Bad (compressed) XRef entry: ${ref}`);
}
return xrefEntry;
},

View File

@ -258,33 +258,22 @@ var WorkerMessageHandler = {
WorkerTasks.splice(i, 1);
}
function loadDocument(recoveryMode) {
var loadDocumentCapability = createPromiseCapability();
async function loadDocument(recoveryMode) {
await pdfManager.ensureDoc('checkHeader');
await pdfManager.ensureDoc('parseStartXRef');
await pdfManager.ensureDoc('parse', [recoveryMode]);
var parseSuccess = function parseSuccess() {
Promise.all([
pdfManager.ensureDoc('numPages'),
pdfManager.ensureDoc('fingerprint'),
]).then(function([numPages, fingerprint]) {
loadDocumentCapability.resolve({
numPages,
fingerprint,
});
}, parseFailure);
};
if (!recoveryMode) {
// Check that at least the first page can be successfully loaded,
// since otherwise the XRef table is definitely not valid.
await pdfManager.ensureDoc('checkFirstPage');
}
var parseFailure = function parseFailure(e) {
loadDocumentCapability.reject(e);
};
pdfManager.ensureDoc('checkHeader', []).then(function() {
pdfManager.ensureDoc('parseStartXRef', []).then(function() {
pdfManager.ensureDoc('parse', [recoveryMode]).then(
parseSuccess, parseFailure);
}, parseFailure);
}, parseFailure);
return loadDocumentCapability.promise;
const [numPages, fingerprint] = await Promise.all([
pdfManager.ensureDoc('numPages'),
pdfManager.ensureDoc('fingerprint'),
]);
return { numPages, fingerprint, };
}
function getPdfManager(data, evaluatorOptions) {

View File

@ -472,6 +472,18 @@ var MissingDataException = (function MissingDataExceptionClosure() {
return MissingDataException;
})();
/**
 * Error type used to signal that an individual XRef entry is bad or
 * inconsistent (as opposed to a failure while parsing the XRef table itself).
 *
 * Instances inherit from `Error` through the prototype chain, so both
 * `instanceof XRefEntryException` and `instanceof Error` hold.
 *
 * @param {string} msg - Human-readable description of the bad entry.
 */
const XRefEntryException = (function XRefEntryExceptionClosure() {
  function XRefEntryException(msg) {
    this.message = msg;
  }

  XRefEntryException.prototype = new Error();
  XRefEntryException.prototype.name = 'XRefEntryException';
  // The `constructor` link has to live on the prototype: assigning it on the
  // function object itself has no effect on instances, which would otherwise
  // inherit `constructor === Error` from the `new Error()` prototype.
  XRefEntryException.prototype.constructor = XRefEntryException;

  return XRefEntryException;
})();
var XRefParseException = (function XRefParseExceptionClosure() {
function XRefParseException(msg) {
this.message = msg;
@ -1033,6 +1045,7 @@ export {
UnknownErrorException,
Util,
toRomanNumerals,
XRefEntryException,
XRefParseException,
FormatError,
arrayByteLength,

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/2643238/test.1.pdf

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/369694/repro-pdf.pdf

View File

@ -1275,6 +1275,22 @@
"rounds": 1,
"type": "eq"
},
{ "id": "issue7496",
"file": "pdfs/issue7496.pdf",
"md5": "b422981ae781166e75c0fb4c3634ed96",
"link": true,
"rounds": 1,
"lastPage": 1,
"type": "load"
},
{ "id": "issue10326",
"file": "pdfs/issue10326.pdf",
"md5": "015c13b09ef735ea1204f38992c60487",
"link": true,
"rounds": 1,
"lastPage": 1,
"type": "load"
},
{ "id": "issue7544",
"file": "pdfs/issue7544.pdf",
"md5": "87e3a9fc7d6a6c1bd5b53af6926ce48e",