When parsing corrupt documents, avoid inserting obviously broken data in the XRef-table (issue 13783)

In cases where even the very *first* attempt at reading from an object will throw, simply ignoring such objects will help improve rendering of *some* corrupt documents.
Note that this will lead to more parsing in some cases, but considering that this only applies to *corrupt* documents, that shouldn't be a big deal.
This commit is contained in:
Jonas Jenwald 2021-07-23 17:37:55 +02:00
parent 51f0a81085
commit b82c802dff
5 changed files with 51 additions and 10 deletions

View File

@ -58,6 +58,8 @@ class MissingDataException extends BaseException {
}
}
class ParserEOFException extends BaseException {}
class XRefEntryException extends BaseException {}
class XRefParseException extends BaseException {}
@ -450,6 +452,7 @@ export {
isWhiteSpace,
log2,
MissingDataException,
ParserEOFException,
parseXFAPath,
readInt8,
readUint16,

View File

@ -33,7 +33,11 @@ import {
Name,
Ref,
} from "./primitives.js";
import { isWhiteSpace, MissingDataException } from "./core_utils.js";
import {
isWhiteSpace,
MissingDataException,
ParserEOFException,
} from "./core_utils.js";
import { Ascii85Stream } from "./ascii_85_stream.js";
import { AsciiHexStream } from "./ascii_hex_stream.js";
import { CCITTFaxStream } from "./ccitt_stream.js";
@ -124,10 +128,10 @@ class Parser {
array.push(this.getObj(cipherTransform));
}
if (isEOF(this.buf1)) {
if (!this.recoveryMode) {
throw new FormatError("End of file inside array");
if (this.recoveryMode) {
return array;
}
return array;
throw new ParserEOFException("End of file inside array.");
}
this.shift();
return array;
@ -148,10 +152,10 @@ class Parser {
dict.set(key, this.getObj(cipherTransform));
}
if (isEOF(this.buf1)) {
if (!this.recoveryMode) {
throw new FormatError("End of file inside dictionary");
if (this.recoveryMode) {
return dict;
}
return dict;
throw new ParserEOFException("End of file inside dictionary.");
}
// Stream objects are not allowed inside content streams or

View File

@ -33,6 +33,7 @@ import {
import { Lexer, Parser } from "./parser.js";
import {
MissingDataException,
ParserEOFException,
XRefEntryException,
XRefParseException,
} from "./core_utils.js";
@ -453,15 +454,38 @@ class XRef {
} else if ((m = objRegExp.exec(token))) {
const num = m[1] | 0,
gen = m[2] | 0;
if (!this.entries[num] || this.entries[num].gen === gen) {
let contentLength,
startPos = position + token.length,
updateEntries = false;
if (!this.entries[num]) {
updateEntries = true;
} else if (this.entries[num].gen === gen) {
// Before overwriting an existing entry, ensure that the new one won't
// cause *immediate* errors when it's accessed (fixes issue13783.pdf).
try {
const parser = new Parser({
lexer: new Lexer(stream.makeSubStream(startPos)),
});
parser.getObj();
updateEntries = true;
} catch (ex) {
if (ex instanceof ParserEOFException) {
warn(`indexObjects -- checking object (${token}): "${ex}".`);
} else {
// The error may come from the `Parser`-instance being initialized
// without an `XRef`-instance (we don't have a usable one yet).
updateEntries = true;
}
}
}
if (updateEntries) {
this.entries[num] = {
offset: position - stream.start,
gen,
uncompressed: true,
};
}
let contentLength,
startPos = position + token.length;
// Find the next "obj" string, rather than "endobj", to ensure that
// we won't skip over a new 'obj' operator in corrupt files where

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/6869824/TimeTravel.pdf

View File

@ -1382,6 +1382,15 @@
"enableXfa": true,
"type": "eq"
},
{ "id": "issue13783",
"file": "pdfs/issue13783.pdf",
"md5": "6958d827afa566efbd82f53271ea5cd6",
"link": true,
"rounds": 1,
"firstPage": 7,
"lastPage": 7,
"type": "eq"
},
{ "id": "issue9262",
"file": "pdfs/issue9262_reduced.pdf",
"md5": "5347ce2d7b3866625c22e115fd90e0de",