When parsing corrupt documents, avoid inserting obviously broken data in the XRef-table (issue 13783)
In cases where even the very *first* attempt at reading from an object will throw, simply ignoring such objects will help improve rendering of *some* corrupt documents. Note that this will lead to more parsing in some cases, but considering that this only applies to *corrupt* documents, that shouldn't be a big deal.
This commit is contained in:
parent
51f0a81085
commit
b82c802dff
@ -58,6 +58,8 @@ class MissingDataException extends BaseException {
|
||||
}
|
||||
}
|
||||
|
||||
// Thrown by the `Parser` when the end of the file is reached inside an array
// or a dictionary while not in recovery mode. Having a dedicated type lets
// callers tell this failure apart from other parsing errors.
class ParserEOFException extends BaseException {}
|
||||
|
||||
// NOTE(review): presumably signals a problem with an individual XRef entry;
// no throw-site is visible in this excerpt -- confirm against the XRef code.
class XRefEntryException extends BaseException {}
|
||||
|
||||
// NOTE(review): presumably signals that parsing of the XRef data failed;
// no throw-site is visible in this excerpt -- confirm against the XRef code.
class XRefParseException extends BaseException {}
|
||||
@ -450,6 +452,7 @@ export {
|
||||
isWhiteSpace,
|
||||
log2,
|
||||
MissingDataException,
|
||||
ParserEOFException,
|
||||
parseXFAPath,
|
||||
readInt8,
|
||||
readUint16,
|
||||
|
@ -33,7 +33,11 @@ import {
|
||||
Name,
|
||||
Ref,
|
||||
} from "./primitives.js";
|
||||
import { isWhiteSpace, MissingDataException } from "./core_utils.js";
|
||||
import {
|
||||
isWhiteSpace,
|
||||
MissingDataException,
|
||||
ParserEOFException,
|
||||
} from "./core_utils.js";
|
||||
import { Ascii85Stream } from "./ascii_85_stream.js";
|
||||
import { AsciiHexStream } from "./ascii_hex_stream.js";
|
||||
import { CCITTFaxStream } from "./ccitt_stream.js";
|
||||
@ -124,10 +128,10 @@ class Parser {
|
||||
array.push(this.getObj(cipherTransform));
|
||||
}
|
||||
if (isEOF(this.buf1)) {
|
||||
if (!this.recoveryMode) {
|
||||
throw new FormatError("End of file inside array");
|
||||
if (this.recoveryMode) {
|
||||
return array;
|
||||
}
|
||||
return array;
|
||||
throw new ParserEOFException("End of file inside array.");
|
||||
}
|
||||
this.shift();
|
||||
return array;
|
||||
@ -148,10 +152,10 @@ class Parser {
|
||||
dict.set(key, this.getObj(cipherTransform));
|
||||
}
|
||||
if (isEOF(this.buf1)) {
|
||||
if (!this.recoveryMode) {
|
||||
throw new FormatError("End of file inside dictionary");
|
||||
if (this.recoveryMode) {
|
||||
return dict;
|
||||
}
|
||||
return dict;
|
||||
throw new ParserEOFException("End of file inside dictionary.");
|
||||
}
|
||||
|
||||
// Stream objects are not allowed inside content streams or
|
||||
|
@ -33,6 +33,7 @@ import {
|
||||
import { Lexer, Parser } from "./parser.js";
|
||||
import {
|
||||
MissingDataException,
|
||||
ParserEOFException,
|
||||
XRefEntryException,
|
||||
XRefParseException,
|
||||
} from "./core_utils.js";
|
||||
@ -453,15 +454,38 @@ class XRef {
|
||||
} else if ((m = objRegExp.exec(token))) {
|
||||
const num = m[1] | 0,
|
||||
gen = m[2] | 0;
|
||||
if (!this.entries[num] || this.entries[num].gen === gen) {
|
||||
|
||||
let contentLength,
|
||||
startPos = position + token.length,
|
||||
updateEntries = false;
|
||||
if (!this.entries[num]) {
|
||||
updateEntries = true;
|
||||
} else if (this.entries[num].gen === gen) {
|
||||
// Before overwriting an existing entry, ensure that the new one won't
|
||||
// cause *immediate* errors when it's accessed (fixes issue13783.pdf).
|
||||
try {
|
||||
const parser = new Parser({
|
||||
lexer: new Lexer(stream.makeSubStream(startPos)),
|
||||
});
|
||||
parser.getObj();
|
||||
updateEntries = true;
|
||||
} catch (ex) {
|
||||
if (ex instanceof ParserEOFException) {
|
||||
warn(`indexObjects -- checking object (${token}): "${ex}".`);
|
||||
} else {
|
||||
// The error may come from the `Parser`-instance being initialized
|
||||
// without an `XRef`-instance (we don't have a usable one yet).
|
||||
updateEntries = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (updateEntries) {
|
||||
this.entries[num] = {
|
||||
offset: position - stream.start,
|
||||
gen,
|
||||
uncompressed: true,
|
||||
};
|
||||
}
|
||||
let contentLength,
|
||||
startPos = position + token.length;
|
||||
|
||||
// Find the next "obj" string, rather than "endobj", to ensure that
|
||||
// we won't skip over a new 'obj' operator in corrupt files where
|
||||
|
1
test/pdfs/issue13783.pdf.link
Normal file
1
test/pdfs/issue13783.pdf.link
Normal file
@ -0,0 +1 @@
|
||||
https://github.com/mozilla/pdf.js/files/6869824/TimeTravel.pdf
|
@ -1382,6 +1382,15 @@
|
||||
"enableXfa": true,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue13783",
|
||||
"file": "pdfs/issue13783.pdf",
|
||||
"md5": "6958d827afa566efbd82f53271ea5cd6",
|
||||
"link": true,
|
||||
"rounds": 1,
|
||||
"firstPage": 7,
|
||||
"lastPage": 7,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue9262",
|
||||
"file": "pdfs/issue9262_reduced.pdf",
|
||||
"md5": "5347ce2d7b3866625c22e115fd90e0de",
|
||||
|
Loading…
Reference in New Issue
Block a user