Attempt to find truncated endstream commands, in the fallback code-path, in Parser.makeStream (issue 10004)

Apparently there are some PDF generators, in this case the culprit is "Nooog Pdf Library / Nooog PStoPDF v1.5", that manage to mess up PDF creation badly enough that endstream[1] commands actually become truncated.

*Please note:* The solution implemented here isn't perfect, since it won't be able to cope with PDF files that contain a *mixture* of correct and truncated endstream commands.
However, considering that this particular mode of corruption *fortunately* doesn't seem very common[2], a slightly less complex solution ought to suffice for now.

Fixes 10004.

---
[1] Scanning through the PDF data to find endstream commands becomes necessary, in order to determine the stream length in cases where the `Length` entry of the (stream) dictionary is missing/incorrect.

[2] I cannot recall having seen any (previous) issues/bugs with "Missing endstream" errors.
This commit is contained in:
Jonas Jenwald 2018-08-26 01:49:31 +02:00
parent c81cbe113c
commit 95e5bad4c4
3 changed files with 38 additions and 3 deletions

View File

@ -18,8 +18,8 @@ import {
PredictorStream, RunLengthStream
} from './stream';
import {
assert, FormatError, info, isNum, isSpace, isString, MissingDataException,
StreamType, warn
assert, bytesToString, FormatError, info, isNum, isSpace, isString,
MissingDataException, StreamType, warn
} from '../shared/util';
import {
Cmd, Dict, EOF, isCmd, isDict, isEOF, isName, Name, Ref
@ -532,7 +532,34 @@ var Parser = (function ParserClosure() {
let actualLength = this._findStreamLength(startPos,
ENDSTREAM_SIGNATURE);
if (actualLength < 0) {
throw new FormatError('Missing endstream command.');
// Only allow limited truncation of the endstream signature,
// to prevent false positives.
const MAX_TRUNCATION = 1;
// Check if the PDF generator included truncated endstream commands,
// such as e.g. "endstrea" (fixes issue10004.pdf).
for (let i = 1; i <= MAX_TRUNCATION; i++) {
const end = ENDSTREAM_SIGNATURE.length - i;
const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
let maybeLength = this._findStreamLength(startPos,
TRUNCATED_SIGNATURE);
if (maybeLength >= 0) {
// Ensure that the byte immediately following the truncated
// endstream command is a space, to prevent false positives.
const lastByte = stream.peekBytes(end + 1)[end];
if (!isSpace(lastByte)) {
break;
}
info(`Found "${bytesToString(TRUNCATED_SIGNATURE)}" when ` +
'searching for endstream command.');
actualLength = maybeLength;
break;
}
}
if (actualLength < 0) {
throw new FormatError('Missing endstream command.');
}
}
length = actualLength;

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/2315390/2371410.pdf

View File

@ -726,6 +726,13 @@
"link": false,
"type": "load"
},
{ "id": "issue10004",
"file": "pdfs/issue10004.pdf",
"md5": "64d1853060cefe3be50e5c4617dd0505",
"rounds": 1,
"link": true,
"type": "load"
},
{ "id": "issue7507",
"file": "pdfs/issue7507.pdf",
"md5": "f7aeaafe0c89b94436e94eaa63307303",