Merge pull request #3376 from yurydelendik/bad-pdfs

Misc fixes for corrupted PDFs
This commit is contained in:
Brendan Dahl 2013-06-27 15:33:06 -07:00
commit f06dadab3b
5 changed files with 71 additions and 7 deletions

View File

@ -141,6 +141,12 @@ var ChunkedStream = (function ChunkedStreamClosure() {
return bytes.subarray(pos, end);
},
peekBytes: function ChunkedStream_peekBytes(length) {
var bytes = this.getBytes(length);
this.pos -= bytes.length;
return bytes;
},
getByteRange: function ChunkedStream_getBytes(begin, end) {
this.ensureRange(begin, end);
return this.bytes.subarray(begin, end);

View File

@ -124,7 +124,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
// Images
BI: { fnName: 'beginInlineImage', numArgs: 0, variableArgs: false },
ID: { fnName: 'beginImageData', numArgs: 0, variableArgs: false },
EI: { fnName: 'endInlineImage', numArgs: 0, variableArgs: false },
EI: { fnName: 'endInlineImage', numArgs: 1, variableArgs: false },
// XObjects
Do: { fnName: 'paintXObject', numArgs: 1, variableArgs: false },

View File

@ -266,6 +266,18 @@ var Catalog = (function CatalogClosure() {
return shadow(this, 'toplevelPagesDict', pagesObj);
},
get documentOutline() {
var obj = null;
try {
obj = this.readDocumentOutline();
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn('Unable to read document outline');
}
return shadow(this, 'documentOutline', obj);
},
readDocumentOutline: function Catalog_readDocumentOutline() {
var xref = this.xref;
var obj = this.catDict.get('Outlines');
var root = { items: [] };
@ -316,8 +328,7 @@ var Catalog = (function CatalogClosure() {
}
}
}
obj = root.items.length > 0 ? root.items : null;
return shadow(this, 'documentOutline', obj);
return root.items.length > 0 ? root.items : null;
},
get numPages() {
var obj = this.toplevelPagesDict.get('Count');
@ -598,6 +609,12 @@ var XRef = (function XRefClosure() {
delete tableState.entryCount;
}
// Per issue 3248: hp scanners generate bad XRef
if (first === 1 && this.entries[1] && this.entries[1].free) {
// shifting the entries
this.entries.shift();
}
// Sanity check: as per spec, first object must be free
if (this.entries[0] && !this.entries[0].free)
error('Invalid XRef table: unexpected first object');

View File

@ -217,9 +217,47 @@ var Parser = (function ParserClosure() {
stream.pos = pos + length;
this.shift(); // '>>'
this.shift(); // 'stream'
if (!isCmd(this.buf1, 'endstream'))
error('Missing endstream');
this.shift();
if (!isCmd(this.buf1, 'endstream')) {
  // Bad stream length: the token found after skipping `length` bytes is not
  // 'endstream'. Rescan from the start of the stream data for the literal
  // keyword and use its offset as the stream length instead.
  stream.pos = pos;
  var SCAN_BLOCK_SIZE = 2048;
  var ENDSTREAM_SIGNATURE_LENGTH = 9;
  // Character codes of 'endstream'.
  var ENDSTREAM_SIGNATURE = [0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65,
                             0x61, 0x6D];
  var skipped = 0, found = false, i, j;
  while (stream.pos < stream.end) {
    var scanBytes = stream.peekBytes(SCAN_BLOCK_SIZE);
    // Last offset in this block at which a whole signature still fits.
    var lastStart = scanBytes.length - ENDSTREAM_SIGNATURE_LENGTH;
    if (lastStart < 0) {
      // Fewer bytes remain than the signature needs, so it cannot be here.
      // Stopping also guarantees the loop terminates.
      break;
    }
    for (i = 0; i <= lastStart; i++) {
      j = 0;
      while (j < ENDSTREAM_SIGNATURE_LENGTH &&
             scanBytes[i + j] === ENDSTREAM_SIGNATURE[j]) {
        j++;
      }
      if (j === ENDSTREAM_SIGNATURE_LENGTH) {
        found = true;
        break;
      }
    }
    if (found) {
      // `i` is the offset of the first byte of 'endstream' in this block.
      skipped += i;
      stream.pos += i;
      break;
    }
    // Every start offset 0..lastStart was rejected. Resuming right after
    // lastStart keeps a signature that straddles the block boundary
    // entirely inside the next block, and always advances by at least one.
    skipped += lastStart + 1;
    stream.pos += lastStart + 1;
  }
  if (!found) {
    error('Missing endstream');
  }
  // Number of bytes between the start of the data and the keyword.
  length = skipped;
  // NOTE(review): presumably these two shifts re-read the look-ahead
  // tokens from the new stream position so that buf1 holds 'endstream'
  // again - confirm against shift().
  this.shift();
  this.shift();
}
this.shift(); // 'endstream'
stream = stream.makeSubStream(pos, length, dict);
if (cipherTransform)

View File

@ -504,8 +504,11 @@ var FlateStream = (function FlateStreamClosure() {
if (typeof (b = bytes[bytesPos++]) == 'undefined')
error('Bad block header in flate stream');
check |= (b << 8);
if (check != (~blockLen & 0xffff))
if (check != (~blockLen & 0xffff) &&
(blockLen !== 0 || check !== 0)) {
// Ignoring error for bad "empty" block (see issue 1277)
error('Bad uncompressed block length in flate stream');
}
this.codeBuf = 0;
this.codeSize = 0;