Merge pull request #520 from notmasteryet/invalidpdf-1
Recovering from the invalid PDF
This commit is contained in:
commit
3745ce9480
110
pdf.js
110
pdf.js
@ -3164,6 +3164,113 @@ var XRef = (function xRefXRef() {
|
||||
this.readXRef(prev);
|
||||
return streamParameters;
|
||||
},
|
||||
indexObjects: function indexObjects() {
|
||||
// Simple scan through the PDF content to find objects,
|
||||
// trailers and XRef streams.
|
||||
// Reads the bytes of `data` starting at `offset` up to (but not including)
// the next CR (13) or LF (10), or up to the end of `data`, and returns them
// as a string with one character per byte.
// Returns '' when `offset` is already at a line break or past the end.
function readToken(data, offset) {
  var token = '';
  var dataLength = data.length;
  while (offset < dataLength) {
    var ch = data[offset];
    if (ch === 13 || ch === 10)
      break;
    // Append before advancing so that the very last byte of `data` is kept
    // when the token is not terminated by a line break.
    token += String.fromCharCode(ch);
    ++offset;
  }
  return token;
}
|
||||
// Counts how many bytes have to be skipped, starting at `offset`, before the
// byte sequence `what` begins in `data`. When the sequence does not occur,
// the result is the number of bytes between `offset` and the end of `data`.
function skipUntil(data, offset, what) {
  var needleLength = what.length;
  var end = data.length;
  var cursor = offset;
  for (; cursor < end; ++cursor) {
    // compare the needle against the bytes at the current cursor
    var matched = 0;
    while (matched < needleLength && data[cursor + matched] == what[matched])
      matched++;
    if (matched >= needleLength)
      return cursor - offset; // sequence found
  }
  return cursor - offset;
}
|
||||
var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
|
||||
var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
|
||||
101, 102]);
|
||||
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
|
||||
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
||||
|
||||
var stream = this.stream;
|
||||
stream.pos = 0;
|
||||
var buffer = stream.getBytes();
|
||||
var position = 0, length = buffer.length;
|
||||
var trailers = [], xrefStms = [];
|
||||
var state = 0;
|
||||
var currentToken;
|
||||
while (position < length) {
|
||||
var ch = buffer[position];
|
||||
if (ch === 32 || ch === 9 || ch === 13 || ch === 10) {
|
||||
++position;
|
||||
continue;
|
||||
}
|
||||
if (ch === 37) { // %-comment
|
||||
do {
|
||||
++position;
|
||||
ch = buffer[position];
|
||||
} while (ch !== 13 && ch !== 10);
|
||||
continue;
|
||||
}
|
||||
var token = readToken(buffer, position);
|
||||
var m;
|
||||
if (token === 'xref') {
|
||||
position += skipUntil(buffer, position, trailerBytes);
|
||||
trailers.push(position);
|
||||
position += skipUntil(buffer, position, startxrefBytes);
|
||||
} else if ((m = /^(\d+)\s+(\d+)\s+obj\b/.exec(token))) {
|
||||
this.entries[m[1]] = {
|
||||
offset: position,
|
||||
gen: m[2] | 0,
|
||||
uncompressed: true
|
||||
};
|
||||
|
||||
var contentLength = skipUntil(buffer, position, endobjBytes) + 7;
|
||||
var content = buffer.subarray(position, position + contentLength);
|
||||
|
||||
// checking XRef stream suspect
|
||||
// (it shall have '/XRef' and next char is not a letter)
|
||||
var xrefTagOffset = skipUntil(content, 0, xrefBytes);
|
||||
if (xrefTagOffset < contentLength &&
|
||||
content[xrefTagOffset + 5] < 64) {
|
||||
xrefStms.push(position);
|
||||
this.xrefstms[position] = 1; // don't read it recursively
|
||||
}
|
||||
|
||||
position += contentLength;
|
||||
} else
|
||||
position += token.length + 1;
|
||||
}
|
||||
// reading XRef streams
|
||||
for (var i = 0; i < xrefStms.length; ++i) {
|
||||
this.readXRef(xrefStms[i]);
|
||||
}
|
||||
// finding main trailer
|
||||
for (var i = 0; i < trailers.length; ++i) {
|
||||
stream.pos = trailers[i];
|
||||
var parser = new Parser(new Lexer(stream), true);
|
||||
var obj = parser.getObj();
|
||||
if (!IsCmd(obj, 'trailer'))
|
||||
continue;
|
||||
// read the trailer dictionary
|
||||
var dict;
|
||||
if (!IsDict(dict = parser.getObj()))
|
||||
continue;
|
||||
// taking the first one with 'ID'
|
||||
if (dict.has('ID'))
|
||||
return dict;
|
||||
}
|
||||
// nothing helps
|
||||
error('Invalid PDF structure');
|
||||
return null;
|
||||
},
|
||||
readXRef: function readXref(startXRef) {
|
||||
var stream = this.stream;
|
||||
stream.pos = startXRef;
|
||||
@ -3181,8 +3288,7 @@ var XRef = (function xRefXRef() {
|
||||
}
|
||||
return this.readXRefStream(obj);
|
||||
}
|
||||
error('Invalid XRef');
|
||||
return null;
|
||||
return this.indexObjects();
|
||||
},
|
||||
getEntry: function xRefGetEntry(i) {
|
||||
var e = this.entries[i];
|
||||
|
@ -125,6 +125,13 @@ function nextPage(task, loadError) {
|
||||
}
|
||||
}
|
||||
|
||||
if (task.skipPages && task.skipPages.indexOf(task.pageNum) >= 0) {
|
||||
log(' skipping page ' + task.pageNum + '/' + task.pdfDoc.numPages +
|
||||
'... ');
|
||||
snapshotCurrentPage(task, '');
|
||||
return;
|
||||
}
|
||||
|
||||
var page = null;
|
||||
|
||||
if (!failure) {
|
||||
|
1
test/pdfs/ibwa-bad.pdf.link
Normal file
1
test/pdfs/ibwa-bad.pdf.link
Normal file
@ -0,0 +1 @@
|
||||
http://www.bottledwater.org/public/pdf/IBWA05ModelCode_Mar2.pdf
|
@ -157,5 +157,12 @@
|
||||
"link": false,
|
||||
"rounds": 1,
|
||||
"type": "load"
|
||||
},
|
||||
{ "id": "ibwa-bad",
|
||||
"file": "pdfs/ibwa-bad.pdf",
|
||||
"link": true,
|
||||
"rounds": 1,
|
||||
"skipPages": [ 16 ],
|
||||
"type": "load"
|
||||
}
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user