Merge pull request #6375 from Snuffleupagus/more-robust-XRef_indexObjects

Make `XRef_indexObjects` more robust against bad PDF files (issue 5752)
This commit is contained in:
Yury Delendik 2015-08-24 15:05:05 -05:00
commit 5dcd409d3b
3 changed files with 22 additions and 5 deletions

View File

@@ -1013,9 +1013,12 @@ var XRef = (function XRefClosure() {
indexObjects: function XRef_indexObjects() {
// Simple scan through the PDF content to find objects,
// trailers and XRef streams.
var TAB = 0x9, LF = 0xA, CR = 0xD, SPACE = 0x20;
var PERCENT = 0x25, LT = 0x3C;
function readToken(data, offset) {
var token = '', ch = data[offset];
while (ch !== 13 && ch !== 10) {
while (ch !== LF && ch !== CR && ch !== LT) {
if (++offset >= data.length) {
break;
}
@@ -1047,6 +1050,9 @@ var XRef = (function XRefClosure() {
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
// Clear out any existing entries, since they may be bogus.
this.entries.length = 0;
var stream = this.stream;
stream.pos = 0;
var buffer = stream.getBytes();
@@ -1054,23 +1060,24 @@ var XRef = (function XRefClosure() {
var trailers = [], xrefStms = [];
while (position < length) {
var ch = buffer[position];
if (ch === 32 || ch === 9 || ch === 13 || ch === 10) {
if (ch === TAB || ch === LF || ch === CR || ch === SPACE) {
++position;
continue;
}
if (ch === 37) { // %-comment
if (ch === PERCENT) { // %-comment
do {
++position;
if (position >= length) {
break;
}
ch = buffer[position];
} while (ch !== 13 && ch !== 10);
} while (ch !== LF && ch !== CR);
continue;
}
var token = readToken(buffer, position);
var m;
if (token === 'xref') {
if (token.indexOf('xref') === 0 &&
(token.length === 4 || /\s/.test(token[4]))) {
position += skipUntil(buffer, position, trailerBytes);
trailers.push(position);
position += skipUntil(buffer, position, startxrefBytes);

View File

@@ -0,0 +1 @@
http://web.archive.org/web/20150821144004/http://222.247.54.152/Fulltext/qkyxlcyjy200504007.pdf

View File

@@ -1071,6 +1071,15 @@
"lastPage": 1,
"type": "eq"
},
{ "id": "issue5752",
"file": "pdfs/issue5752.pdf",
"md5": "aa20ad7cff71e9481c0cd623ddbff3b7",
"rounds": 1,
"link": true,
"firstPage": 1,
"lastPage": 1,
"type": "eq"
},
{ "id": "issue2931",
"file": "pdfs/issue2931.pdf",
"md5": "ea40940eaf3541b312bda9329167da11",