diff --git a/pdf.js b/pdf.js
index 11fff4a57..d5d82a724 100644
--- a/pdf.js
+++ b/pdf.js
@@ -3164,6 +3164,128 @@ var XRef = (function xRefXRef() {
         this.readXRef(prev);
       return streamParameters;
     },
+    // Recovery path: rebuilds the cross-reference data by scanning the raw
+    // bytes of the whole stream. readXRef falls back to this method instead
+    // of reporting 'Invalid XRef'. Every 'N G obj' header found is recorded
+    // in this.entries, objects that look like XRef streams are read through
+    // readXRef, and the first trailer dictionary that has an 'ID' entry is
+    // returned. error('Invalid PDF structure') is called when none is found.
+    indexObjects: function indexObjects() {
+      // Returns the characters from offset up to (not including) the next
+      // CR or LF, or up to the end of the data.
+      function readToken(data, offset) {
+        var token = '';
+        while (offset < data.length) {
+          var ch = data[offset];
+          if (ch === 13 || ch === 10)
+            break;
+          token += String.fromCharCode(ch);
+          ++offset;
+        }
+        return token;
+      }
+      // Returns the number of bytes between offset and the next occurrence
+      // of the byte sequence 'what' (or the end of the data if it is absent).
+      function skipUntil(data, offset, what) {
+        var length = what.length, dataLength = data.length;
+        var skipped = 0;
+        // finding byte sequence
+        while (offset < dataLength) {
+          var i = 0;
+          while (i < length && data[offset + i] === what[i])
+            ++i;
+          if (i >= length)
+            break; // sequence found
+
+          offset++;
+          skipped++;
+        }
+        return skipped;
+      }
+      // ASCII codes of 'trailer', 'startxref', 'endobj' and '/XRef'
+      var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
+      var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
+                                          101, 102]);
+      var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
+      var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
+      var objHeaderRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
+
+      var stream = this.stream;
+      stream.pos = 0;
+      var buffer = stream.getBytes();
+      var position = 0, length = buffer.length;
+      var trailers = [], xrefStms = [];
+      while (position < length) {
+        var ch = buffer[position];
+        if (ch === 32 || ch === 9 || ch === 13 || ch === 10) {
+          ++position;
+          continue;
+        }
+        if (ch === 37) { // %-comment
+          // Also stop at the end of the data: a comment on the last line
+          // without a trailing EOL must not make this loop run forever.
+          do {
+            ++position;
+            if (position >= length)
+              break;
+            ch = buffer[position];
+          } while (ch !== 13 && ch !== 10);
+          continue;
+        }
+        var token = readToken(buffer, position);
+        var m;
+        if (token === 'xref') {
+          // remember where 'trailer' starts, then skip to 'startxref'
+          position += skipUntil(buffer, position, trailerBytes);
+          trailers.push(position);
+          position += skipUntil(buffer, position, startxrefBytes);
+        } else if ((m = objHeaderRegExp.exec(token))) {
+          this.entries[m[1]] = {
+            offset: position,
+            gen: m[2] | 0,
+            uncompressed: true
+          };
+
+          // the 6 bytes of 'endobj' plus the one byte that follows it
+          var contentLength = skipUntil(buffer, position, endobjBytes) + 7;
+          var content = buffer.subarray(position, position + contentLength);
+
+          // checking XRef stream suspect
+          // (it shall have '/XRef' and next char is not a letter)
+          var xrefTagOffset = skipUntil(content, 0, xrefBytes);
+          if (xrefTagOffset < contentLength &&
+              content[xrefTagOffset + 5] < 64) {
+            xrefStms.push(position);
+            this.xrefstms[position] = 1; // don't read it recursively
+          }
+
+          position += contentLength;
+        } else
+          position += token.length + 1;
+      }
+      // reading XRef streams
+      for (var i = 0; i < xrefStms.length; ++i) {
+        this.readXRef(xrefStms[i]);
+      }
+      // finding main trailer
+      for (var j = 0; j < trailers.length; ++j) {
+        stream.pos = trailers[j];
+        var parser = new Parser(new Lexer(stream), true);
+        var obj = parser.getObj();
+        if (!IsCmd(obj, 'trailer'))
+          continue;
+        // read the trailer dictionary
+        var dict;
+        if (!IsDict(dict = parser.getObj()))
+          continue;
+        // taking the first one with 'ID'
+        if (dict.has('ID'))
+          return dict;
+      }
+      // nothing helps
+      error('Invalid PDF structure');
+      return null;
+    },
     readXRef: function readXref(startXRef) {
       var stream = this.stream;
       stream.pos = startXRef;
@@ -3181,8 +3303,7 @@ var XRef = (function xRefXRef() {
         }
         return this.readXRefStream(obj);
       }
-      error('Invalid XRef');
-      return null;
+      return this.indexObjects();
     },
     getEntry: function xRefGetEntry(i) {
       var e = this.entries[i];
diff --git a/test/driver.js b/test/driver.js
index 4aa5149f6..8f873fc46 100644
--- a/test/driver.js
+++ b/test/driver.js
@@ -125,6 +125,13 @@ function nextPage(task, loadError) {
     }
   }
 
+  if (task.skipPages && task.skipPages.indexOf(task.pageNum) >= 0) {
+    log(' skipping page ' + task.pageNum + '/' + task.pdfDoc.numPages +
+        '... ');
+    snapshotCurrentPage(task, '');
+    return;
+  }
+
   var page = null;
 
   if (!failure) {
diff --git a/test/pdfs/ibwa-bad.pdf.link b/test/pdfs/ibwa-bad.pdf.link
new file mode 100644
index 000000000..feec66ea2
--- /dev/null
+++ b/test/pdfs/ibwa-bad.pdf.link
@@ -0,0 +1 @@
+http://www.bottledwater.org/public/pdf/IBWA05ModelCode_Mar2.pdf
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 39d1f50ab..71738f192 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -157,5 +157,12 @@
       "link": false,
       "rounds": 1,
       "type": "load"
+    },
+    {  "id": "ibwa-bad",
+       "file": "pdfs/ibwa-bad.pdf",
+       "link": true,
+       "rounds": 1,
+       "skipPages": [ 16 ],
+       "type": "load"
     }
 ]