Make `XRef_indexObjects` more robust against bad PDF files (issue 5752)
This patch improves the detection of `xref` in files where it is followed by an arbitrary whitespace character (not just a line-breaking character). It also adds a check for missing whitespace, e.g. `1 0 obj<<`, to speed up `readToken` for the PDF file in the referenced issue. Finally, the patch replaces a number of magic numbers with suitably named constants. Fixes #5752. Also improves #6243, but some problems still remain with that issue.
This commit is contained in:
parent
23cb01c8af
commit
56a43a3181
@ -1013,9 +1013,12 @@ var XRef = (function XRefClosure() {
|
|||||||
indexObjects: function XRef_indexObjects() {
|
indexObjects: function XRef_indexObjects() {
|
||||||
// Simple scan through the PDF content to find objects,
|
// Simple scan through the PDF content to find objects,
|
||||||
// trailers and XRef streams.
|
// trailers and XRef streams.
|
||||||
|
var TAB = 0x9, LF = 0xA, CR = 0xD, SPACE = 0x20;
|
||||||
|
var PERCENT = 0x25, LT = 0x3C;
|
||||||
|
|
||||||
function readToken(data, offset) {
|
function readToken(data, offset) {
|
||||||
var token = '', ch = data[offset];
|
var token = '', ch = data[offset];
|
||||||
while (ch !== 13 && ch !== 10) {
|
while (ch !== LF && ch !== CR && ch !== LT) {
|
||||||
if (++offset >= data.length) {
|
if (++offset >= data.length) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1047,6 +1050,9 @@ var XRef = (function XRefClosure() {
|
|||||||
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
|
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
|
||||||
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
||||||
|
|
||||||
|
// Clear out any existing entries, since they may be bogus.
|
||||||
|
this.entries.length = 0;
|
||||||
|
|
||||||
var stream = this.stream;
|
var stream = this.stream;
|
||||||
stream.pos = 0;
|
stream.pos = 0;
|
||||||
var buffer = stream.getBytes();
|
var buffer = stream.getBytes();
|
||||||
@ -1054,23 +1060,24 @@ var XRef = (function XRefClosure() {
|
|||||||
var trailers = [], xrefStms = [];
|
var trailers = [], xrefStms = [];
|
||||||
while (position < length) {
|
while (position < length) {
|
||||||
var ch = buffer[position];
|
var ch = buffer[position];
|
||||||
if (ch === 32 || ch === 9 || ch === 13 || ch === 10) {
|
if (ch === TAB || ch === LF || ch === CR || ch === SPACE) {
|
||||||
++position;
|
++position;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (ch === 37) { // %-comment
|
if (ch === PERCENT) { // %-comment
|
||||||
do {
|
do {
|
||||||
++position;
|
++position;
|
||||||
if (position >= length) {
|
if (position >= length) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ch = buffer[position];
|
ch = buffer[position];
|
||||||
} while (ch !== 13 && ch !== 10);
|
} while (ch !== LF && ch !== CR);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
var token = readToken(buffer, position);
|
var token = readToken(buffer, position);
|
||||||
var m;
|
var m;
|
||||||
if (token === 'xref') {
|
if (token.indexOf('xref') === 0 &&
|
||||||
|
(token.length === 4 || /\s/.test(token[4]))) {
|
||||||
position += skipUntil(buffer, position, trailerBytes);
|
position += skipUntil(buffer, position, trailerBytes);
|
||||||
trailers.push(position);
|
trailers.push(position);
|
||||||
position += skipUntil(buffer, position, startxrefBytes);
|
position += skipUntil(buffer, position, startxrefBytes);
|
||||||
|
1
test/pdfs/issue5752.pdf.link
Normal file
1
test/pdfs/issue5752.pdf.link
Normal file
@ -0,0 +1 @@
|
|||||||
|
http://web.archive.org/web/20150821144004/http://222.247.54.152/Fulltext/qkyxlcyjy200504007.pdf
|
@ -1071,6 +1071,15 @@
|
|||||||
"lastPage": 1,
|
"lastPage": 1,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue5752",
|
||||||
|
"file": "pdfs/issue5752.pdf",
|
||||||
|
"md5": "aa20ad7cff71e9481c0cd623ddbff3b7",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": true,
|
||||||
|
"firstPage": 1,
|
||||||
|
"lastPage": 1,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
{ "id": "issue2931",
|
{ "id": "issue2931",
|
||||||
"file": "pdfs/issue2931.pdf",
|
"file": "pdfs/issue2931.pdf",
|
||||||
"md5": "ea40940eaf3541b312bda9329167da11",
|
"md5": "ea40940eaf3541b312bda9329167da11",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user