Merge pull request #9288 from Snuffleupagus/issue-9105-2
Handle PDF files with missing 'endobj' operators, by searching for the "obj" string rather than "endobj" in `XRef.indexObjects` (issue 9105)
This commit is contained in:
commit
8ae3fd49f9
@ -1102,10 +1102,14 @@ var XRef = (function XRefClosure() {
|
|||||||
return skipped;
|
return skipped;
|
||||||
}
|
}
|
||||||
var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
|
var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
|
||||||
|
const endobjRegExp = /\bendobj[\b\s]$/;
|
||||||
|
const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s])$/;
|
||||||
|
const CHECK_CONTENT_LENGTH = 25;
|
||||||
|
|
||||||
var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
|
var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
|
||||||
var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
|
var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
|
||||||
101, 102]);
|
101, 102]);
|
||||||
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
|
const objBytes = new Uint8Array([111, 98, 106]);
|
||||||
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
||||||
|
|
||||||
// Clear out any existing entries, since they may be bogus.
|
// Clear out any existing entries, since they may be bogus.
|
||||||
@ -1147,8 +1151,36 @@ var XRef = (function XRefClosure() {
|
|||||||
uncompressed: true,
|
uncompressed: true,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
var contentLength = skipUntil(buffer, position, endobjBytes) + 7;
|
let contentLength, startPos = position + token.length;
|
||||||
var content = buffer.subarray(position, position + contentLength);
|
|
||||||
|
// Find the next "obj" string, rather than "endobj", to ensure that
|
||||||
|
// we won't skip over a new 'obj' operator in corrupt files where
|
||||||
|
// 'endobj' operators are missing (fixes issue9105_reduced.pdf).
|
||||||
|
while (startPos < buffer.length) {
|
||||||
|
let endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4;
|
||||||
|
contentLength = endPos - position;
|
||||||
|
|
||||||
|
let checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, startPos);
|
||||||
|
let tokenStr = bytesToString(buffer.subarray(checkPos, endPos));
|
||||||
|
|
||||||
|
// Check if the current object ends with an 'endobj' operator.
|
||||||
|
if (endobjRegExp.test(tokenStr)) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
// Check if an "obj" occurrence is actually a new object,
|
||||||
|
// i.e. the current object is missing the 'endobj' operator.
|
||||||
|
let objToken = nestedObjRegExp.exec(tokenStr);
|
||||||
|
|
||||||
|
if (objToken && objToken[1]) {
|
||||||
|
warn('indexObjects: Found new "obj" inside of another "obj", ' +
|
||||||
|
'caused by missing "endobj" -- trying to recover.');
|
||||||
|
contentLength -= objToken[1].length;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
startPos += contentLength;
|
||||||
|
}
|
||||||
|
let content = buffer.subarray(position, position + contentLength);
|
||||||
|
|
||||||
// checking XRef stream suspect
|
// checking XRef stream suspect
|
||||||
// (it shall have '/XRef' and next char is not a letter)
|
// (it shall have '/XRef' and next char is not a letter)
|
||||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -64,6 +64,7 @@
|
|||||||
!issue8798r.pdf
|
!issue8798r.pdf
|
||||||
!issue8823.pdf
|
!issue8823.pdf
|
||||||
!issue9084.pdf
|
!issue9084.pdf
|
||||||
|
!issue9105_reduced.pdf
|
||||||
!bad-PageLabels.pdf
|
!bad-PageLabels.pdf
|
||||||
!filled-background.pdf
|
!filled-background.pdf
|
||||||
!ArabicCIDTrueType.pdf
|
!ArabicCIDTrueType.pdf
|
||||||
|
74
test/pdfs/issue9105_reduced.pdf
Normal file
74
test/pdfs/issue9105_reduced.pdf
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
%PDF-1.7
|
||||||
|
%âãÏÓ
|
||||||
|
1 0 obj
|
||||||
|
<<
|
||||||
|
/Title (Issue 9105)
|
||||||
|
/Author (Snuffleupagus)
|
||||||
|
>>
|
||||||
|
2 0 obj
|
||||||
|
<<
|
||||||
|
/Pages 3 0 R
|
||||||
|
/Type /Catalog
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<<
|
||||||
|
/Kids [4 0 R]
|
||||||
|
/Count 1
|
||||||
|
/Type /Pages
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<<
|
||||||
|
/Parent 3 0 R
|
||||||
|
/MediaBox [0 0 200 50]
|
||||||
|
/Resources
|
||||||
|
<<
|
||||||
|
/Font
|
||||||
|
<<
|
||||||
|
/F1 5 0 R
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
/Contents 6 0 R
|
||||||
|
/Type /Page
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<<
|
||||||
|
/BaseFont /Times-Roman
|
||||||
|
/Subtype /Type1
|
||||||
|
/Encoding /WinAnsiEncoding
|
||||||
|
/Type /Font
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
6 0 obj
|
||||||
|
<<
|
||||||
|
/Length 41
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
10 20 TD
|
||||||
|
/F1 20 Tf
|
||||||
|
(Issue 9105) Tj
|
||||||
|
ET
|
||||||
|
|
||||||
|
endstream
|
||||||
|
endobj xref
|
||||||
|
0 7
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000001 00000 n
|
||||||
|
0000000002 00000 n
|
||||||
|
0000000003 00000 n
|
||||||
|
0000000004 00000 n
|
||||||
|
0000000005 00000 n
|
||||||
|
0000000006 00000 n
|
||||||
|
trailer
|
||||||
|
|
||||||
|
<<
|
||||||
|
/Info 1 0 R
|
||||||
|
/Root 2 0 R
|
||||||
|
/Size 7
|
||||||
|
>>
|
||||||
|
startxref
|
||||||
|
491
|
||||||
|
%%EOF
|
@ -741,6 +741,13 @@
|
|||||||
"lastPage": 1,
|
"lastPage": 1,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue9105",
|
||||||
|
"file": "pdfs/issue9105_reduced.pdf",
|
||||||
|
"md5": "f3889f7c7b60e1ab998aac430cc7e08e",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
{ "id": "issue6289",
|
{ "id": "issue6289",
|
||||||
"file": "pdfs/issue6289.pdf",
|
"file": "pdfs/issue6289.pdf",
|
||||||
"md5": "0869f3d147c734ec484ffd492104095d",
|
"md5": "0869f3d147c734ec484ffd492104095d",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user