Merge pull request #9288 from Snuffleupagus/issue-9105-2
Handle PDF files with missing 'endobj' operators, by searching for the "obj" string rather than "endobj" in `XRef.indexObjects` (issue 9105)
This commit is contained in:
commit
8ae3fd49f9
@ -1102,10 +1102,14 @@ var XRef = (function XRefClosure() {
|
||||
return skipped;
|
||||
}
|
||||
var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
|
||||
const endobjRegExp = /\bendobj[\b\s]$/;
|
||||
const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s])$/;
|
||||
const CHECK_CONTENT_LENGTH = 25;
|
||||
|
||||
var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
|
||||
var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
|
||||
101, 102]);
|
||||
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
|
||||
const objBytes = new Uint8Array([111, 98, 106]);
|
||||
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
||||
|
||||
// Clear out any existing entries, since they may be bogus.
|
||||
@ -1147,8 +1151,36 @@ var XRef = (function XRefClosure() {
|
||||
uncompressed: true,
|
||||
};
|
||||
}
|
||||
var contentLength = skipUntil(buffer, position, endobjBytes) + 7;
|
||||
var content = buffer.subarray(position, position + contentLength);
|
||||
let contentLength, startPos = position + token.length;
|
||||
|
||||
// Find the next "obj" string, rather than "endobj", to ensure that
|
||||
// we won't skip over a new 'obj' operator in corrupt files where
|
||||
// 'endobj' operators are missing (fixes issue9105_reduced.pdf).
|
||||
while (startPos < buffer.length) {
|
||||
let endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4;
|
||||
contentLength = endPos - position;
|
||||
|
||||
let checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, startPos);
|
||||
let tokenStr = bytesToString(buffer.subarray(checkPos, endPos));
|
||||
|
||||
// Check if the current object ends with an 'endobj' operator.
|
||||
if (endobjRegExp.test(tokenStr)) {
|
||||
break;
|
||||
} else {
|
||||
// Check if an "obj" occurrence is actually a new object,
|
||||
// i.e. the current object is missing the 'endobj' operator.
|
||||
let objToken = nestedObjRegExp.exec(tokenStr);
|
||||
|
||||
if (objToken && objToken[1]) {
|
||||
warn('indexObjects: Found new "obj" inside of another "obj", ' +
|
||||
'caused by missing "endobj" -- trying to recover.');
|
||||
contentLength -= objToken[1].length;
|
||||
break;
|
||||
}
|
||||
}
|
||||
startPos += contentLength;
|
||||
}
|
||||
let content = buffer.subarray(position, position + contentLength);
|
||||
|
||||
// Check whether this object is a suspected XRef stream
|
||||
// (it shall have '/XRef' and next char is not a letter)
|
||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -64,6 +64,7 @@
|
||||
!issue8798r.pdf
|
||||
!issue8823.pdf
|
||||
!issue9084.pdf
|
||||
!issue9105_reduced.pdf
|
||||
!bad-PageLabels.pdf
|
||||
!filled-background.pdf
|
||||
!ArabicCIDTrueType.pdf
|
||||
|
74
test/pdfs/issue9105_reduced.pdf
Normal file
74
test/pdfs/issue9105_reduced.pdf
Normal file
@ -0,0 +1,74 @@
|
||||
%PDF-1.7
|
||||
%âãÏÓ
|
||||
1 0 obj
|
||||
<<
|
||||
/Title (Issue 9105)
|
||||
/Author (Snuffleupagus)
|
||||
>>
|
||||
2 0 obj
|
||||
<<
|
||||
/Pages 3 0 R
|
||||
/Type /Catalog
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/Kids [4 0 R]
|
||||
/Count 1
|
||||
/Type /Pages
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<<
|
||||
/Parent 3 0 R
|
||||
/MediaBox [0 0 200 50]
|
||||
/Resources
|
||||
<<
|
||||
/Font
|
||||
<<
|
||||
/F1 5 0 R
|
||||
>>
|
||||
>>
|
||||
/Contents 6 0 R
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/BaseFont /Times-Roman
|
||||
/Subtype /Type1
|
||||
/Encoding /WinAnsiEncoding
|
||||
/Type /Font
|
||||
>>
|
||||
endobj
|
||||
6 0 obj
|
||||
<<
|
||||
/Length 41
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
10 20 TD
|
||||
/F1 20 Tf
|
||||
(Issue 9105) Tj
|
||||
ET
|
||||
|
||||
endstream
|
||||
endobj xref
|
||||
0 7
|
||||
0000000000 65535 f
|
||||
0000000001 00000 n
|
||||
0000000002 00000 n
|
||||
0000000003 00000 n
|
||||
0000000004 00000 n
|
||||
0000000005 00000 n
|
||||
0000000006 00000 n
|
||||
trailer
|
||||
|
||||
<<
|
||||
/Info 1 0 R
|
||||
/Root 2 0 R
|
||||
/Size 7
|
||||
>>
|
||||
startxref
|
||||
491
|
||||
%%EOF
|
@ -741,6 +741,13 @@
|
||||
"lastPage": 1,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue9105",
|
||||
"file": "pdfs/issue9105_reduced.pdf",
|
||||
"md5": "f3889f7c7b60e1ab998aac430cc7e08e",
|
||||
"rounds": 1,
|
||||
"link": false,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue6289",
|
||||
"file": "pdfs/issue6289.pdf",
|
||||
"md5": "0869f3d147c734ec484ffd492104095d",
|
||||
|
Loading…
x
Reference in New Issue
Block a user