Handle more cases of corrupt PDF files with missing 'endobj' operators, where the "obj" string is immediately followed by the dictionary (PR 9288 follow-up)

This commit is contained in:
Jonas Jenwald 2019-01-10 17:49:33 +01:00
parent e4d2a1604e
commit d4a3858ed5
4 changed files with 82 additions and 1 deletions

View File

@@ -1199,7 +1199,7 @@ var XRef = (function XRefClosure() {
}
var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
const endobjRegExp = /\bendobj[\b\s]$/;
-const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s])$/;
+const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s<])$/;
const CHECK_CONTENT_LENGTH = 25;
var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);

View File

@@ -72,6 +72,7 @@
!issue9458.pdf
!issue9915_reduced.pdf
!issue9940.pdf
+!issue10438_reduced.pdf
!bad-PageLabels.pdf
!decodeACSuccessive.pdf
!filled-background.pdf

View File

@@ -0,0 +1,73 @@
%PDF-1.7
%âãÏÓ
1 0 obj
<<
/Title (Issue 10438)
/Author (Snuffleupagus)
>>
2 0 obj<<
/Pages 3 0 R
/Type /Catalog
>>
endobj
3 0 obj
<<
/Kids [4 0 R]
/Count 1
/Type /Pages
>>
endobj
4 0 obj
<<
/Parent 3 0 R
/MediaBox [0 0 200 50]
/Resources
<<
/Font
<<
/F1 5 0 R
>>
>>
/Contents 6 0 R
/Type /Page
>>
endobj
5 0 obj
<<
/BaseFont /Times-Roman
/Subtype /Type1
/Encoding /WinAnsiEncoding
/Type /Font
>>
endobj
6 0 obj
<<
/Length 41
>>
stream
BT
10 20 TD
/F1 20 Tf
(Issue 10438) Tj
ET
endstream
endobj xref
0 7
0000000000 65535 f
0000000001 00000 n
0000000002 00000 n
0000000003 00000 n
0000000004 00000 n
0000000005 00000 n
0000000006 00000 n
trailer
<<
/Info 1 0 R
/Root 2 0 R
/Size 7
>>
startxref
491
%%EOF

View File

@@ -826,6 +826,13 @@
"link": false,
"type": "eq"
},
+{ "id": "issue10438",
+"file": "pdfs/issue10438_reduced.pdf",
+"md5": "bb26f68493e33af17b256a6ffe777a24",
+"rounds": 1,
+"link": false,
+"type": "eq"
+},
{ "id": "issue6289",
"file": "pdfs/issue6289.pdf",
"md5": "0869f3d147c734ec484ffd492104095d",