Subtract start offset for xrefs in recovery mode

Xref offsets are relative to the start of the PDF data (the "%PDF-"
header), not to the start of the stream that contains it. This is clear
if you look at the other code:

- In XRef's readXRefTable and processXRefTable methods, the offset of
  an xref entry is set to the byte offset given in the PDF file.
  These values are always relative to the start of the PDF file (%PDF-).

- XRef's readXRef method adds the start offset of the stream to the
  xref entry's offset: "stream.pos = startXRef + stream.start".
  Clearly, this line assumes that the entry offset excludes the start
  offset.

However, when the PDF is parsed in recovery mode, the xref table is
filled with entries whose offset is relative to the start of the stream
rather than the PDF file. This is incorrect, and the fix is to subtract
the start offset of the stream from the entry's byte offset.

The manually created PDF file serves as a regression test. It is a valid
PDF, except:
- The integer that points to the start of the xref table and the %%EOF
  trailer are missing. This will activate recovery mode in PDF.js.
- Some junk was added before the start of the PDF file. This exposes the
  bad offset bug.
This commit is contained in:
Rob Wu 2015-07-10 20:18:53 +02:00
parent eb2ad11571
commit fd29bb0c57
4 changed files with 38 additions and 3 deletions

View File

@ -1081,7 +1081,7 @@ var XRef = (function XRefClosure() {
} else if ((m = /^(\d+)\s+(\d+)\s+obj\b/.exec(token))) {
if (typeof this.entries[m[1]] === 'undefined') {
this.entries[m[1]] = {
offset: position,
offset: position - stream.start,
gen: m[2] | 0,
uncompressed: true
};
@ -1094,8 +1094,8 @@ var XRef = (function XRefClosure() {
var xrefTagOffset = skipUntil(content, 0, xrefBytes);
if (xrefTagOffset < contentLength &&
content[xrefTagOffset + 5] < 64) {
xrefStms.push(position);
this.xrefstms[position] = 1; // don't read it recursively
xrefStms.push(position - stream.start);
this.xrefstms[position - stream.start] = 1; // Avoid recursion
}
position += contentLength;

View File

@ -145,3 +145,4 @@
!issue6010_2.pdf
!issue6068.pdf
!issue6081.pdf
!issue6069.pdf

28
test/pdfs/issue6069.pdf Normal file
View File

@ -0,0 +1,28 @@
Some junk before the header
%PDF-1.1
1 0 obj
<</Type/Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Type/Pages/Count 1/Kids[3 0 R]/MediaBox [0 0 400 50]>>
endobj
3 0 obj
<</Type/Page/Parent 2 0 R/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Arial>>>>>>/Contents 4 0 R>>
endobj
4 0 obj
<</Length 43>>
stream
BT/F1 14 Tf 20 20 Td(Missing value for startxref and junk before magic header) Tj ET
endstream
endobj
xref
0 5
0000000000 65535 f
0000000008 00000 n
0000000054 00000 n
0000000128 00000 n
0000000254 00000 n
trailer
<</Root 1 0 R/Size 5>>
startxref

View File

@ -2278,5 +2278,11 @@
"md5": "51a724136c0c10008bd061a78ea4b8fc",
"rounds": 1,
"type": "load"
},
{ "id": "issue6069",
"file": "pdfs/issue6069.pdf",
"md5": "d0ad8871f4116bca8e39513ffa8b7d8e",
"rounds": 1,
"type": "load"
}
]