From fd29bb0c575be5f5ac2d6732e4f01674e44be0d0 Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Fri, 10 Jul 2015 20:18:53 +0200 Subject: [PATCH] Subtract start offset for xrefs in recovery mode Xref offsets are relative to the start of the PDF data (the %PDF- header), not to the start of the file that contains it. This is clear if you look at the other code: - In XRef's readXRefTable and processXRefTable methods, the offset of an xref entry is set to the byte offset as given in the PDF file. These values are always relative to the start of the PDF data (%PDF-). - XRef's readXRef method adds the start offset of the stream to the xref entry's offset: "stream.pos = startXRef + stream.start". Clearly, this line assumes that the entry offset excludes the start offset. However, when the PDF is parsed in recovery mode, the xref table is filled with entries whose offset already includes the start offset of the stream, i.e. it is relative to the start of the file (including any junk before %PDF-) rather than to the start of the PDF data. This is incorrect, and the fix is to subtract the start offset of the stream from the entry's byte offset. The manually created PDF file serves as a regression test. It is a valid PDF, except: - The integer that points to the start of the xref table (the startxref value) and the %%EOF trailer are missing. This will activate recovery mode in PDF.js. - Some junk was added before the start of the PDF data (the %PDF- header). This exposes the bad offset bug. 
--- src/core/obj.js | 6 +++--- test/pdfs/.gitignore | 1 + test/pdfs/issue6069.pdf | 28 ++++++++++++++++++++++++++++ test/test_manifest.json | 6 ++++++ 4 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 test/pdfs/issue6069.pdf diff --git a/src/core/obj.js b/src/core/obj.js index 7e3192987..8c618eb4c 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -1081,7 +1081,7 @@ var XRef = (function XRefClosure() { } else if ((m = /^(\d+)\s+(\d+)\s+obj\b/.exec(token))) { if (typeof this.entries[m[1]] === 'undefined') { this.entries[m[1]] = { - offset: position, + offset: position - stream.start, gen: m[2] | 0, uncompressed: true }; @@ -1094,8 +1094,8 @@ var XRef = (function XRefClosure() { var xrefTagOffset = skipUntil(content, 0, xrefBytes); if (xrefTagOffset < contentLength && content[xrefTagOffset + 5] < 64) { - xrefStms.push(position); - this.xrefstms[position] = 1; // don't read it recursively + xrefStms.push(position - stream.start); + this.xrefstms[position - stream.start] = 1; // Avoid recursion } position += contentLength; diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index f500e35cd..d428da4f4 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -145,3 +145,4 @@ !issue6010_2.pdf !issue6068.pdf !issue6081.pdf +!issue6069.pdf diff --git a/test/pdfs/issue6069.pdf b/test/pdfs/issue6069.pdf new file mode 100644 index 000000000..3d148da6f --- /dev/null +++ b/test/pdfs/issue6069.pdf @@ -0,0 +1,28 @@ +Some junk before the header + +%PDF-1.1 +1 0 obj +<> +endobj +2 0 obj +<> +endobj +3 0 obj +<>>>>>/Contents 4 0 R>> +endobj +4 0 obj +<> +stream +BT/F1 14 Tf 20 20 Td(Missing value for startxref and junk before magic header) Tj ET +endstream +endobj +xref +0 5 +0000000000 65535 f +0000000008 00000 n +0000000054 00000 n +0000000128 00000 n +0000000254 00000 n +trailer +<> +startxref diff --git a/test/test_manifest.json b/test/test_manifest.json index 4b38c7532..c303e9f6b 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json 
@@ -2278,5 +2278,11 @@ "md5": "51a724136c0c10008bd061a78ea4b8fc", "rounds": 1, "type": "load" + }, + { "id": "issue6069", + "file": "pdfs/issue6069.pdf", + "md5": "d0ad8871f4116bca8e39513ffa8b7d8e", + "rounds": 1, + "type": "load" } ]