diff --git a/src/core/obj.js b/src/core/obj.js index 9069cf747..89dfa4df3 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -855,7 +855,16 @@ var XRef = (function XRefClosure() { } trailerDict.assignXref(this); this.trailer = trailerDict; - var encrypt = trailerDict.get('Encrypt'); + + let encrypt; + try { + encrypt = trailerDict.get('Encrypt'); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`); + } if (isDict(encrypt)) { var ids = trailerDict.get('ID'); var fileId = (ids && ids.length) ? ids[0] : ''; @@ -868,8 +877,22 @@ var XRef = (function XRefClosure() { this.pdfManager.password); } - // get the root dictionary (catalog) object - if (!(this.root = trailerDict.get('Root'))) { + // Get the root dictionary (catalog) object, and do some basic validation. + let root; + try { + root = trailerDict.get('Root'); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + warn(`XRef.parse - Invalid "Root" reference: "${ex}".`); + } + if (isDict(root) && root.has('Pages')) { + this.root = root; + } else { + if (!recoveryMode) { + throw new XRefParseException(); + } throw new FormatError('Invalid root reference'); } }, @@ -1208,7 +1231,7 @@ var XRef = (function XRefClosure() { break; } } - startPos += contentLength; + startPos = endPos; } let content = buffer.subarray(position, position + contentLength); @@ -1237,7 +1260,7 @@ var XRef = (function XRefClosure() { this.readXRef(/* recoveryMode */ true); } // finding main trailer - var dict; + let trailerDict; for (i = 0, ii = trailers.length; i < ii; ++i) { stream.pos = trailers[i]; var parser = new Parser(new Lexer(stream), /* allowStreams = */ true, @@ -1247,18 +1270,33 @@ var XRef = (function XRefClosure() { continue; } // read the trailer dictionary - dict = parser.getObj(); + let dict = parser.getObj(); if (!isDict(dict)) { continue; } + // Do some basic validation of the trailer/root dictionary candidate. + let rootDict; + try { + rootDict = dict.get('Root'); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + continue; + } + if (!isDict(rootDict) || !rootDict.has('Pages')) { + continue; + } // taking the first one with 'ID' if (dict.has('ID')) { return dict; } + // The current dictionary is a candidate, but continue searching. + trailerDict = dict; } - // no tailer with 'ID', taking last one (if exists) - if (dict) { - return dict; + // No trailer with 'ID', taking last one (if exists). + if (trailerDict) { + return trailerDict; } // nothing helps throw new InvalidPDFException('Invalid PDF structure'); diff --git a/src/core/parser.js b/src/core/parser.js index c383a7656..68bbe7e0c 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -18,8 +18,8 @@ import { PredictorStream, RunLengthStream } from './stream'; import { - assert, FormatError, info, isNum, isString, MissingDataException, StreamType, - warn + assert, FormatError, info, isNum, isSpace, isString, MissingDataException, + StreamType, warn } from '../shared/util'; import { Cmd, Dict, EOF, isCmd, isDict, isEOF, isName, Name, Ref @@ -721,7 +721,7 @@ var Lexer = (function LexerClosure() { var ch = this.currentChar; var eNotation = false; var divideBy = 0; // different from 0 if it's a floating point value - var sign = 1; + var sign = 0; if (ch === 0x2D) { // '-' sign = -1; @@ -732,10 +732,7 @@ var Lexer = (function LexerClosure() { ch = this.nextChar(); } } else if (ch === 0x2B) { // '+' - ch = this.nextChar(); - } - if (ch === 0x2E) { // '.' - divideBy = 10; + sign = 1; ch = this.nextChar(); } if (ch === 0x0A || ch === 0x0D) { // LF, CR @@ -744,11 +741,22 @@ var Lexer = (function LexerClosure() { ch = this.nextChar(); } while (ch === 0x0A || ch === 0x0D); } + if (ch === 0x2E) { // '.' + divideBy = 10; + ch = this.nextChar(); + } if (ch < 0x30 || ch > 0x39) { // '0' - '9' + if (divideBy === 10 && sign === 0 && + (isSpace(ch) || ch === /* EOF = */ -1)) { + // This is consistent with Adobe Reader (fixes issue9252.pdf). + warn('Lexer.getNumber - treating a single decimal point as zero.'); + return 0; + } throw new FormatError( `Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`); } + sign = sign || 1; var baseValue = ch - 0x30; // '0' var powerValue = 0; var powerValueSign = 1; diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 80905620c..1d643cb1d 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -65,8 +65,10 @@ !issue8823.pdf !issue9084.pdf !issue9105_reduced.pdf +!issue9252.pdf !issue9262_reduced.pdf !issue9291.pdf +!issue9418.pdf !issue9458.pdf !bad-PageLabels.pdf !decodeACSuccessive.pdf diff --git a/test/pdfs/issue9252.pdf b/test/pdfs/issue9252.pdf new file mode 100644 index 000000000..db498fdd7 --- /dev/null +++ b/test/pdfs/issue9252.pdf @@ -0,0 +1,90 @@ +%PDF-1.4 +1 0 obj +<< +/Type /Catalog +/Version /1.4 +/Pages 5 0 R +/Outlines 3 0 R +>> +endobj +2 0 obj +<< +/Title (Test) +/Author (Test) +/Creator (sharpPDF) +/CreationDate (2017124) +>> +endobj +3 0 obj +<< +/Type /Outlines +/Count 0 +>> +endobj +4 0 obj +<< +/Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding +>> +endobj +5 0 obj +<< +/Type /Pages +/Count 1 +/Kids [6 0 R ] +>> +endobj +6 0 obj +<< +/Type /Page +/Parent 5 0 R +/Resources <> +>> +/MediaBox [0 0 612 792] +/CropBox [0 0 612 792] +/Rotate 0 +/ProcSet [/PDF /Text /ImageC] +/Contents [7 0 R ] +>> +endobj +7 0 obj +<< +/Filter [/ASCIIHexDecode] +/Length 105 +>> +stream +710A42540A2F46312031352054660A2E202E3539202E38342072670A3530203735302054640A28546573742920546A0A45540A51> +endstream +endobj +0 19 +0000000000 65535 f +0000000010 00000 n +0000000098 00000 n +0000000202 00000 n +0000000254 00000 n +0000000370 00000 n +0000000491 00000 n +0000000615 00000 n +0000000743 00000 n +0000000857 00000 n +0000000976 00000 n +0000001099 00000 n +0000001226 00000 n +0000001345 00000 n +0000001464 00000 n +0000001585 00000 n +0000001710 00000 n +0000001777 00000 n +0000002102 00000 n +trailer +<< +/Size 102 +/Root 1 0 R +/Info 2 0 R +>> +startxref +36673 +%%EOF \ No newline at end of file diff --git a/test/pdfs/issue9418.pdf b/test/pdfs/issue9418.pdf new file mode 100644 index 000000000..c52cde328 Binary files /dev/null and b/test/pdfs/issue9418.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 99213a1a7..e80912ad4 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -741,6 +741,20 @@ "lastPage": 1, "type": "eq" }, + { "id": "issue9252", + "file": "pdfs/issue9252.pdf", + "md5": "c7d039d808d9344a95d2c9cfa7586ca3", + "rounds": 1, + "link": false, + "type": "eq" + }, + { "id": "issue9418", + "file": "pdfs/issue9418.pdf", + "md5": "32ecad8098acb1938539d47944ecb54b", + "rounds": 1, + "link": false, + "type": "eq" + }, { "id": "issue9262", "file": "pdfs/issue9262_reduced.pdf", "md5": "5347ce2d7b3866625c22e115fd90e0de", diff --git a/test/unit/parser_spec.js b/test/unit/parser_spec.js index 95fa81ef6..7c10ba253 100644 --- a/test/unit/parser_spec.js +++ b/test/unit/parser_spec.js @@ -14,6 +14,7 @@ */ import { Lexer, Linearization } from '../../src/core/parser'; +import { FormatError } from '../../src/shared/util'; import { Name } from '../../src/core/primitives'; import { StringStream } from '../../src/core/stream'; @@ -58,11 +59,32 @@ describe('parser', function() { it('should ignore line-breaks between operator and digit in number', function() { - var input = new StringStream('-\r\n205.88'); - var lexer = new Lexer(input); - var result = lexer.getNumber(); + let minusInput = new StringStream('-\r\n205.88'); + let minusLexer = new Lexer(minusInput); - expect(result).toEqual(-205.88); + expect(minusLexer.getNumber()).toEqual(-205.88); + + let plusInput = new StringStream('+\r\n205.88'); + let plusLexer = new Lexer(plusInput); + + expect(plusLexer.getNumber()).toEqual(205.88); + }); + + it('should treat a single decimal point as zero', function() { + let input = new StringStream('.'); + let lexer = new Lexer(input); + + expect(lexer.getNumber()).toEqual(0); + + let numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.']; + for (let number of numbers) { + let input = new StringStream(number); + let lexer = new Lexer(input); + + expect(function() { + return lexer.getNumber(); + }).toThrowError(FormatError, /^Invalid number:\s/); + } }); it('should handle glued numbers and operators', function() {