From 6bbcafcd26beb19f44131e944457a3c23f510342 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 19 Jun 2018 09:37:56 +0200 Subject: [PATCH] Let `Lexer.getNumber` treat a single decimal point as zero (issue 9252) This is consistent with the behaviour in Adobe Reader. --- src/core/parser.js | 14 +++++-- test/pdfs/.gitignore | 1 + test/pdfs/issue9252.pdf | 90 ++++++++++++++++++++++++++++++++++++++++ test/test_manifest.json | 7 ++++ test/unit/parser_spec.js | 18 ++++++++ 5 files changed, 127 insertions(+), 3 deletions(-) create mode 100644 test/pdfs/issue9252.pdf diff --git a/src/core/parser.js b/src/core/parser.js index 84d17f798..68bbe7e0c 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -18,8 +18,8 @@ import { PredictorStream, RunLengthStream } from './stream'; import { - assert, FormatError, info, isNum, isString, MissingDataException, StreamType, - warn + assert, FormatError, info, isNum, isSpace, isString, MissingDataException, + StreamType, warn } from '../shared/util'; import { Cmd, Dict, EOF, isCmd, isDict, isEOF, isName, Name, Ref @@ -721,7 +721,7 @@ var Lexer = (function LexerClosure() { var ch = this.currentChar; var eNotation = false; var divideBy = 0; // different from 0 if it's a floating point value - var sign = 1; + var sign = 0; if (ch === 0x2D) { // '-' sign = -1; @@ -732,6 +732,7 @@ var Lexer = (function LexerClosure() { ch = this.nextChar(); } } else if (ch === 0x2B) { // '+' + sign = 1; ch = this.nextChar(); } if (ch === 0x0A || ch === 0x0D) { // LF, CR @@ -745,10 +746,17 @@ var Lexer = (function LexerClosure() { ch = this.nextChar(); } if (ch < 0x30 || ch > 0x39) { // '0' - '9' + if (divideBy === 10 && sign === 0 && + (isSpace(ch) || ch === /* EOF = */ -1)) { + // This is consistent with Adobe Reader (fixes issue9252.pdf). + warn('Lexer.getNumber - treating a single decimal point as zero.'); + return 0; + } throw new FormatError( `Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`); } + sign = sign || 1; var baseValue = ch - 0x30; // '0' var powerValue = 0; var powerValueSign = 1; diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 80905620c..164de3e75 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -65,6 +65,7 @@ !issue8823.pdf !issue9084.pdf !issue9105_reduced.pdf +!issue9252.pdf !issue9262_reduced.pdf !issue9291.pdf !issue9458.pdf diff --git a/test/pdfs/issue9252.pdf b/test/pdfs/issue9252.pdf new file mode 100644 index 000000000..db498fdd7 --- /dev/null +++ b/test/pdfs/issue9252.pdf @@ -0,0 +1,90 @@ +%PDF-1.4 +1 0 obj +<< +/Type /Catalog +/Version /1.4 +/Pages 5 0 R +/Outlines 3 0 R +>> +endobj +2 0 obj +<< +/Title (Test) +/Author (Test) +/Creator (sharpPDF) +/CreationDate (2017124) +>> +endobj +3 0 obj +<< +/Type /Outlines +/Count 0 +>> +endobj +4 0 obj +<< +/Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding +>> +endobj +5 0 obj +<< +/Type /Pages +/Count 1 +/Kids [6 0 R ] +>> +endobj +6 0 obj +<< +/Type /Page +/Parent 5 0 R +/Resources <> +>> +/MediaBox [0 0 612 792] +/CropBox [0 0 612 792] +/Rotate 0 +/ProcSet [/PDF /Text /ImageC] +/Contents [7 0 R ] +>> +endobj +7 0 obj +<< +/Filter [/ASCIIHexDecode] +/Length 105 +>> +stream +710A42540A2F46312031352054660A2E202E3539202E38342072670A3530203735302054640A28546573742920546A0A45540A51> +endstream +endobj +0 19 +0000000000 65535 f +0000000010 00000 n +0000000098 00000 n +0000000202 00000 n +0000000254 00000 n +0000000370 00000 n +0000000491 00000 n +0000000615 00000 n +0000000743 00000 n +0000000857 00000 n +0000000976 00000 n +0000001099 00000 n +0000001226 00000 n +0000001345 00000 n +0000001464 00000 n +0000001585 00000 n +0000001710 00000 n +0000001777 00000 n +0000002102 00000 n +trailer +<< +/Size 102 +/Root 1 0 R +/Info 2 0 R +>> +startxref +36673 +%%EOF \ No newline at end of file diff --git a/test/test_manifest.json b/test/test_manifest.json index 99213a1a7..e9092caec 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -741,6 +741,13 @@ "lastPage": 1, "type": "eq" }, + { "id": "issue9252", + "file": "pdfs/issue9252.pdf", + "md5": "c7d039d808d9344a95d2c9cfa7586ca3", + "rounds": 1, + "link": false, + "type": "eq" + }, { "id": "issue9262", "file": "pdfs/issue9262_reduced.pdf", "md5": "5347ce2d7b3866625c22e115fd90e0de", diff --git a/test/unit/parser_spec.js b/test/unit/parser_spec.js index 86e775c88..7c10ba253 100644 --- a/test/unit/parser_spec.js +++ b/test/unit/parser_spec.js @@ -14,6 +14,7 @@ */ import { Lexer, Linearization } from '../../src/core/parser'; +import { FormatError } from '../../src/shared/util'; import { Name } from '../../src/core/primitives'; import { StringStream } from '../../src/core/stream'; @@ -69,6 +70,23 @@ describe('parser', function() { expect(plusLexer.getNumber()).toEqual(205.88); }); + it('should treat a single decimal point as zero', function() { + let input = new StringStream('.'); + let lexer = new Lexer(input); + + expect(lexer.getNumber()).toEqual(0); + + let numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.']; + for (let number of numbers) { + let input = new StringStream(number); + let lexer = new Lexer(input); + + expect(function() { + return lexer.getNumber(); + }).toThrowError(FormatError, /^Invalid number:\s/); + } + }); + it('should handle glued numbers and operators', function() { var input = new StringStream('123ET'); var lexer = new Lexer(input);