From 71bd8b4de9ec45d2f00ed8793582d84d20e24506 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 20 Oct 2022 19:40:25 +0200 Subject: [PATCH] Let `Lexer.getNumber` treat more invalid "numbers" as zero (issue 15604) In the referenced PDF document there are "numbers" which consist only of `-.`, and while that's obviously not valid, Adobe Reader seems to handle it just fine. Letting this method ignore more invalid "numbers" was suggested during the review of PR 14543, so let's simply relax the validation here. --- src/core/parser.js | 20 +++++++------------- test/pdfs/issue15604.pdf.link | 1 + test/test_manifest.json | 7 +++++++ test/unit/parser_spec.js | 18 +++++++++--------- 4 files changed, 24 insertions(+), 22 deletions(-) create mode 100644 test/pdfs/issue15604.pdf.link diff --git a/src/core/parser.js b/src/core/parser.js index 836dd0ff4..088a26c6e 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -897,21 +897,15 @@ class Lexer { ch = this.nextChar(); } if (ch < /* '0' = */ 0x30 || ch > /* '9' = */ 0x39) { + const msg = `Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`; + if (isWhiteSpace(ch) || ch === /* EOF = */ -1) { - // This is consistent with Adobe Reader (fixes issue9252.pdf). - if (divideBy === 10 && sign === 0) { - warn("Lexer.getNumber - treating a single decimal point as zero."); - return 0; - } - // This is consistent with Adobe Reader (fixes bug1753983.pdf). - if (divideBy === 0 && sign === -1) { - warn("Lexer.getNumber - treating a single minus sign as zero."); - return 0; - } + // This is consistent with Adobe Reader (fixes issue9252.pdf, + // issue15604.pdf, bug1753983.pdf).
+ info(`Lexer.getNumber - "${msg}".`); + return 0; } - throw new FormatError( - `Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})` - ); + throw new FormatError(msg); } sign = sign || 1; diff --git a/test/pdfs/issue15604.pdf.link b/test/pdfs/issue15604.pdf.link new file mode 100644 index 000000000..cc76a844d --- /dev/null +++ b/test/pdfs/issue15604.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/9832017/SP_Page1.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index bad62f408..c782b5efa 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -288,6 +288,13 @@ "link": true, "type": "eq" }, + { "id": "issue15604", + "file": "pdfs/issue15604.pdf", + "md5": "505040e5634434ae97118a4c39bf27e5", + "rounds": 1, + "link": true, + "type": "eq" + }, { "id": "bug921760", "file": "pdfs/bug921760.pdf", "md5": "1aa136d786a65b0d7cce7bdb3c58c6c3", diff --git a/test/unit/parser_spec.js b/test/unit/parser_spec.js index e346de6da..b286dde86 100644 --- a/test/unit/parser_spec.js +++ b/test/unit/parser_spec.js @@ -151,17 +151,17 @@ describe("parser", function () { expect(plusLexer.getNumber()).toEqual(205.88); }); - it("should treat a single decimal point, or minus sign, as zero", function () { - const dotInput = new StringStream("."); - const dotLexer = new Lexer(dotInput); - expect(dotLexer.getNumber()).toEqual(0); + it("should treat a single decimal point, or minus/plus sign, as zero", function () { + const validNums = [".", "-", "+", "-.", "+.", "-\r\n.", "+\r\n."]; + for (const number of validNums) { + const validInput = new StringStream(number); + const validLexer = new Lexer(validInput); - const minusInput = new StringStream("-"); - const minusLexer = new Lexer(minusInput); - expect(minusLexer.getNumber()).toEqual(0); + expect(validLexer.getNumber()).toEqual(0); + } - const numbers = ["..", "-.", "+.", "-\r\n.", "+\r\n."]; - for (const number of numbers) { + const invalidNums = ["..", ".-", ".+"]; + for (const number of 
invalidNums) { const invalidInput = new StringStream(number); const invalidLexer = new Lexer(invalidInput);