Lexer.getObj: when a command starts with a character code below 0x20 or
above 0x7f and the following character lies in the range 0x20-0x7f, return
the single-character command immediately, since the next character may be
the start of a valid command (issue 13999).
Also corrects a code comment that referred to `Lexer.getObject`, and adds a
reference test (issue13999.pdf) and a unit test covering the new behaviour.

diff --git a/src/core/parser.js b/src/core/parser.js
index 47b2ef98b..516aba63b 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -1255,7 +1255,7 @@ class Lexer {
         return Cmd.get("}");
       case 0x29: // ')'
         // Consume the current character in order to avoid permanently hanging
-        // the worker thread if `Lexer.getObject` is called from within a loop
+        // the worker thread if `Lexer.getObj` is called from within a loop
         // containing try-catch statements, since we would otherwise attempt
         // to parse the *same* character over and over (fixes issue8061.pdf).
         this.nextChar();
@@ -1264,6 +1264,15 @@ class Lexer {
 
     // Start reading a command.
     let str = String.fromCharCode(ch);
+    // A valid command cannot start with a non-visible ASCII character,
+    // and the next character may be (the start of) a valid command.
+    if (ch < 0x20 || ch > 0x7f) {
+      const nextCh = this.peekChar();
+      if (nextCh >= 0x20 && nextCh <= 0x7f) {
+        this.nextChar();
+        return Cmd.get(str);
+      }
+    }
     const knownCommands = this.knownCommands;
     let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
     while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
diff --git a/test/pdfs/issue13999.pdf.link b/test/pdfs/issue13999.pdf.link
new file mode 100644
index 000000000..7c5f5e29a
--- /dev/null
+++ b/test/pdfs/issue13999.pdf.link
@@ -0,0 +1 @@
+https://github.com/mozilla/pdf.js/files/7147166/default.pdf
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 7a86fa714..cbb51a523 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -5885,5 +5885,13 @@
        "rounds": 1,
        "enableXfa": true,
        "type": "eq"
+    },
+    {  "id": "issue13999",
+       "file": "pdfs/issue13999.pdf",
+       "md5": "8829c76887e7827390e9a5f73cab23f2",
+       "link": true,
+       "rounds": 1,
+       "lastPage": 1,
+       "type": "eq"
     }
 ]
diff --git a/test/unit/parser_spec.js b/test/unit/parser_spec.js
index 15c163133..7729f2547 100644
--- a/test/unit/parser_spec.js
+++ b/test/unit/parser_spec.js
@@ -13,9 +13,9 @@
  * limitations under the License.
  */
 
+import { Cmd, EOF, Name } from "../../src/core/primitives.js";
 import { Lexer, Linearization, Parser } from "../../src/core/parser.js";
 import { FormatError } from "../../src/shared/util.js";
-import { Name } from "../../src/core/primitives.js";
 import { StringStream } from "../../src/core/stream.js";
 
 describe("parser", function () {
@@ -217,6 +217,32 @@ describe("parser", function () {
         }
       });
     });
+
+    describe("getObj", function () {
+      it(
+        "should stop immediately when the start of a command is " +
+          "a non-visible ASCII character (issue 13999)",
+        function () {
+          const input = new StringStream("\x14q\nQ");
+          const lexer = new Lexer(input);
+
+          let obj = lexer.getObj();
+          expect(obj instanceof Cmd).toEqual(true);
+          expect(obj.cmd).toEqual("\x14");
+
+          obj = lexer.getObj();
+          expect(obj instanceof Cmd).toEqual(true);
+          expect(obj.cmd).toEqual("q");
+
+          obj = lexer.getObj();
+          expect(obj instanceof Cmd).toEqual(true);
+          expect(obj.cmd).toEqual("Q");
+
+          obj = lexer.getObj();
+          expect(obj).toEqual(EOF);
+        }
+      );
+    });
   });
 
   describe("Linearization", function () {