Let Lexer.getObj return a dummy-Cmd for commands that start with a non-visible ASCII character (issue 13999)

This way we avoid breaking badly generated PDF documents where a non-visible ASCII character is "glued" to a valid command.
This commit is contained in:
Jonas Jenwald 2021-09-11 18:29:31 +02:00
parent 99e442941c
commit a47844d1fc
4 changed files with 46 additions and 2 deletions

View File

@ -1255,7 +1255,7 @@ class Lexer {
return Cmd.get("}");
case 0x29: // ')'
// Consume the current character in order to avoid permanently hanging
// the worker thread if `Lexer.getObject` is called from within a loop
// the worker thread if `Lexer.getObj` is called from within a loop
// containing try-catch statements, since we would otherwise attempt
// to parse the *same* character over and over (fixes issue8061.pdf).
this.nextChar();
@ -1264,6 +1264,15 @@ class Lexer {
// Start reading a command.
let str = String.fromCharCode(ch);
// A valid command cannot start with a non-visible ASCII character,
// and the next character may be (the start of) a valid command.
if (ch < 0x20 || ch > 0x7f) {
const nextCh = this.peekChar();
if (nextCh >= 0x20 && nextCh <= 0x7f) {
this.nextChar();
return Cmd.get(str);
}
}
const knownCommands = this.knownCommands;
let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/7147166/default.pdf

View File

@ -5885,5 +5885,13 @@
"rounds": 1,
"enableXfa": true,
"type": "eq"
},
{ "id": "issue13999",
"file": "pdfs/issue13999.pdf",
"md5": "8829c76887e7827390e9a5f73cab23f2",
"link": true,
"rounds": 1,
"lastPage": 1,
"type": "eq"
}
]

View File

@ -13,9 +13,9 @@
* limitations under the License.
*/
import { Cmd, EOF, Name } from "../../src/core/primitives.js";
import { Lexer, Linearization, Parser } from "../../src/core/parser.js";
import { FormatError } from "../../src/shared/util.js";
import { Name } from "../../src/core/primitives.js";
import { StringStream } from "../../src/core/stream.js";
describe("parser", function () {
@ -217,6 +217,32 @@ describe("parser", function () {
}
});
});
describe("getObj", function () {
  it(
    "should stop immediately when the start of a command is " +
      "a non-visible ASCII character (issue 13999)",
    function () {
      // The stream begins with a control character (0x14) that is directly
      // followed by the `q` command, then a newline and the `Q` command.
      const stream = new StringStream("\x14q\nQ");
      const lexer = new Lexer(stream);

      // The control character must come back as its own (dummy) command,
      // leaving the two commands that follow it intact.
      for (const expectedCmd of ["\x14", "q", "Q"]) {
        const token = lexer.getObj();
        expect(token instanceof Cmd).toEqual(true);
        expect(token.cmd).toEqual(expectedCmd);
      }

      // Nothing else remains in the stream.
      expect(lexer.getObj()).toEqual(EOF);
    }
  );
});
});
describe("Linearization", function () {