Merge pull request #1728 from yurydelendik/lexer-1

Allow parsing of the "glued" commands
This commit is contained in:
Artur Adib 2012-05-21 13:55:29 -07:00
commit 5ac7513fa3
3 changed files with 81 additions and 57 deletions

View File

@ -108,39 +108,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
// Compatibility
BX: 'beginCompat',
EX: 'endCompat'
EX: 'endCompat',
// (reserved partial commands for the lexer)
BM: null,
BD: null,
'true': null,
fa: null,
fal: null,
fals: null,
'false': null,
nu: null,
nul: null,
'null': null
};
/**
 * Splits a string of operators that were written without separators into
 * the individual operator names found in OP_MAP.
 *
 * At every position the longest OP_MAP key that matches is taken (greedy
 * match), so e.g. a two-character operator wins over a one-character
 * operator that shares its first character.
 *
 * @param {string} operations The concatenated operator names.
 * @return {Array|null} The operator names in order of appearance, or null
 *   when the input is empty/missing or some position matches no key.
 */
function splitCombinedOperations(operations) {
  if (!operations) {
    return null;
  }
  var pieces = [];
  var total = operations.length;
  var pos = 0;
  while (pos < total) {
    // Look for the longest known operator that starts at `pos`.
    var longest = '';
    for (var candidate in OP_MAP) {
      if (candidate.length <= longest.length) {
        continue; // cannot improve on the current best match
      }
      if (operations.substr(pos, candidate.length) == candidate) {
        longest = candidate;
      }
    }
    if (longest.length === 0) {
      // Nothing matches here: the whole string is not splittable.
      return null;
    }
    pieces.push(longest);
    pos += longest.length;
  }
  return pieces;
}
PartialEvaluator.prototype = {
getOperatorList: function PartialEvaluator_getOperatorList(stream,
resources,
@ -284,39 +266,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
resources = resources || new Dict();
var xobjs = resources.get('XObject') || new Dict();
var patterns = resources.get('Pattern') || new Dict();
var parser = new Parser(new Lexer(stream), false, xref);
var parser = new Parser(new Lexer(stream, OP_MAP), false, xref);
var res = resources;
var hasNextObj = false, nextObjs;
var args = [], obj;
var TILING_PATTERN = 1, SHADING_PATTERN = 2;
while (true) {
if (hasNextObj) {
obj = nextObjs.pop();
hasNextObj = (nextObjs.length > 0);
} else {
obj = parser.getObj();
if (isEOF(obj))
break;
}
obj = parser.getObj();
if (isEOF(obj))
break;
if (isCmd(obj)) {
var cmd = obj.cmd;
var fn = OP_MAP[cmd];
if (!fn) {
// invalid content command, trying to recover
var cmds = splitCombinedOperations(cmd);
if (cmds) {
cmd = cmds[0];
fn = OP_MAP[cmd];
// feeding other command on the next iteration
hasNextObj = true;
nextObjs = [];
for (var idx = 1; idx < cmds.length; idx++) {
nextObjs.push(Cmd.get(cmds[idx]));
}
}
}
assertWellFormed(fn, 'Unknown command "' + cmd + '"');
// TODO figure out how to type-check vararg functions

View File

@ -264,8 +264,16 @@ var Parser = (function ParserClosure() {
})();
var Lexer = (function LexerClosure() {
function Lexer(stream) {
/**
 * Creates a lexer that reads tokens from the given stream.
 *
 * @param stream The stream the lexer reads from.
 * @param knownCommands Optional dictionary whose keys are the valid
 *   commands (and the required prefixes, see below). When it is omitted the
 *   known-command lookup is not available to the lexer.
 */
function Lexer(stream, knownCommands) {
this.stream = stream;
// PDFs might have commands "glued" to other commands, operands or
// literals, e.g. "q1". knownCommands is a dictionary of the valid
// commands and their prefixes. The prefixes are built the following way:
// if there is a command that is a prefix of another valid command or
// literal (e.g. 'f' and 'false'), the intermediate prefixes must be
// included as well: 'fa', 'fal', 'fals'. No prefixes are needed for a
// command or literal that does not start with another command or literal.
// The knownCommands argument is optional.
this.knownCommands = knownCommands;
}
Lexer.isSpace = function Lexer_isSpace(ch) {
@ -529,12 +537,18 @@ var Lexer = (function LexerClosure() {
// command
var str = ch;
var knownCommands = this.knownCommands;
var knownCommandFound = knownCommands && (str in knownCommands);
while (!!(ch = stream.lookChar()) && !specialChars[ch.charCodeAt(0)]) {
// stop if known command is found and next character does not make
// the str a command
if (knownCommandFound && !((str + ch) in knownCommands))
break;
stream.skip();
if (str.length == 128)
error('Command token too long: ' + str.length);
str += ch;
knownCommandFound = knownCommands && (str in knownCommands);
}
if (str == 'true')
return true;

View File

@ -78,6 +78,54 @@ describe('evaluator', function() {
expect(result.fnArray[1]).toEqual('save');
expect(result.fnArray[2]).toEqual('save');
});
it('should handle three glued operations #2', function() {
  var partialEvaluator = new PartialEvaluator(new XrefMock(),
                                              new HandlerMock(), 'prefix');
  var res = new ResourcesMock();
  res.Res1 = {};
  // Three operators written back to back with no separators in between.
  var gluedStream = new StringStream('B*BBMC');
  var opList = partialEvaluator.getOperatorList(gluedStream, res, []);

  expect(Boolean(opList.fnArray) && Boolean(opList.argsArray)).toEqual(true);

  // Each glued operator must surface as its own entry, in stream order.
  var expectedFns = ['eoFillStroke', 'fillStroke', 'beginMarkedContent'];
  expect(opList.fnArray.length).toEqual(expectedFns.length);
  for (var i = 0; i < expectedFns.length; i++) {
    expect(opList.fnArray[i]).toEqual(expectedFns[i]);
  }
});
it('should handle glued operations and operands', function() {
  var partialEvaluator = new PartialEvaluator(new XrefMock(),
                                              new HandlerMock(), 'prefix');
  // The operand of the second operator directly follows the first operator.
  var gluedStream = new StringStream('q5 Ts');
  var opList = partialEvaluator.getOperatorList(gluedStream,
                                                new ResourcesMock(), []);

  expect(Boolean(opList.fnArray) && Boolean(opList.argsArray)).toEqual(true);

  // Two separate operators are expected ...
  var fns = opList.fnArray;
  expect(fns.length).toEqual(2);
  expect(fns[0]).toEqual('save');
  expect(fns[1]).toEqual('setTextRise');

  // ... and the number belongs to the second one only.
  var argsList = opList.argsArray;
  expect(argsList.length).toEqual(2);
  var textRiseArgs = argsList[1];
  expect(textRiseArgs.length).toEqual(1);
  expect(textRiseArgs[0]).toEqual(5);
});
it('should handle glued operations and literals', function() {
  var partialEvaluator = new PartialEvaluator(new XrefMock(),
                                              new HandlerMock(), 'prefix');
  // The literals true/false/null are each directly followed by an operator.
  var gluedStream = new StringStream('trueifalserinulli');
  var opList = partialEvaluator.getOperatorList(gluedStream,
                                                new ResourcesMock(), []);

  expect(Boolean(opList.fnArray) && Boolean(opList.argsArray)).toEqual(true);

  // One operator per literal, in stream order.
  var expectedFns = ['setFlatness', 'setRenderingIntent', 'setFlatness'];
  expect(opList.fnArray.length).toEqual(expectedFns.length);
  for (var i = 0; i < expectedFns.length; i++) {
    expect(opList.fnArray[i]).toEqual(expectedFns[i]);
  }

  // Only the arguments of the first two operators are inspected here.
  var argsList = opList.argsArray;
  expect(argsList.length).toEqual(3);
  var flatnessArgs = argsList[0];
  expect(flatnessArgs.length).toEqual(1);
  expect(flatnessArgs[0]).toEqual(true);
  var intentArgs = argsList[1];
  expect(intentArgs.length).toEqual(1);
  expect(intentArgs[0]).toEqual(false);
});
});
});