From ec6c185cf5b5f8ac3af3f9e2f45f3bf40b32e22f Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Sun, 20 May 2012 13:44:03 -0500 Subject: [PATCH 1/3] Allow parsing of the "glued" commands --- src/evaluator.js | 64 ++++++------------------------------- src/parser.js | 11 +++++-- test/unit/evaluator_spec.js | 30 +++++++++++++++++ 3 files changed, 48 insertions(+), 57 deletions(-) diff --git a/src/evaluator.js b/src/evaluator.js index ae443fa81..2c07db88c 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -108,39 +108,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // Compatibility BX: 'beginCompat', - EX: 'endCompat' + EX: 'endCompat', + + // (reserved partial commands for the lexer) + BM: null, + BD: null }; - function splitCombinedOperations(operations) { - // Two or more operations can be combined together, trying to find which - // operations were concatenated. - var result = []; - var opIndex = 0; - - if (!operations) { - return null; - } - - while (opIndex < operations.length) { - var currentOp = ''; - for (var op in OP_MAP) { - if (op == operations.substr(opIndex, op.length) && - op.length > currentOp.length) { - currentOp = op; - } - } - - if (currentOp.length > 0) { - result.push(operations.substr(opIndex, currentOp.length)); - opIndex += currentOp.length; - } else { - return null; - } - } - - return result; - } - PartialEvaluator.prototype = { getOperatorList: function PartialEvaluator_getOperatorList(stream, resources, @@ -284,39 +258,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { resources = resources || new Dict(); var xobjs = resources.get('XObject') || new Dict(); var patterns = resources.get('Pattern') || new Dict(); - var parser = new Parser(new Lexer(stream), false, xref); + var parser = new Parser(new Lexer(stream, OP_MAP), false, xref); var res = resources; - var hasNextObj = false, nextObjs; var args = [], obj; var TILING_PATTERN = 1, SHADING_PATTERN = 2; while (true) { - if (hasNextObj) { - obj = 
nextObjs.pop(); - hasNextObj = (nextObjs.length > 0); - } else { - obj = parser.getObj(); - if (isEOF(obj)) - break; - } + obj = parser.getObj(); + if (isEOF(obj)) + break; if (isCmd(obj)) { var cmd = obj.cmd; var fn = OP_MAP[cmd]; - if (!fn) { - // invalid content command, trying to recover - var cmds = splitCombinedOperations(cmd); - if (cmds) { - cmd = cmds[0]; - fn = OP_MAP[cmd]; - // feeding other command on the next iteration - hasNextObj = true; - nextObjs = []; - for (var idx = 1; idx < cmds.length; idx++) { - nextObjs.push(Cmd.get(cmds[idx])); - } - } - } assertWellFormed(fn, 'Unknown command "' + cmd + '"'); // TODO figure out how to type-check vararg functions diff --git a/src/parser.js b/src/parser.js index 2855018a6..3a2218f0d 100644 --- a/src/parser.js +++ b/src/parser.js @@ -264,8 +264,9 @@ var Parser = (function ParserClosure() { })(); var Lexer = (function LexerClosure() { - function Lexer(stream) { + function Lexer(stream, knownCommands) { this.stream = stream; + this.knownCommands = knownCommands; } Lexer.isSpace = function Lexer_isSpace(ch) { @@ -529,12 +530,18 @@ var Lexer = (function LexerClosure() { // command var str = ch; + var knownCommands = this.knownCommands; + var knownCommandFound = knownCommands && (str in knownCommands); while (!!(ch = stream.lookChar()) && !specialChars[ch.charCodeAt(0)]) { + // stop if known command is found and next character does not make + // the str a command + if (knownCommandFound && !((str + ch) in knownCommands)) + break; stream.skip(); if (str.length == 128) error('Command token too long: ' + str.length); - str += ch; + knownCommandFound = knownCommands && (str in knownCommands); } if (str == 'true') return true; diff --git a/test/unit/evaluator_spec.js b/test/unit/evaluator_spec.js index 4ee0768a7..286b8158a 100644 --- a/test/unit/evaluator_spec.js +++ b/test/unit/evaluator_spec.js @@ -78,6 +78,36 @@ describe('evaluator', function() { expect(result.fnArray[1]).toEqual('save'); 
expect(result.fnArray[2]).toEqual('save'); }); + + it('should handle three glued operations #2', function() { + var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(), + 'prefix'); + var resources = new ResourcesMock(); + resources.Res1 = {}; + var stream = new StringStream('B*BBMC'); + var result = evaluator.getOperatorList(stream, resources, []); + + expect(!!result.fnArray && !!result.argsArray).toEqual(true); + expect(result.fnArray.length).toEqual(3); + expect(result.fnArray[0]).toEqual('eoFillStroke'); + expect(result.fnArray[1]).toEqual('fillStroke'); + expect(result.fnArray[2]).toEqual('beginMarkedContent'); + }); + + it('should handle glued operations and operands', function() { + var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(), + 'prefix'); + var stream = new StringStream('q5 Ts'); + var result = evaluator.getOperatorList(stream, new ResourcesMock(), []); + + expect(!!result.fnArray && !!result.argsArray).toEqual(true); + expect(result.fnArray.length).toEqual(2); + expect(result.fnArray[0]).toEqual('save'); + expect(result.fnArray[1]).toEqual('setTextRise'); + expect(result.argsArray.length).toEqual(2); + expect(result.argsArray[1].length).toEqual(1); + expect(result.argsArray[1][0]).toEqual(5); + }); }); }); From 43f1946c7a57b42cf1306c5857859cec9209edbc Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Sun, 20 May 2012 14:05:23 -0500 Subject: [PATCH 2/3] Add prefixes for literals --- src/evaluator.js | 10 +++++++++- test/unit/evaluator_spec.js | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/evaluator.js b/src/evaluator.js index 2c07db88c..1dab7de0b 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -112,7 +112,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // (reserved partial commands for the lexer) BM: null, - BD: null + BD: null, + 'true': null, + fa: null, + fal: null, + fals: null, + 'false': null, + nu: null, + nul: null, + 'null': null }; 
PartialEvaluator.prototype = { diff --git a/test/unit/evaluator_spec.js b/test/unit/evaluator_spec.js index 286b8158a..e31a525ac 100644 --- a/test/unit/evaluator_spec.js +++ b/test/unit/evaluator_spec.js @@ -108,6 +108,24 @@ describe('evaluator', function() { expect(result.argsArray[1].length).toEqual(1); expect(result.argsArray[1][0]).toEqual(5); }); + + it('should handle glued operations and literals', function() { + var evaluator = new PartialEvaluator(new XrefMock(), new HandlerMock(), + 'prefix'); + var stream = new StringStream('trueifalserinulli'); + var result = evaluator.getOperatorList(stream, new ResourcesMock(), []); + + expect(!!result.fnArray && !!result.argsArray).toEqual(true); + expect(result.fnArray.length).toEqual(3); + expect(result.fnArray[0]).toEqual('setFlatness'); + expect(result.fnArray[1]).toEqual('setRenderingIntent'); + expect(result.fnArray[2]).toEqual('setFlatness'); + expect(result.argsArray.length).toEqual(3); + expect(result.argsArray[0].length).toEqual(1); + expect(result.argsArray[0][0]).toEqual(true); + expect(result.argsArray[1].length).toEqual(1); + expect(result.argsArray[1][0]).toEqual(false); + }); }); }); From 874357aac1762f111192c212452ba43bc9d0ee70 Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Mon, 21 May 2012 15:23:49 -0500 Subject: [PATCH 3/3] Comment for knownCommands --- src/parser.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/parser.js b/src/parser.js index 3a2218f0d..6c9b4e67f 100644 --- a/src/parser.js +++ b/src/parser.js @@ -266,6 +266,13 @@ var Parser = (function ParserClosure() { var Lexer = (function LexerClosure() { function Lexer(stream, knownCommands) { this.stream = stream; + // The PDFs might have "glued" commands with other commands, operands or + // literals, e.g. "q1". The knownCommands is a dictionary of the valid + // commands and their prefixes. The prefixes are built the following way: + // if there a command that is a prefix of the other valid command or + // literal (e.g. 
'f' and 'false') the following prefixes must be included, + // 'fa', 'fal', 'fals'. The prefixes are not needed if the command has no + // other commands or literals as a prefix. The knownCommands is optional. this.knownCommands = knownCommands; }