Recently, I asked a question here: Boost Spirit Segfault In Parser
In this post it was pointed out the grammar I was working with was absolutely left recursive and that spirit is a PEG parser generator, meaning left recursion is impossible.
I converted the grammar to a non-left recursive grammar, using the rule from the section dealing with left recursion in the Dragon Book.
Given a left recursive grammar
A -> A >> alpha | beta
It can be converted to the equivalent right recursive grammar by doing the following:
A -> beta >> A'
A' -> alpha >> A' | epsilon
Here is the resulting parser, with what I believe to be the the non-left recursive productions:
namespace interpreter {
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start)
{
using boost::phoenix::ref;
start %= functionList >> endList >> qi::eoi;
// different expressions
exp %=
qi::token(k_alphaTerminal) >> qi::token(k_equalTo) >> qi::token(k_alphaTerminal) >> expPrime
|
qi::token(k_numericTerminal) >> expPrime
|
qi::token(k_trueTok) >> expPrime
|
qi::token(k_falseTok) >> expPrime;
expPrime %=
qi::token(k_equalTo) >> exp >> expPrime
|
qi::token(k_notEq) >> exp >> expPrime
|
qi::token(k_less) >> exp >> expPrime
|
qi::token(k_lessEq) >> exp >> expPrime
|
qi::token(k_greater) >> exp >> expPrime
|
qi::token(k_greaterEq) >> exp >> expPrime
|
qi::token(k_andTok) >> exp >> expPrime
|
qi::token(k_orTok) >> exp >> expPrime
|
qi::token(k_notTok) >> exp
|
qi::token(k_plues) >> exp >> expPrime
|
qi::token(k_minus) >> exp >> expPrime
|
qi::token(k_mult) >> exp >> expPrime
|
qi::token(k_minus) >> exp
|
qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::eps;
// parameter list
paramList %= exp >> paramListPrime;
paramListPrime %= qi::token(k_comma) >> exp >> paramListPrime
|
qi::eps;
// return statements
returnStatement %= qi::token(k_returnTok) >> exp
|
qi::token(k_returnTok);
// function call statements
callStatement %= qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);
// variable assignment
assignmentStatement %= qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp
>> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;
// list of integers
intList %= qi::token(k_numericTerminal) >> intListPrime;
intListPrime %=
qi::token(k_comma) >> qi::token(k_numericTerminal) >> intListPrime
|
qi::eps;
// print out a variable
printStatement %= qi::token(k_print) >> exp;
// take input
inputStatement %= qi::token(k_alphaTerminal) >> qi::token(k_input);
// conditional statement
conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;
// consitions have optional else
optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
|
qi::eps;
// while loop
whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);
// actual program statements
endList %= end >> endListPrime;
endListPrime %= end >> endListPrime
|
qi::eps;
// end possibilities of program in global space
end %= callStatement
|
printStatement
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_input)
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
>> qi::token(k_rightBracket)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
>> qi::token(k_assign) >> exp;
// function parameters
param %=
qi::token(k_alphaTerminal) >> paramPrime
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
>> paramPrime;
// for handling left recursion in paramlist
paramPrime %=
qi::token(k_comma) >> qi::token(k_alphaTerminal) >> paramPrime
|
qi::eps;
// define a statement as assignment print input condition while or call
statement %=
assignmentStatement
|
printStatement
|
inputStatement
|
conditionStatement
|
whileStatement
|
callStatement
|
returnStatement;
// general statement list
statements %= statement >> statementsPrime;
// for handling left recursion in statements
statementsPrime %= statement >> statementsPrime
|
qi::eps;
// functions
functionList %= qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
>> param >> qi::token(k_rightParen) >> qi::token(k_colon)
>> statements >> qi::token(k_fed)
|
qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
>> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
| qi::eps;
BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
debug(start);
}
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> endListPrime;
qi::rule<Iterator, Skipper> param;
qi::rule<Iterator, Skipper> paramPrime;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> paramListPrime;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statementsPrime;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> expPrime;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> intListPrime;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
}
Here is the lexer
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <fstream>
#include <streambuf>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
namespace interpreter
{
namespace lex = boost::spirit::lex;
enum Tokens
{
k_andTok = 1,
k_def = 2,
k_elihw = 3,
k_elseTok = 4,
k_falseTok = 5,
k_fed = 6,
k_fi = 7,
k_ifTok = 8,
k_input = 9,
k_notTok = 10,
k_orTok = 11,
k_print = 12,
k_returnTok = 13,
k_trueTok = 14,
k_whileTok = 15,
k_plues = 16,
k_minus = 17,
k_mult = 18,
k_div = 19,
k_bang = 20,
k_equalTo = 21,
k_greaterEq = 22,
k_lessEq = 23,
k_notEq = 24,
k_less = 25,
k_greater = 26,
k_assign = 27,
k_comma = 28,
k_colon = 29,
k_leftParen = 30,
k_rightParen = 31,
k_leftBracket = 32,
k_rightBracket = 33,
k_alphaTerminal = 34,
k_numericTerminal = 35
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \\t\\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\\+"),
minus("\\-"),
mult("\\*"),
div("\\/"),
bang("\\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\\("),
rightParen("\\)"),
leftBracket("\\["),
rightBracket("\\["),
alphaTerminal("[a-z][a-zA-Z0-9]*"),
numericTerminal("[0-9]*")
{
this->self("WHITESPACE") = whiteSpace;
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(alphaTerminal, k_alphaTerminal)
(numericTerminal, k_numericTerminal);
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> alphaTerminal;
lex::token_def<std::string> numericTerminal;
};
And here is my example test program
int main(int argc, char** argv)
{
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
interpreter::LexerTokens< lexer_type > lexer;
interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
std::string sourceCode("def print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)");
char const* first = sourceCode.c_str();
char const* last = &first[sourceCode.size()];
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}
These revisions have given rise to a few questions, first, by looking at the grammar informally, without constructing the full LL(1) parsing table, I don't believe this grammar is LL(1). I still need to verify this, but I was wondering will spirit, be able to parse this grammar? I know PEGs typically use the / operator to do lookahead, does spirit do this currently? I read in another post that spirit may not?
Second, this grammar fails, I notice that when, in the start production, I simplify the start production and make it:
start %= functionList;
and then change the input to be:
def print_it(x, y): print 3*x + y return fed
the grammar debug statement states that the parse was successful. However, I see the string remaining is:
print_it(x, y): print 3*x + y return fed
So only the first token was actually parsed. After a bit of debugging I am unsure why the parse is successful and why only a single symbol is consumed, could this be an issue with the lexer?
Additionally, I see similar results when I change the start production to be:
start %= endList;
and using the input
y = x
This however, fails to parse and only consumes the character y.
Finally, the output of my debug statement is not very helpful, when running with the debug statement the output that is produced is:
<start>
<try>[][][][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)
R is 0
Which I assume to mean twenty productions are attempted in the grammar, from the twenty empty [], is that a correct assumption? Also why are the [] empty? I typically see them with some text that is useful in debugging. Is it because the grammar is automatically a success if the regular expression is matched? If that is the case, is there a way to get the debug statement to print helpful output when using the token enum token as opposed to adding expressions using macros?
Any help or points in the correct direction is appreciated, thank you.
Which I assume to mean twenty productions are attempted in the grammar, from the twenty empty [], is that a correct assumption?
No. The []
indicate input tokens.
Also why are the [] empty?
There's likely no useful way to print them so they show up as empty.
If that is the case, is there a way to get the debug statement to print helpful output when using the token enum token as opposed to adding expressions using macros?
I'd think so. But I never use Lex. So, It could be a while to figure it out.
First thing that catches my attention is this:
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
Your AttributeTypes says omit
. Indeed, changing to
typedef lex::lexertl::token< char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_ > token_type;
Does show signs of life. For the input x=y
(no whitespace!) it prints:
<start>
<try>[y][=][x][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining
R is false
Now, for the def print_it(x, y): print 3*x + y return fed
input, the output still is:
<start>
<try>[def][][][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining print_it(x, y): print 3*x + y return fed
R is false
Slightly more informative. Interestingly, it also seems to fail on the first whitespace. The whiteSpace
regex looks okay, so I searched for lex in_state
in my answers to remember what I once learned about skipping and Lex.
I played around with some suggestions. Following the second post lead me to this comment:
Nice! You can add to your answer that having a separate state inside the lexer for skipping white spaces has another considerable drawback: lack of debugging support. If you use a separate state for skipping and then use BOOST_SPIRIT_DEBUG_NODE, you won't get the full output of the token stream. This is because the default debug handler advances the lexer iterator to get tokens, the lexer is of course in the INITIAL state. As soon as it meets a white space, the iteration stops, because the lexer cannot find a match. The token stream will be cut at the first white space in the rule's trace. – noxmetus Sep 20 '17 at 18:28
Let's, just for fun, try without the separate state and instead use the one from Troubles with boost::spirit::lex & whitespace.
Now, the expression doesn't parse because
print_it
is not a valid identifier. Let's add _
to alphaTerminal
alphaTerminal
requires =
to follow. Fixing the expression a bit:
exp = (
qi::token(k_alphaTerminal)
| qi::token(k_numericTerminal)
| qi::token(k_trueTok)
| qi::token(k_falseTok))
>> expPrime
;
In fact, those token ids arent valid. You need to start with min_token_id
(which is 0x10000):
enum Tokens
{
k_andTok = lex::tokenids::min_token_id + 1,
k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
k_alphaTerminal, k_numericTerminal,
};
Why even do you want to dictate the token IDs? Guessing from the fact you are passing a reference to the token definitions to the parser, I would think you were going to use them?
exp
= (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
;
Also, let's enable debugging for all the rules:
BOOST_SPIRIT_DEBUG_NODES(
(start) (functionList) (endList) (endListPrime) (param)
(paramPrime) (paramList) (paramListPrime) (statements)
(statementsPrime) (statement) (assignmentStatement)
(printStatement) (inputStatement) (conditionStatement)
(whileStatement) (callStatement) (returnStatement) (exp)
(expPrime) (intList) (intListPrime) (optionalElse) (end)
)
And, after spending entirely too much time trying to understand why the state-switching skipper seems not to work, I'm giving up. Here's the upshot of my tinkering:
//#define USE_STATES
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace lex = boost::spirit::lex;
namespace interpreter {
enum Tokens
{
k_andTok = lex::tokenids::min_token_id + 1,
k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
k_alphaTerminal, k_numericTerminal,
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \\t\\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\\+"),
minus("\\-"),
mult("\\*"),
div("\\/"),
bang("\\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\\("),
rightParen("\\)"),
leftBracket("\\["),
rightBracket("\\["),
alphaTerminal("[a-z][a-zA-Z0-9_]*"),
numericTerminal("[0-9]*")
{
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(alphaTerminal, k_alphaTerminal)
(numericTerminal, k_numericTerminal);
#ifdef USE_STATES
this->self("WHITESPACE") = whiteSpace;
#else
this->self += whiteSpace [ lex::_pass = lex::pass_flags::pass_ignore ];
#endif
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> alphaTerminal;
lex::token_def<std::string> numericTerminal;
};
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start)
{
start
= functionList >> endList >> qi::eoi
;
// different expressions
exp
= (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
;
expPrime
= tok.equalTo >> exp >> expPrime
| tok.notEq >> exp >> expPrime
| tok.less >> exp >> expPrime
| tok.lessEq >> exp >> expPrime
| tok.greater >> exp >> expPrime
| tok.greaterEq >> exp >> expPrime
| tok.andTok >> exp >> expPrime
| tok.orTok >> exp >> expPrime
| tok.notTok >> exp
| tok.plus >> exp >> expPrime
| tok.minus >> exp >> expPrime
| tok.mult >> exp >> expPrime
| tok.minus >> exp
| tok.leftParen >> exp >> tok.rightParen
| tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
| tok.alphaTerminal >> tok.leftParen >> tok.rightParen
| tok.alphaTerminal >> tok.leftParen >> exp >> tok.rightParen
| qi::eps
;
// parameter list
paramList
= exp >> paramListPrime
;
paramListPrime
= tok.comma >> exp >> paramListPrime
| qi::eps
;
// return statements
returnStatement
= tok.returnTok >> exp
| tok.returnTok
;
// function call statements
callStatement
= tok.alphaTerminal >> tok.leftParen >> tok.rightParen
| tok.alphaTerminal >> tok.leftParen >> paramList >> tok.rightParen
;
// variable assignment
assignmentStatement
= tok.alphaTerminal >> tok.assign >> exp
| tok.alphaTerminal >> tok.leftBracket >> exp
>> tok.rightBracket >> tok.assign >> exp
;
// list of integers
intList
= tok.numericTerminal >> intListPrime
;
intListPrime
= tok.comma >> tok.numericTerminal >> intListPrime
| qi::eps
;
// print out a variable
printStatement
= tok.print >> exp
;
// take input
inputStatement
= tok.alphaTerminal >> tok.input
;
// conditional statement
conditionStatement
= tok.ifTok >> exp >> tok.colon >> statements >> optionalElse
;
// consitions have optional else
optionalElse
= tok.elseTok >> tok.colon >> statements
| qi::eps
;
// while loop
whileStatement
= tok.whileTok >> exp >> tok.colon >> statements >> tok.elihw
;
// actual program statements
endList
= end >> endListPrime
;
endListPrime
= end >> endListPrime
| qi::eps
;
// end possibilities of program in global space
end
= callStatement
| printStatement
| tok.alphaTerminal >> tok.assign >> tok.input
| tok.alphaTerminal >> tok.assign >> exp
| tok.alphaTerminal >> tok.assign >> tok.leftBracket >> intList
>> tok.rightBracket
| tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
>> tok.assign >> exp
;
// function parameters
param
= tok.alphaTerminal >> paramPrime
| tok.alphaTerminal >> tok.leftBracket >> tok.rightBracket
>> paramPrime
;
// for handling left recursion in paramlist
paramPrime
= tok.comma >> tok.alphaTerminal >> paramPrime
| qi::eps
;
// define a statement as assignment print input condition while or call
statement
= assignmentStatement
| printStatement
| inputStatement
| conditionStatement
| whileStatement
| callStatement
| returnStatement
;
// general statement list
statements
= statement >> statementsPrime
;
// for handling left recursion in statements
statementsPrime
= statement >> statementsPrime
| qi::eps
;
// functions
functionList
= tok.def >> tok.alphaTerminal >> tok.leftParen
>> param >> tok.rightParen >> tok.colon
>> statements >> tok.fed
| tok.def >> tok.alphaTerminal >> tok.leftParen
>> tok.rightParen >> tok.colon >> statements >> tok.fed
| qi::eps
;
BOOST_SPIRIT_DEBUG_NODES(
(start) (functionList) (endList) (endListPrime) (param)
(paramPrime) (paramList) (paramListPrime) (statements)
(statementsPrime) (statement) (assignmentStatement)
(printStatement) (inputStatement) (conditionStatement)
(whileStatement) (callStatement) (returnStatement) (exp)
(expPrime) (intList) (intListPrime) (optionalElse) (end)
)
}
private:
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> endListPrime;
qi::rule<Iterator, Skipper> param;
qi::rule<Iterator, Skipper> paramPrime;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> paramListPrime;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statementsPrime;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> expPrime;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> intListPrime;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
}
#include <fstream>
#include <iterator>
int main(int argc, char** argv) {
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token<char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_> token_type;
#ifdef USE_STATES
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
#else
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::unused_type skipper_type;
#endif
interpreter::LexerTokens<lexer_type> lexer;
interpreter::InterpreterGrammar<iterator_type, skipper_type> parser(lexer);
// read the file
if (argc != 2)
{
std::cout << "File required" << std::endl;
return 1;
}
std::ifstream t(argv[1]);
std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };
char const* first = sourceCode.data();
char const* last = first + sourceCode.size();
#ifdef USE_STATES
bool ok = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
#else
bool ok = lex::tokenize_and_parse(first, last, lexer, parser);
#endif
std::cout << "Remaining '" << std::string(first,last) << "'" << std::endl;
std::cout << "R is " << std::boolalpha << ok << std::endl;
}
Prints
<start>
<try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<functionList>
<try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<param>
<try>[x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<paramPrime>
<try>[,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<paramPrime>
<try>[)][:][print][3][*][x][+][y][return][fed]</try>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</paramPrime>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</paramPrime>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</param>
<statements>
<try>[print][3][*][x][+][y][return][fed]</try>
<statement>
<try>[print][3][*][x][+][y][return][fed]</try>
<assignmentStatement>
<try>[print][3][*][x][+][y][return][fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[print][3][*][x][+][y][return][fed]</try>
<exp>
<try>[3][*][x][+][y][return][fed]</try>
<expPrime>
<try>[*][x][+][y][return][fed]</try>
<exp>
<try>[x][+][y][return][fed]</try>
<expPrime>
<try>[+][y][return][fed]</try>
<exp>
<try>[y][return][fed]</try>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<success>[return][fed]</success>
<attributes>[]</attributes>
</printStatement>
<success>[return][fed]</success>
<attributes>[]</attributes>
</statement>
<statementsPrime>
<try>[return][fed]</try>
<statement>
<try>[return][fed]</try>
<assignmentStatement>
<try>[return][fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[return][fed]</try>
<fail/>
</printStatement>
<inputStatement>
<try>[return][fed]</try>
<fail/>
</inputStatement>
<conditionStatement>
<try>[return][fed]</try>
<fail/>
</conditionStatement>
<whileStatement>
<try>[return][fed]</try>
<fail/>
</whileStatement>
<callStatement>
<try>[return][fed]</try>
<fail/>
</callStatement>
<returnStatement>
<try>[return][fed]</try>
<exp>
<try>[fed]</try>
<fail/>
</exp>
<success>[fed]</success>
<attributes>[]</attributes>
</returnStatement>
<success>[fed]</success>
<attributes>[]</attributes>
</statement>
<statementsPrime>
<try>[fed]</try>
<statement>
<try>[fed]</try>
<assignmentStatement>
<try>[fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[fed]</try>
<fail/>
</printStatement>
<inputStatement>
<try>[fed]</try>
<fail/>
</inputStatement>
<conditionStatement>
<try>[fed]</try>
<fail/>
</conditionStatement>
<whileStatement>
<try>[fed]</try>
<fail/>
</whileStatement>
<callStatement>
<try>[fed]</try>
<fail/>
</callStatement>
<returnStatement>
<try>[fed]</try>
<fail/>
</returnStatement>
<fail/>
</statement>
<success>[fed]</success>
<attributes>[]</attributes>
</statementsPrime>
<success>[fed]</success>
<attributes>[]</attributes>
</statementsPrime>
<success>[fed]</success>
<attributes>[]</attributes>
</statements>
<success></success>
<attributes>[]</attributes>
</functionList>
<endList>
<try></try>
<end>
<try></try>
<callStatement>
<try></try>
<fail/>
</callStatement>
<printStatement>
<try></try>
<fail/>
</printStatement>
<fail/>
</end>
<fail/>
</endList>
<fail/>
</start>
Remaining ''
R is false