I have a working parser that parses IF-ELSE statements.
//#define BOOST_SPIRIT_DEBUG 1
#include <algorithm>
#include <cstring>
#include <functional>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include <boost/fusion/adapted.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace Ast {
    using boost::recursive_wrapper;

    // Giving every Tag its own char_traits type makes each String<Tag> a
    // distinct type, so e.g. Literal and Identifier can be separate variant
    // alternatives and separate overload targets even though both hold text.
    template <typename> struct custom_string : std::char_traits<char> {};
    template <typename Tag>
    using String = std::basic_string<char, custom_string<Tag> >;

    using Identifier = String<struct TagId>;
    using Literal    = String<struct TagLiteral>;
    using Variant    = String<struct TagVariant>;
    using Word       = String<struct TagWord>;
    using Obj        = String<struct TagObj>;
    using BinOp      = String<struct TagOp>;
    using Datatype   = String<struct TagDatatype>;

    struct Base {
        Identifier id;
        Literal    literal;
    };

    using Ids    = std::vector<Identifier>;
    using Number = double;
    using Value  = boost::variant<Literal, Number, Identifier, Variant>;

    // Expression block
    struct Bioperator;
    struct IEBlock;

    // An expression is either a leaf value or a binary operation; the
    // recursive_wrapper lets Bioperator contain Expressions itself.
    using Expression = boost::variant<Value, recursive_wrapper<Bioperator>>;

    /// `assign <var> <value>` statement.
    struct Assign {
        Variant    var;
        Expression value;
    };

    /// Binary operation `var op value`; `group` records that the operation
    /// was flagged through set_group_flag().
    struct Bioperator {
        Bioperator(Expression l = {}, BinOp o = {}, Expression r = {}, bool g = false)
            : var(std::move(l))
            , op(std::move(o))
            , value(std::move(r))
            , group(g) {}

        Expression var;
        BinOp      op;
        Expression value;
        bool       group;
    };

    /// Returns `e` with the group flag set when it holds a Bioperator;
    /// any other expression is returned unchanged.
    ///
    /// The flag is set in place on the by-value parameter, which avoids
    /// copying the whole operator subtree out of the variant and back again.
    inline Expression set_group_flag(Expression e) {
        // Pointer form of boost::get yields nullptr for non-Bioperator content.
        if (auto* bo = boost::get<Bioperator>(&e))
            bo->group = true;
        return e;
    }

    // A statement is a block of statements, an assignment or an if/else block.
    using Statement = boost::make_recursive_variant<
        std::vector<boost::recursive_variant_>,
        Assign,
        recursive_wrapper<IEBlock>
    >::type;
    using Statements = std::vector<Statement>;

    /// If/else block: condition, the statement for the true branch and an
    /// optional statement for the false branch.
    struct IEBlock {
        Expression                 condition;
        Statement                  true_stmt;
        boost::optional<Statement> false_stmt;
    };

    using Task = std::vector<boost::variant<Statement>>;
} // namespace Ast
// Fusion adaptations: expose the AST structs as sequences of the listed
// members so that rules with these attribute types (see namespace Parser)
// can fill them member by member.
// Note: Ast::Bioperator::group is not listed, so it is never filled through
// this adaptation.
BOOST_FUSION_ADAPT_STRUCT(Ast::Assign, var, value);
BOOST_FUSION_ADAPT_STRUCT(Ast::Bioperator, var, op, value);
BOOST_FUSION_ADAPT_STRUCT(Ast::IEBlock, condition, true_stmt, false_stmt);
namespace Parser {
    /// Qi grammar that parses a sequence of `;`-terminated statements
    /// (assignments and if/elseif/else blocks) into an Ast::Task.
    ///
    /// Binary expressions are folded left to right into Ast::Bioperator
    /// nodes. From loosest to tightest binding the levels are:
    ///   expr_     : || &&
    ///   boolterm_ : == != > >= < <=
    ///   binterm_  : | & *
    ///   term_     : + -
    ///   factor_   : a value or a parenthesized expr_
    template <typename It> struct Task : qi::grammar<It, Ast::Task()> {
        Task() : Task::base_type(start) {
            using namespace qi;

            // The public entry point has no skipper; whitespace skipping is
            // switched on here for everything below.
            start = skip(space)[task_];

            // lexemes:
            id_      = raw[alpha >> *(alnum | '_' | ':')];
            literal_ = '"' > *('\\' >> char_ | ~char_('"')) > '"';
            variant_ = raw[(alpha | '_') >> *(alnum | '_')];

            task_     = *task_item > eoi;
            task_item = statement_;
            value_    = literal_ | number_ | id_ | variant_;
            number_   = double_;

            // Statements
            statement_  = (assign_ | ifel_block_) > ';';
            stmt_block_ = *statement_;
            assign_     = no_case["assign"] >> variant_ >> expr_;

            // Expressions: each level starts with its left operand and then
            // wraps the value built so far into a new Bioperator for every
            // `operator operand` pair that follows.
            expr_ = boolterm_[_val = _1]
                >> *(boolop_ >> boolterm_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)]
                ;
            boolterm_ = binterm_[_val = _1]
                >> *(boolfacop_ >> binterm_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)]
                ;
            binterm_ = term_[_val = _1]
                >> *(bintermop_ >> term_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)]
                ;
            term_ = factor_[_val = _1]
                >> *(termop_ >> factor_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)]
                ;
            factor_ = value_
                | '(' >> expr_ >> ')'
                ;

            // If / elseif / else
            ifel_block_ = no_case["if"] >> condition_core_ >> no_case["endif"];
            condition_core_ = '(' >> expr_ >> ')'
                >> stmt_block_                                          // true block
                >> -(no_case["elseif"] >> condition_core_ | elsepart_)  // false block
                ;
            elsepart_ = no_case["else"] >> stmt_block_;

            // Operator tables; raw[] exposes the matched text as the BinOp.
            bin_ops += "==", "!=", ">", ">=", "<", "<=";
            boolfacop_ = raw[bin_ops];
            bool_ops += "||", "&&";
            boolop_ = raw[bool_ops];
            binterm_ops += "|", "&", "*";
            bintermop_ = raw[binterm_ops];
            term_ops += "+", "-";
            termop_ = raw[term_ops];

            BOOST_SPIRIT_DEBUG_NODES(
                (task_)(task_item)
                (id_)(literal_)(variant_)(value_)(number_)
                (assign_)(ifel_block_)(condition_core_)(elsepart_)
                (expr_)(term_)(binterm_)(boolterm_)(factor_)
                (boolfacop_)(boolop_)(bintermop_)(termop_)
                (statement_)(stmt_block_)
            )
        }

      private:
        qi::rule<It, Ast::Task()> start;
        qi::symbols<char> bin_ops, bool_ops, binterm_ops, term_ops;

        using Skipper = qi::space_type;
        qi::rule<It, Ast::Task(), Skipper>       task_, task_item;
        qi::rule<It, Ast::Assign(), Skipper>     assign_;
        qi::rule<It, Ast::Statement(), Skipper>  statement_;
        qi::rule<It, Ast::Expression(), Skipper> expr_, term_, binterm_, boolterm_, factor_;
        qi::rule<It, Ast::IEBlock(), Skipper>    ifel_block_, condition_core_;
        qi::rule<It, Ast::Statement(), Skipper>  elsepart_;
        qi::rule<It, Ast::Statements(), Skipper> stmt_block_;

        // lexemes (declared without a skipper):
        qi::rule<It, Ast::Identifier()> id_;
        qi::rule<It, Ast::Literal()>    literal_;
        qi::rule<It, Ast::Value()>      value_;
        qi::rule<It, Ast::Number()>     number_;
        qi::rule<It, Ast::Variant()>    variant_;
        qi::rule<It, Ast::BinOp()>      boolfacop_, boolop_, bintermop_, termop_;
    };
}
#include <pugixml.hpp>
namespace Generate {
    using namespace Ast;

    /// Writes an AST into a pugixml tree. Call it as `xml(parent, node)`;
    /// the matching private apply() overload appends the XML for `node`
    /// below `parent`.
    struct XML {
        using Node = pugi::xml_node;

        // callable for variant visiting:
        template <typename T> void operator()(Node parent, T const& node) const { apply(parent, node); }

      private:
        // Variants: dispatch on the currently held alternative.
        template <typename... Ts>
        void apply(Node parent, boost::variant<Ts...> const& v) const {
            using std::placeholders::_1;
            boost::apply_visitor(std::bind(*this, parent, _1), v);
        }

        void apply(Node parent, Number const& num) const {
            create_child(parent, "num").text().set(num);
        }

        void apply(Node parent, Identifier const& id) const {
            create_child(parent, "identifier").text().set(id.c_str());
        }

        void apply(Node parent, Variant const& v) const {
            create_child(parent, "variant").text().set(v.c_str());
        }

        void apply(Node parent, Literal const& literal) const {
            create_child(parent, "literal").text().set(literal.c_str());
        }

        // Empty optionals produce no output.
        template <typename T> void apply(Node parent, boost::optional<T> const& opt) const {
            if (opt)
                apply(parent, *opt);
        }

        void apply(Node parent, Assign const& a) const {
            auto asn_ = create_child(parent, "assign");
            apply(asn_, a.var);
            apply(asn_, a.value);
        }

        // Grouped operations get an extra <group> element around them.
        void apply(Node parent, Bioperator const& bo) const {
            auto use_parent = bo.group ? create_child(parent, "group") : parent;
            auto botag = create_child(use_parent, "bioperator");
            botag.text().set(bo.op.c_str());
            apply(botag, bo.var);
            apply(botag, bo.value);
        }

        // `name` is "if" for a top-level block and "elseif" for a chained one.
        void apply(Node parent, IEBlock const& c, char const* name = "if") const {
            auto if_stmt = create_child(parent, name);
            apply(if_stmt, c.condition);

            // Compare the text: `name == "if"` would compare pointers, and
            // whether two identical string literals share an address is
            // unspecified.
            bool const is_if = std::strcmp(name, "if") == 0;
            auto use_parent = is_if ? create_child(if_stmt, "then") : if_stmt;
            apply(use_parent, c.true_stmt); // only show then for if block

            if (c.false_stmt) {
                // make sure elseif is always a child of if stmt.
                use_parent = find_parent_by_name(if_stmt, "if");
                if (auto nested = is_ifel_block(*c.false_stmt)) {
                    apply(use_parent, *nested, "elseif");
                }
                else {
                    apply(create_child(use_parent, "else"), *c.false_stmt);
                }
            }
        }

        void apply(Node parent, Statements const& b) const {
            if (b.size() == 1) // simplify single-statement block
                return apply(parent, b.front());
            for (auto& s : b)
                apply(parent, s);
        }

        void apply(Node parent, Task const& t) const {
            auto task = create_child(parent, "task");
            for (auto& item : t)
                apply(create_child(task, "item"), item);
        }

        // Appends a new child element called `name` to `parent`.
        Node create_child(Node parent, std::string const& name) const {
            auto child = parent.append_child();
            child.set_name(name.c_str());
            return child;
        }

        // Returns `node` itself or its nearest ancestor whose element name is
        // `name`. When there is no such node the walk stops at the null node
        // above the root (instead of spinning forever on it) and `node` is
        // returned, so output is still attached to the tree.
        Node find_parent_by_name(Node node, std::string const& name) const {
            Node found = node;
            while (!found.empty() && name != found.name())
                found = found.parent();
            return found.empty() ? node : found;
        }

        // is_ifel_block: yields the IEBlock when the argument is one, or is a
        // variant / single-element statement list wrapping one; else nullptr.
        static IEBlock const* is_ifel_block(IEBlock const& c) { return &c; }
        static IEBlock const* is_ifel_block(Statements const& b) {
            return b.size() == 1 ? is_ifel_block(b.front()) : nullptr;
        }
        template <typename... Ts> static IEBlock const* is_ifel_block(boost::variant<Ts...> const& v) {
            return boost::apply_visitor([](auto const& ast) { return is_ifel_block(ast); }, v);
        }
        template <typename T> static IEBlock const* is_ifel_block(T const&) { return nullptr; }
    };
} // namespace Generate
// Sample inputs; main() parses each entry with Parser::Task and prints the
// resulting XML.
static const std::string cases[] = {
R"(
If ((Var3 == "A" && Var4 == 20) || (Var4 == "B" && Var5 > 0))
Assign VarName (start + end + 1);
EndIf;
)",
};
int main() {
using It = std::string::const_iterator;
static const Parser::Task<It> p;
static const Generate::XML to_xml;
int i = 0;
for (std::string const& input : cases) {
try {
Ast::Task t;
std::cout << "*** Sample #" << ++i << std::endl;
if (qi::parse(begin(input), end(input), p, t)) {
pugi::xml_document doc;
to_xml(doc.root(), t);
doc.print(std::cout, " ", pugi::format_default);
std::cout << std::endl;
}
else {
std::cout << " -> INVALID" << std::endl;
}
}
catch (qi::expectation_failure<It> const& ef) {
auto f = begin(input);
auto p = ef.first - input.begin();
//#pragma GCC diagnostic push
//#pragma GCC diagnostic ignored "-Wsign-conversion"
auto bol = input.find_last_of("\r\n", p) + 1;
auto line = std::count(f, f + bol, '\n') + 1;
auto eol = input.find_first_of("\r\n", p);
std::cerr << " -> EXPECTED " << ef.what_ << " in line:" << line << "\n"
<< input.substr(bol, eol - bol) << "\n"
<< std::setw(static_cast<int>(p - bol)) << ""
<< "^--- here" << std::endl;
//#pragma GCC diagnostic pop
}
}
}
It produces the following XML output:
<task>
<item>
<if>
<bioperator>||<bioperator>&&<bioperator>==<identifier>Var3</identifier>
<literal>A</literal>
</bioperator>
<bioperator>==<identifier>Var4</identifier>
<num>20</num>
</bioperator>
</bioperator>
<bioperator>&&<bioperator>==<identifier>Var4</identifier>
<literal>B</literal>
</bioperator>
<bioperator>><identifier>Var5</identifier>
<num>0</num>
</bioperator>
</bioperator>
</bioperator>
<then>
<assign>
<variant>VarName</variant>
<bioperator>+<bioperator>+<identifier>start</identifier>
<identifier>end</identifier>
</bioperator>
<num>1</num>
</bioperator>
</assign>
</then>
</if>
</item>
</task>
All good. Now I want to put a <group>
tag around an expression when it is enclosed in parentheses. So I'm modifying the factor_ rule
from:
factor_ = value_
| '(' >> expr_ >> ')'
;
to:
factor_ = value_
| '(' >> expr_[_val = px::bind(&Ast::set_group_flag, _1)] >> ')'
;
to call this function
Expression set_group_flag(Expression e) {
// If this is the Bioperator, set the group flag to true
if (e.type() == typeid(Bioperator)) {
auto bo = boost::get<Bioperator>(e);
bo.group = true;
e = bo;
}
return e; // return the expression
}
The code compiles fine, but the values of Bioperator.var and Bioperator.value are lost. The updated factor_ rule produces the result below:
<task>
<item>
<if>
<bioperator>||<group>
<bioperator>&&<bioperator>==<literal></literal>
<literal></literal>
</bioperator>
<bioperator>==<literal></literal>
<literal></literal>
</bioperator>
</bioperator>
</group>
<group>
<bioperator>&&<bioperator>==<literal></literal>
<literal></literal>
</bioperator>
<bioperator>><literal></literal>
<literal></literal>
</bioperator>
</bioperator>
</group>
</bioperator>
<then>
<assign>
<variant>VarName</variant>
<group>
<bioperator>+<bioperator>+<literal></literal>
<literal></literal>
</bioperator>
<literal></literal>
</bioperator>
</group>
</assign>
</then>
</if>
</item>
</task>
Notice that the bioperator values are now all empty <literal></literal>
tags.
My question is: Is this the right way to set the group flag? Did I do something wrong that makes the expr value disappear like this?
Semantic actions suppress automatic attribute propagation. Either explicitly state it:
factor_ = value_ [_val = _1]
| '(' >> expr_ [_val = px::bind(&Ast::set_group_flag, _1)] >> ')'
;
Or use operator %=
to initialize the rule (see e.g. Boost.Spirit: Difference between operators "%=" and "=" or the documentation):
Auto Rules It is typical to see rules like:
r = p[_val = _1];
[...]
NOTE: r %= p and r = p are equivalent if there are no semantic actions associated with p.
That said, I urge you not to duct-tape your precedence problem. For one thing, a "group flag" is a boolean and not suited to express nesting. More importantly, your AST already reflects the relations between the nodes. Instead, fix your parser to reflect the associativity/precedence if necessary, and emit the grouping only where it is required. E.g. add(2, mul(3, 2))
is 2+3*2 (8), and mul(2, add(3, 1))
is 2*(3+1) (note the added parentheses here).