I've been learning C++ and Boost.Spirit lately to help my team parse complex structures into an AST and then into XML. Blessed with a lot of help from this community (mostly from Sehe), things have been moving along pretty well. With my limited knowledge of C++ and Boost, I'm stuck again... :( What's new, right?
I'm trying to parse the following structure:
If (Var1 == "Test" && Var2 <= 10 && Var3 == "Done")
Verify Word 32 Objective;
If ((Var3 == "A" || Var4 == "B") && Var5 > 0)
Assign VarName "Value1";
Assign Var2 10;
Elseif (Var3 == "C")
Assign VarName "SomeValue"
End If;
Else
Assign VarName "Value2"
EndIf;
Notes
The expected XML output for the code block above is like this:
<if>
<bioperator>
&&
<bioperator>
&&
<bioperator>
==
<variant>Var1</variant>
<literal>"Test"</literal>
</bioperator>
<bioperator>
<=
<variant>Var2</variant>
<num>10</num>
</bioperator>
</bioperator>
<bioperator>
==
<variant>Var3</variant>
<literal>"DONE"</literal>
</bioperator>
</bioperator>
<then>
<verify>
<word>Word</word>
<num>32</num>
<obj>Objective</obj>
</verify>
<if>
<bioperator>
&&
<bioperator>
||
<bioperator>
==
<variant>Var3</variant>
<literal>"A"</literal>
</bioperator>
<bioperator>
==
<variant>Var4</variant>
<literal>"B"</literal>
</bioperator>
</bioperator>
<bioperator>
>
<variant>Var5</variant>
<num>0</num>
</bioperator>
</bioperator>
<then>
<assign>
<variant>VarName</variant>
<literal>"Value1"</literal>
</assign>
<assign>
<variant>Var2</variant>
<num>10</num>
</assign>
</then>
<elseif>
<bioperator>
==
<variant>Var3</variant>
<literal>"C"</literal>
</bioperator>
<assign>
<variant>VarName</variant>
<literal>"Value2"</literal>
</assign>
</elseif>
</if>
</then>
<else>
<assign>
<variant>VarName</variant>
<literal>"Value2"</literal>
</assign>
</else>
</if>
Output Notes:
<then>...</then>
tag<bioperator>
tag is nested. For simplicity, I'm using the working example that sehe helped me with before and extending it to add the capability to parse the above.
FULL CODE
#define BOOST_SPIRIT_DEBUG 1
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// AST node types: the grammar in namespace Parser synthesizes these and the
// XML generator in namespace Generate walks them.
namespace Ast {
using boost::recursive_wrapper;
// Empty traits class templated on a tag; each tag yields a distinct traits type.
template <typename> struct custom_string : std::char_traits<char> {};
// String<Tag> is a std::basic_string specialization that is unique per Tag, so
// the aliases below are different C++ types even though all of them hold chars.
template <typename Tag>
using String = std::basic_string<char, custom_string<Tag> >;
using Identifier = String<struct TagId>;
using Literal = String<struct TagLiteral>;
using Variant = String<struct TagVariant>;
using Word = String<struct TagWord>;
using Obj = String<struct TagObj>;
using BinOp = String<struct TagOp>;
using Datatype = String<struct TagDatatype>;
// Fields shared by every class definition: its name and a literal.
struct Base {
Identifier id;
Literal literal;
};
using Ids = std::vector<Identifier>;
using Enum = Ids;
using Number = double;
// A value is one of: string literal, number, identifier or variable name.
using Value = boost::variant<Literal, Number, Identifier, Variant>;
// "Simple" class definition; all three extra parts are optional.
struct Simple : Base {
boost::optional<Enum> enumeration;
boost::optional<Datatype> datatype;
boost::optional<Value> default_;
};
struct Complex;
struct Container;
// NOTE(review): the next line is a stray empty declaration.
;
// Complex and Container contain Class themselves, hence the recursive_wrapper.
using Class = boost::variant<
Simple,
recursive_wrapper<Complex>,
recursive_wrapper<Container>
>;
using Classes = std::vector<Class>;
struct Container : Base { Class element; };
struct Complex : Base { Ids bases; Classes members; };
// Expression block
// Operands of a "Verify" statement, in source order.
struct Verify {
Word word;
Number num;
Obj obj;
};
// Operands of an "Assign" statement: target variable and assigned value.
struct Assign {
Variant var;
Value value;
};
struct Bioperator;
struct Conditional;
// Bioperator and Conditional are incomplete here, hence the recursive_wrapper.
using Expression = boost::variant<
Value,
Verify,
Assign,
recursive_wrapper<Bioperator>,
recursive_wrapper<Conditional>
>;
// One comparison "var op value".
// NOTE(review): both operands are leaf types, so this node cannot hold another
// Bioperator as the nested <bioperator> output would require.
struct Bioperator {
Variant var;
BinOp op;
Value value;
};
// If-statement: condition, the branch taken when it holds, and an optional
// alternative. All three are typed as Expression.
struct Conditional {
Expression condition, true_block;
boost::optional<Expression> false_block;
};
// Things that may appear at top level next to class definitions.
using Code = boost::variant<Conditional, Verify, Assign>;
using Task = std::vector<boost::variant<Class, Code>>;
} // namespace Ast
// Fusion adaptations let Spirit fill the structs member by member; the members
// are listed in the order the corresponding grammar rules produce them.
// Classes
BOOST_FUSION_ADAPT_STRUCT(Ast::Simple, id, literal, enumeration, datatype, default_)
BOOST_FUSION_ADAPT_STRUCT(Ast::Complex, id, literal, bases, members)
BOOST_FUSION_ADAPT_STRUCT(Ast::Container, id, literal, element)
// Expressions
BOOST_FUSION_ADAPT_STRUCT(Ast::Verify, word, num, obj);
BOOST_FUSION_ADAPT_STRUCT(Ast::Assign, var, value);
BOOST_FUSION_ADAPT_STRUCT(Ast::Bioperator, var, op, value);
BOOST_FUSION_ADAPT_STRUCT(Ast::Conditional, condition, true_block, false_block);
namespace Parser {
// Qi grammar that parses a whole input into Ast::Task. The public start rule
// has no skipper and installs qi::space itself.
template <typename It> struct Task : qi::grammar<It, Ast::Task()> {
Task() : Task::base_type(start) {
using namespace qi;
start = skip(space)[task_];
// lexemes:
// Identifier: a letter followed by letters, digits, '_' or ':'.
id_ = raw[alpha >> *(alnum | '_' | ':')];
variant_ = id_;
word_ = variant_;
obj_ = word_;
// Double-quoted string; a backslash takes the next character literally.
literal_ = '"' > *('\\' >> char_ | ~char_('"')) > '"';
// Literal if present, otherwise a string holding a single space.
auto optlit = copy(literal_ | attr(std::string(" ")));
// The whole input must be consumed (expectation on eoi).
task_ = *task_item > eoi;
task_item = class_ | code_;
subclass_ = simple_class_ | complex_ | container_;
class_ = lit("Class") > subclass_ > ';';
simple_class_ = lit("Simple") >> id_ >> optlit >> -enum_ >> -datatype_ >> -default_;
inherit_ = lit("Inherit") >> id_;
complex_ = lit("Complex") >> id_ >> optlit >> '(' >> *inherit_ >> *subclass_ >> ')';
container_ = lit("Container") >> id_ >> optlit >> '(' >> subclass_ > ')';
enum_ = lit("enumeration") >> '(' >> -(id_ % ',') > ')';
datatype_ = lit("datatype") >> id_;
value_ = literal_ | number_ | id_;
number_ = double_;
default_ = lit("Default") >> value_;
// Expression
code_ = conditional_ | assign_ | verify_; // more to come
expr_ = simple_expr | assign_ | verify_;// | *(boolop_ >> bioperator_);
simple_expr = value_ | bioperator_ | verify_ | assign_ | conditional_;
// NOTE(review): requires its own parentheses around every comparison and has
// no way to combine comparisons with boolop_.
bioperator_ = '(' >> variant_ >> binop_ >> value_ >> ')';
assign_ = no_case["assign"] >> variant_ >> value_ > ';';
verify_ = no_case["verify"] >> word_ >> number_ >> obj_ > ';';
// NOTE(review): wraps the condition in '(' ... ')' on top of the parentheses
// bioperator_ already demands; the else/elseif parts are commented out, and
// only "endif" (no space) is accepted as terminator.
conditional_
= no_case["if"] >> '(' >> expr_ >> ')' >> expr_
//>> -((lit("else") | lit("elseif")) >> expr_) // else & elseif
>> no_case["endif"] > ';'
;
//elsepart_ = no_case[lit("else")] >> expr_;
//elseifpart_ = no_case["elseif"] >> conditional_;
// NOTE(review): alternatives are tried in order, so ">" is accepted before
// ">=" is tried, and "<" before "<=".
binop_ = string("==") | string("!=") | string(">") | string(">=") | string("<") | string("<=");
boolop_ = string("||") | string("&&");
// NOTE(review): assign_ is listed twice; simple_expr, bioperator_ and code_
// are not listed.
BOOST_SPIRIT_DEBUG_NODES(
(task_)(task_item)(class_)(subclass_)(simple_class_)(complex_)(container_)(enum_)(datatype_)(default_)(inherit_)
(id_)(literal_)(variant_)(word_)(value_)(number_)(obj_)
(expr_)(verify_)(assign_)(conditional_)(assign_)(binop_)(boolop_)
)
}
private:
qi::rule<It, Ast::Task()> start;
// Rules declared with Skipper skip whitespace between their elements.
using Skipper = qi::space_type;
qi::rule<It, Ast::Task(), Skipper> task_, task_item;
qi::rule<It, Ast::Class(), Skipper> class_, subclass_;
qi::rule<It, Ast::Simple(), Skipper> simple_class_;
qi::rule<It, Ast::Complex(), Skipper> complex_;
qi::rule<It, Ast::Container(), Skipper> container_;
qi::rule<It, Ast::Enum(), Skipper> enum_;
qi::rule<It, Ast::Datatype(), Skipper> datatype_;
qi::rule<It, Ast::Value(), Skipper> default_;
qi::rule<It, Ast::Identifier(), Skipper> inherit_;
qi::rule<It, Ast::Verify(), Skipper> verify_;
qi::rule<It, Ast::Assign(), Skipper> assign_;
qi::rule<It, Ast::Code(), Skipper> code_;
qi::rule<It, Ast::Expression(), Skipper> expr_, simple_expr;
// NOTE(review): elsepart_ and elseifpart_ are declared but never assigned
// (their definitions above are commented out).
qi::rule<It, Ast::Conditional(), Skipper> conditional_, elsepart_, elseifpart_;
qi::rule<It, Ast::Bioperator(), Skipper> bioperator_;
// lexemes:
// Declared without a skipper, so whitespace is not skipped inside them.
qi::rule<It, Ast::Identifier()> id_;
qi::rule<It, Ast::Literal()> literal_;
qi::rule<It, Ast::Variant()> variant_;
qi::rule<It, Ast::Word()> word_;
qi::rule<It, Ast::Obj()> obj_;
qi::rule<It, Ast::Value()> value_;
qi::rule<It, Ast::Number()> number_;
qi::rule<It, Ast::BinOp()> binop_, boolop_;
};
}
#include <pugixml.hpp>
namespace Generate {
using namespace Ast;
// Turns AST nodes into pugixml nodes. Each apply() overload appends the
// representation of one AST type underneath `parent`.
struct XML {
using Node = pugi::xml_node;
// callable for variant visiting:
template <typename T> void operator()(Node parent, T const& node) const { apply(parent, node); }
private:
// Any boost::variant: dispatch to the overload for the alternative it holds,
// by binding `parent` and visiting with this object.
template <typename... Ts>
void apply(Node parent, boost::variant<Ts...> const& v) const {
using std::placeholders::_1;
boost::apply_visitor(std::bind(*this, parent, _1), v);
}
// Leaf types: one child element named after the type, holding the text.
void apply(Node parent, Number const& num) const {
named_child(parent, "num").text().set(num);
}
void apply(Node parent, Identifier const& id) const {
named_child(parent, "identifier").text().set(id.c_str());
}
void apply(Node parent, Obj const& o) const {
named_child(parent, "obj").text().set(o.c_str());
}
void apply(Node parent, Word const& w) const {
named_child(parent, "word").text().set(w.c_str());
}
void apply(Node parent, Variant const& v) const {
named_child(parent, "variant").text().set(v.c_str());
}
void apply(Node parent, Literal const& literal) const {
named_child(parent, "literal").text().set(literal.c_str());
}
void apply(Node parent, Datatype const& datatype) const {
named_child(parent, "datatype").text().set(datatype.c_str());
}
// An empty optional produces no output at all.
template <typename T> void apply(Node parent, boost::optional<T> const& opt) const {
if (opt)
apply(parent, *opt);
}
void apply(Node parent, Simple const& s) const {
auto simple = named_child(parent, "simple");
apply(simple, s.id);
apply(simple, s.literal);
apply(simple, s.enumeration);
apply(simple, s.datatype);
// The default value gets a <default> wrapper, so it is not routed through
// the optional overload.
if (s.default_.has_value()) {
apply(named_child(simple, "default"), *s.default_);
}
}
// Enumerators are written as <word> elements, not <identifier>.
void apply(Node parent, Enum const& e) const {
auto enum_ = named_child(parent, "enumeration");
for (auto& v : e)
named_child(enum_, "word").text().set(v.c_str());
}
void apply(Node parent, Complex const& c) const {
auto complex_ = named_child(parent, "complex");
apply(complex_, c.id);
// Each base is wrapped: <inherit><identifier>..</identifier></inherit>.
for (auto& base : c.bases)
apply(named_child(complex_, "inherit"), base);
apply(complex_, c.literal);
for (auto& m : c.members)
apply(complex_, m);
}
void apply(Node parent, Container const& c) const {
auto cont = named_child(parent, "container");
apply(cont, c.id);
apply(cont, c.literal);
apply(cont, c.element);
}
void apply(Node parent, Assign const& a) const {
auto asn_ = named_child(parent, "assign");
apply(asn_, a.var);
apply(asn_, a.value);
}
void apply(Node parent, Verify const& v) const {
auto v_ = named_child(parent, "verify");
apply(v_, v.word);
apply(v_, v.num);
apply(v_, v.obj);
}
// NOTE(review): there is no active overload for Bioperator; the draft below
// is commented out.
//void apply(Node parent, Bioperator const& bo) const {
// auto botag = named_child(parent, "bioperator").text().set(bo.op.c_str());
// //apply(botag, bo.var);
// //apply(botag, bo.value);
//}
// Placeholder: emits an empty <not-implement-yet> element and ignores `c`.
void apply(Node parent, Conditional const& c) const {
auto task = named_child(parent, "not-implement-yet");
}
// Root element. Class items get an extra <class> wrapper; Code items go
// straight under <task>.
void apply(Node parent, Task const& t) const {
auto task = named_child(parent, "task");
for (auto& item : t) {
// NOTE(review): `node` is never read.
std::string node = "class";
if (item.type() == typeid(Code)) {
apply(task, item);
}
else if (item.type() == typeid(Class)) {
apply(task.append_child("class"), item);
}
}
}
private:
// Appends a child to `parent`, names it `name` and returns it.
Node named_child(Node parent, std::string const& name) const {
auto child = parent.append_child();
child.set_name(name.c_str());
return child;
}
};
} // namespace Generate
/// Parses the embedded sample and prints the generated XML to stdout.
///
/// If the grammar throws an expectation failure, the offending line is printed
/// to stderr with a caret under the position where parsing stopped.
int main() {
    using It = std::string::const_iterator;
    static const Parser::Task<It> p;
    static const Generate::XML to_xml;
    for (std::string const input : {
R"(
If (Var1 == "Test" && Var2 <= 10 && Var3 == "Done")
Verify Word 32 Objective;
If ((Var3 == "A" || Var4 == "B") && Var5 > 0)
Assign VarName "Value1";
Assign Var2 10;
Elseif (Var3 == "C")
Assign VarName "SomeValue"
End If;
Else
Assign VarName "Value2"
EndIf;
)"
    }) {
        try {
            Ast::Task t;
            if (qi::parse(begin(input), end(input), p, t)) {
                pugi::xml_document doc;
                to_xml(doc.root(), t);
                doc.print(std::cout, " ", pugi::format_default);
                std::cout << std::endl;
            }
            else {
                std::cout << " -> INVALID" << std::endl;
            }
        }
        catch (qi::expectation_failure<It> const& ef) {
            // Offset of the failure. ef.first lies inside [begin, end], so the
            // difference is never negative. Named `pos` so that it no longer
            // shadows the parser `p`.
            auto const pos = static_cast<std::size_t>(ef.first - input.begin());
            // Start of the offending line. The search stops strictly before
            // `pos`: if the failure sits on a line break, the line start must
            // not land after it, or `pos - bol` below would underflow.
            // When nothing is found, npos + 1 wraps to 0.
            auto const bol = pos ? input.find_last_of("\r\n", pos - 1) + 1 : 0;
            auto const line =
                std::count(input.begin(),
                           input.begin() + static_cast<std::string::difference_type>(bol),
                           '\n') + 1;
            // May be npos; substr() clamps the length to the end of the input.
            auto const eol = input.find_first_of("\r\n", pos);
            std::cerr << " -> EXPECTED " << ef.what_ << " in line:" << line << "\n"
                      << input.substr(bol, eol - bol) << "\n"
                      << std::setw(static_cast<int>(pos - bol)) << ""
                      << "^--- here" << std::endl;
        }
    }
}
I can't seem to compose the grammar to handle the recursive (nested) if-else. The above is what I have. The grammar does not seem correct as it won't parse the example text.
Any help would be greatly appreciated.
Thanks
I can't seem to compose the grammar to handle the recursive (nested) if-else.
Let's have a look (you can get undressed behind the curtain, the doctor will be with you shortly).
I took the code and reviewed it into the baseline version that indeed reproduces the error: https://compiler-explorer.com/z/4dnhM1a3a
However I fear your diagnosis is some ways off. I think you're underestimating the complexity of the endeavour. Specifically, the first compilation error doesn't even relate to the conditional statement, rather it relates to the boolean expression:
expr_ = simple_expr | *(boolop_ >> bioperator_);
The structure of the rule does not match the AST:
struct Bioperator {
Variable var;
BinOp op;
Value value;
};
There are good reasons to do that, but you will have to come up with the necessary glue, say "instructions" to tell Spirit how to compose your AST based on the synthesized attributes (which contain the repeated *()
construct). I'd point at various expression parsers that I've worked on in the past on StackOverflow, but it looks like you might have used an example or two, just missing the glue.
Jumping ahead, imagining those basic problems "solved", I can see more dragons on the horizon, e.g.:
binop_ = string("==") | string("!=") | string(">") | string(">=") | string("<") | string("<=");
While I admire a propensity to keep things simple when possible, this misses more hidden complexity, e.g. that string("<")
will by definition prevent string("<=")
from ever matching. You could solve it by reordering to PEG-friendly order, but the safer (and more efficient) way is to use a symbol lookup.
qi::symbols<char> rel_ops_, bool_ops;
rel_ops_ += "==", "!=", ">", ">=", "<", "<=";
bool_ops += "||", "&&";
I would never in a million years model the AST for operators as a string, but I'll leave that as an exorcism for the reader.
binop_ = raw[rel_ops_];
boolop_ = raw[bool_ops];
You probably sense that I think your naming is not accurate. After all, those boolean operators ARE binary operators, so maybe don't confuse yourself with arbitrary naming. This goes back to understanding your application domain accurately and sticking to it.
I started trying to fix the missing expression bits but I figured out that I lack the domain information. E.g. the expr_
production suggests that the LHS operand is a simple_expr
- however, the Ast presents it as Variant
(which I relabeled Variable
to avoid confusion with the C++ variants used, and because my intuition tells me you actually need it to mean something like "an lvalue expression", aka something that can be assigned, i.e. a variable). I also noted that you commented the expr_
branch in statement_
, so I'll just skip it for now. Just for reference, here is a sketch of the direction I see this going (once you fix the Ast/production mismatches):
expr_ = simple_expr[_val = _1] >>
*(boolop_ >> bioperator_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)];
Similarly, code like this makes me confused:
void apply(Node parent, Task const& t) const {
auto task = named_child(parent, "task");
for (auto& item : t) {
std::string node = "class";
if (item.type() == typeid(Statement)) {
apply(task, item);
}
else if (item.type() == typeid(ClassDef)) {
apply(task.append_child("class"), item);
}
}
}
We structured the entire XML transformation around variant visitation, and here you hardcode a type-switch without actually differentiating on the item
type after all. There's a spurious line std::string node = "class"
which does nothing.
Plugging that gap for now, and putting in your desired input, brings us to the next hurdle: Conditional.
The AST looks reasonable at first glance:
struct Conditional {
Expression condition, true_block;
boost::optional<Expression> false_block;
};
The weirdest thing is that all blocks are expressions, whereas your sample clearly suggests statements for the blocks, and expression for the condition.
Going to the productions, they are somehow all declared with Conditional
as the corresponding attribute ¯\(ツ)/¯:
qi::rule<It, Ast::Conditional(), Skipper> conditional_, elsepart_, elseifpart_;
That makes little sense. So, from the example I expect the true/false branches to be at least Statement
- or even Task
?
Let's bend the AST to fit the sample input domain:
struct Conditional;
using Statement = boost::make_recursive_variant< //
Verify, //
Assign, //
recursive_wrapper<Conditional>, //
std::vector<boost::recursive_variant_> // a Block of Statement
>::type; //
struct Conditional {
Expression condition;
Statement true_block;
boost::optional<Statement> false_block;
};
using Block = std::vector<Statement>;
using Task = std::vector<boost::variant<ClassDef, Statement>>;
Next, the parser expression suggests that all branches are optional. Either that or the true-branch is completely missing, which also seems a problem. My suggestion.
To get elseif
working I wouldn't do anything, except the same trick that separated "class" >> type
from just type
:
conditional_ = no_case["if"] >> condcore_ >> no_case["endif"] > ';';
condcore_ = expr_ //
>> statement_ // true block
>> -(no_case["elseif"] >> condcore_ | elsepart_) // false block
; //
elsepart_ = no_case["else"] >> statement_;
Note how Conditional
is already part of Statement
.
qi::rule<It, Ast::Conditional(), Skipper> conditional_, condcore_;
qi::rule<It, Ast::Statement(), Skipper> elsepart_;
This here be the result of lots of fiddling (you will be very familiar with that process. Only, for you it is by choice, and for me it is by necessity, for lack of information :))
I fiddled with the input as well, as your expression grammar is actually the more hobbled part for now. I feel you've been neglecting this core part from day 1 (actually, I was the one to notice the new requirement here:
The "arbitrary" appearance of a num XML element tells me that your language has a type system with ditto literals. And that the parser expression
default_ = -(lit("Default") >> alnum)
was likely woefully underspecified. Do yourself a favor and don't settle for a sloppy solution.
It looks like you keep accidentally expanding the scope with each question. Pretty soon you're writing a compiler by accident).
//#define BOOST_SPIRIT_DEBUG 1
#include <boost/fusion/adapted.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
// AST node types: the grammar in namespace Parser synthesizes these and the
// XML generator in namespace Generate walks them.
namespace Ast {
using boost::recursive_wrapper;
// Empty traits class templated on a tag; each tag yields a distinct traits type.
template <typename> struct custom_string : std::char_traits<char> {};
// String<Tag> is a std::basic_string specialization that is unique per Tag, so
// the aliases below are different C++ types even though all of them hold chars.
template <typename Tag>
using String = std::basic_string<char, custom_string<Tag> >;
using Identifier = String<struct TagId>;
using Literal = String<struct TagLiteral>;
// The type is called Variable, but its tag is still TagVariant.
using Variable = String<struct TagVariant>;
using Word = String<struct TagWord>;
using Obj = String<struct TagObj>;
using BinOp = String<struct TagOp>;
using Datatype = String<struct TagDatatype>;
// Fields shared by every class definition: its name and a literal.
struct Base {
Identifier id;
Literal literal;
};
using Ids = std::vector<Identifier>;
using Enum = Ids;
using Number = double;
// A value is one of: string literal, number, identifier or variable name.
using Value = boost::variant<Literal, Number, Identifier, Variable>;
// "Simple" class definition; all three extra parts are optional.
struct Simple : Base {
boost::optional<Enum> enumeration;
boost::optional<Datatype> datatype;
boost::optional<Value> default_;
};
struct Complex;
struct Container;
// Complex and Container contain ClassDef themselves, hence the recursive_wrapper.
using ClassDef = boost::variant<
Simple,
recursive_wrapper<Complex>,
recursive_wrapper<Container>
>;
using ClassDefs = std::vector<ClassDef>;
struct Container : Base {
ClassDef element;
};
struct Complex : Base {
Ids bases;
ClassDefs members;
};
// Expression block
// Operands of a "Verify" statement, in source order.
struct Verify {
Word word;
Number num;
Obj obj;
};
// Operands of an "Assign" statement: target variable and assigned value.
struct Assign {
Variable var;
Value value;
};
struct Bioperator;
struct Conditional;
// Conditional is not an alternative here; it only appears in Statement.
using Expression = boost::variant<Value, Verify, Assign, recursive_wrapper<Bioperator>>;
// Binary operation. Both operands are full Expressions, so operations nest.
// The constructor allows building one from (lhs, op, rhs) and also serves as
// the default constructor.
struct Bioperator {
Bioperator(Expression l = {}, BinOp o = {}, Expression r = {})
: var(std::move(l))
, op(std::move(o))
, value(std::move(r)) {}
Expression var;
BinOp op;
Expression value;
};
// A statement is a nested list of statements, a Verify, an Assign or a
// Conditional. The vector alternative comes first in the list.
using Statement = boost::make_recursive_variant< //
std::vector<boost::recursive_variant_>, // a Block of Statement
Verify, //
Assign, //
recursive_wrapper<Conditional> //
>::type; //
using Block = std::vector<Statement>;
// If-statement: an Expression as condition, a Block for the true branch and an
// optional Block for the false branch.
struct Conditional {
Expression condition;
Block true_block;
boost::optional<Block> false_block;
};
using Task = std::vector<boost::variant<ClassDef, Statement>>;
} // namespace Ast
// Fusion adaptations let Spirit fill the structs member by member; the members
// are listed in the order the corresponding grammar rules produce them.
// Classes
BOOST_FUSION_ADAPT_STRUCT(Ast::Simple, id, literal, enumeration, datatype, default_)
BOOST_FUSION_ADAPT_STRUCT(Ast::Complex, id, literal, bases, members)
BOOST_FUSION_ADAPT_STRUCT(Ast::Container, id, literal, element)
// Expressions
BOOST_FUSION_ADAPT_STRUCT(Ast::Verify, word, num, obj);
BOOST_FUSION_ADAPT_STRUCT(Ast::Assign, var, value);
BOOST_FUSION_ADAPT_STRUCT(Ast::Bioperator, var, op, value);
BOOST_FUSION_ADAPT_STRUCT(Ast::Conditional, condition, true_block, false_block);
namespace Parser {
// Qi grammar that parses a whole input into Ast::Task. The public start rule
// has no skipper and installs qi::space itself.
template <typename It> struct Task : qi::grammar<It, Ast::Task()> {
Task() : Task::base_type(start) {
using namespace qi;
start = skip(space)[task_];
// lexemes:
// Identifier: a letter followed by letters, digits, '_' or ':'.
id_ = raw[alpha >> *(alnum | '_' | ':')];
variable_ = id_;
word_ = variable_;
obj_ = word_;
// Double-quoted string; a backslash takes the next character literally.
literal_ = '"' > *('\\' >> char_ | ~char_('"')) > '"';
// Literal if present, otherwise a string holding a single space.
auto optlit = copy(literal_ | attr(std::string(" ")));
// The whole input must be consumed (expectation on eoi).
task_ = *task_item > eoi;
task_item = classdef_ | statement_;
subclass_ = simple_class_ | complex_ | container_;
classdef_ = lit("Class") > subclass_ > ';';
simple_class_ = lit("Simple") >> id_ >> optlit >> -enum_ >> -datatype_ >> -default_;
inherit_ = lit("Inherit") >> id_;
complex_ = lit("Complex") >> id_ >> optlit >> '(' >> *inherit_ >> *subclass_ >> ')';
container_ = lit("Container") >> id_ >> optlit >> '(' >> subclass_ > ')';
enum_ = lit("enumeration") >> '(' >> -(id_ % ',') > ')';
datatype_ = lit("datatype") >> id_;
value_ = literal_ | number_ | id_;
number_ = double_;
default_ = lit("Default") >> value_;
// Expression
// The terminating ';' is expected here, once for every statement kind.
statement_ = (assign_ | verify_ | conditional_) > ';'; // expr_
block_ = *statement_;
// Starts from the first operand; every "boolop expr" that follows replaces
// the result with Bioperator(result so far, operator, right-hand expression).
expr_ = simple_expr[_val = _1] //
>> *(boolop_ >> expr_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)];
simple_expr = value_ | bioperator_;
// A single comparison must be written in its own parentheses.
bioperator_ = '(' >> variable_ >> binop_ >> value_ >> ')';
assign_ = no_case["assign"] >> variable_ >> value_;
verify_ = no_case["verify"] >> word_ >> number_ >> obj_;
conditional_ = no_case["if"] >> condcore_ >> no_case["endif"];
// Shared by "if" and "elseif": an elseif is a nested condcore_ parsed as the
// false block, so one "endif" closes the whole chain.
condcore_ = expr_ //
>> block_ // true block
>> -(no_case["elseif"] >> condcore_ | elsepart_) // false block
; //
elsepart_ = no_case["else"] >> block_;
// Operators are looked up in symbol tables; raw[] exposes the matched text.
rel_ops_ += "==", "!=", ">", ">=", "<", "<=";
bool_ops += "||", "&&";
binop_ = raw[rel_ops_];
boolop_ = raw[bool_ops];
// NOTE(review): assign_ is listed twice; simple_expr, bioperator_ and
// elsepart_ are not listed.
BOOST_SPIRIT_DEBUG_NODES( //
(task_)(task_item)(classdef_)(subclass_)(simple_class_)(complex_)(container_) //
(enum_)(datatype_)(default_)(inherit_) //
(id_)(literal_)(variable_)(word_)(value_)(number_)(obj_) //
(expr_)(verify_)(assign_)(conditional_)(condcore_)(assign_)(binop_)(boolop_) //
(statement_)(block_) //
)
}
private:
qi::rule<It, Ast::Task()> start;
qi::symbols<char> rel_ops_, bool_ops;
// Rules declared with Skipper skip whitespace between their elements.
using Skipper = qi::space_type;
qi::rule<It, Ast::Task(), Skipper> task_, task_item;
qi::rule<It, Ast::ClassDef(), Skipper> classdef_, subclass_;
qi::rule<It, Ast::Simple(), Skipper> simple_class_;
qi::rule<It, Ast::Complex(), Skipper> complex_;
qi::rule<It, Ast::Container(), Skipper> container_;
qi::rule<It, Ast::Enum(), Skipper> enum_;
qi::rule<It, Ast::Datatype(), Skipper> datatype_;
qi::rule<It, Ast::Value(), Skipper> default_;
qi::rule<It, Ast::Identifier(), Skipper> inherit_;
qi::rule<It, Ast::Verify(), Skipper> verify_;
qi::rule<It, Ast::Assign(), Skipper> assign_;
qi::rule<It, Ast::Statement(), Skipper> statement_;
qi::rule<It, Ast::Expression(), Skipper> expr_, simple_expr;
qi::rule<It, Ast::Conditional(), Skipper> conditional_, condcore_;
// NOTE(review): elsepart_ parses a block_ but exposes a Statement; this looks
// like the source of the nested <block><block> under <false_branch> in the
// sample output - confirm before changing the attribute type.
qi::rule<It, Ast::Statement(), Skipper> elsepart_;
qi::rule<It, Ast::Bioperator(), Skipper> bioperator_;
qi::rule<It, Ast::Block(), Skipper> block_;
// lexemes:
// Declared without a skipper, so whitespace is not skipped inside them.
qi::rule<It, Ast::Identifier()> id_;
qi::rule<It, Ast::Literal()> literal_;
qi::rule<It, Ast::Variable()> variable_;
qi::rule<It, Ast::Word()> word_;
qi::rule<It, Ast::Obj()> obj_;
qi::rule<It, Ast::Value()> value_;
qi::rule<It, Ast::Number()> number_;
qi::rule<It, Ast::BinOp()> binop_, boolop_;
};
}
#include <pugixml.hpp>
namespace Generate {
using namespace Ast;
// Turns AST nodes into pugixml nodes. Each apply() overload appends the
// representation of one AST type underneath `parent`.
struct XML {
using Node = pugi::xml_node;
// callable for variant visiting:
template <typename T> void operator()(Node parent, T const& node) const { apply(parent, node); }
private:
// Any boost::variant: dispatch to the overload for the alternative it holds,
// by binding `parent` and visiting with this object.
template <typename... Ts>
void apply(Node parent, boost::variant<Ts...> const& v) const {
using std::placeholders::_1;
boost::apply_visitor(std::bind(*this, parent, _1), v);
}
// Leaf types: one child element named after the type, holding the text.
void apply(Node parent, Number const& num) const {
named_child(parent, "num").text().set(num);
}
void apply(Node parent, Identifier const& id) const {
named_child(parent, "identifier").text().set(id.c_str());
}
void apply(Node parent, Obj const& o) const {
named_child(parent, "obj").text().set(o.c_str());
}
void apply(Node parent, Word const& w) const {
named_child(parent, "word").text().set(w.c_str());
}
// The element is still called "variant" although the type is now Variable.
void apply(Node parent, Variable const& v) const {
named_child(parent, "variant").text().set(v.c_str());
}
void apply(Node parent, Literal const& literal) const {
named_child(parent, "literal").text().set(literal.c_str());
}
void apply(Node parent, Datatype const& datatype) const {
named_child(parent, "datatype").text().set(datatype.c_str());
}
// An empty optional produces no output at all.
template <typename T> void apply(Node parent, boost::optional<T> const& opt) const {
if (opt)
apply(parent, *opt);
}
void apply(Node parent, Simple const& s) const {
auto simple = named_child(parent, "simple");
apply(simple, s.id);
apply(simple, s.literal);
apply(simple, s.enumeration);
apply(simple, s.datatype);
// The default value gets a <default> wrapper, so it is not routed through
// the optional overload.
if (s.default_.has_value()) {
apply(named_child(simple, "default"), *s.default_);
}
}
// Enumerators are written as <word> elements, not <identifier>.
void apply(Node parent, Enum const& e) const {
auto enum_ = named_child(parent, "enumeration");
for (auto& v : e)
named_child(enum_, "word").text().set(v.c_str());
}
void apply(Node parent, Complex const& c) const {
auto complex_ = named_child(parent, "complex");
apply(complex_, c.id);
// Each base is wrapped: <inherit><identifier>..</identifier></inherit>.
for (auto& base : c.bases)
apply(named_child(complex_, "inherit"), base);
apply(complex_, c.literal);
for (auto& m : c.members)
apply(complex_, m);
}
void apply(Node parent, Container const& c) const {
auto cont = named_child(parent, "container");
apply(cont, c.id);
apply(cont, c.literal);
apply(cont, c.element);
}
void apply(Node parent, Assign const& a) const {
auto asn_ = named_child(parent, "assign");
apply(asn_, a.var);
apply(asn_, a.value);
}
void apply(Node parent, Verify const& v) const {
auto v_ = named_child(parent, "verify");
apply(v_, v.word);
apply(v_, v.num);
apply(v_, v.obj);
}
// Incomplete: only the operator text is written. The operands bo.var and
// bo.value are not emitted (the calls are commented out), and `botag` holds
// the result of text().set(), not the <bioperator> node.
void apply(Node parent, Bioperator const& bo) const {
[[maybe_unused]] auto botag = named_child(parent, "bioperator").text().set(bo.op.c_str());
//apply(botag, bo.var);
//apply(botag, bo.value);
}
// <conditional> with <expression>, <true_branch> and, only when present,
// <false_branch>.
void apply(Node parent, Conditional const& c) const {
auto cond = named_child(parent, "conditional");
apply(named_child(cond, "expression"), c.condition);
apply(named_child(cond, "true_branch"), c.true_block);
if (c.false_block)
apply(named_child(cond, "false_branch"), *c.false_block);
}
// A <block> element containing every statement in order.
void apply(Node parent, Block const& b) const {
auto block = named_child(parent, "block");
for (auto& s : b)
apply(block, s);
}
// Root element; every top-level entry is wrapped in <item>, whatever its type.
void apply(Node parent, Task const& t) const {
auto task = named_child(parent, "task");
for (auto& item : t)
apply(named_child(task, "item"), item);
}
private:
// Appends a child to `parent`, names it `name` and returns it.
Node named_child(Node parent, std::string const& name) const {
auto child = parent.append_child();
child.set_name(name.c_str());
return child;
}
};
} // namespace Generate
// Sample inputs that main() parses one after another. Compared with the input
// in the question, every comparison is written in its own parentheses and
// every statement ends with ';'.
static const std::string cases[] = {
R"(
If (Var1 == "Test") && (Var2 <= 10) && (Var3 == "Done")
Verify Word 32 Objective;
Assign VarName "Value2";
EndIf;)",
R"(
If (Var1 == "Test") && (Var2 <= 10) && (Var3 == "Done")
Verify Word 32 Objective;
Else
Assign VarName "Value2";
EndIf;)",
R"(
Verify Word 32 Objective;
If (Var3 == "A")
Assign VarName "Value1";
Assign Var2 10;
Elseif (Var3 == "C")
Assign VarName "SomeValue";
EndIf;)",
R"(
If (Var1 == "Test") && (Var2 <= 10) && (Var3 == "Done")
Verify Word 32 Objective;
If (Var3 == "A") || (Var4 == "B") && (Var5 > 0)
Assign VarName "Value1";
Assign Var2 10;
Elseif (Var3 == "C")
Assign VarName "SomeValue";
EndIf;
Else
Assign VarName "Value2";
EndIf;)",
};
/// Parses every entry of `cases` and prints the generated XML to stdout.
///
/// If the grammar throws an expectation failure, the offending line is printed
/// to stderr with a caret under the position where parsing stopped.
int main() {
    using It = std::string::const_iterator;
    static const Parser::Task<It> p;
    static const Generate::XML to_xml;
    for (int i = 0; std::string const& input : cases) {
        try {
            Ast::Task t;
            std::cout << "*** Sample #" << ++i << std::endl;
            if (qi::parse(begin(input), end(input), p, t)) {
                pugi::xml_document doc;
                to_xml(doc.root(), t);
                doc.print(std::cout, " ", pugi::format_default);
                std::cout << std::endl;
            } else {
                std::cout << " -> INVALID" << std::endl;
            }
        } catch (qi::expectation_failure<It> const& ef) {
            // The signed/unsigned conversions are spelled out, so no warning
            // suppression pragma is needed.
            // Offset of the failure. ef.first lies inside [begin, end], so the
            // difference is never negative. Named `pos` so that it no longer
            // shadows the parser `p`.
            auto const pos = static_cast<std::size_t>(ef.first - input.begin());
            // Start of the offending line. The search stops strictly before
            // `pos`: if the failure sits on a line break, the line start must
            // not land after it, or `pos - bol` below would underflow.
            // When nothing is found, npos + 1 wraps to 0.
            auto const bol = pos ? input.find_last_of("\r\n", pos - 1) + 1 : 0;
            auto const line =
                std::count(input.begin(),
                           input.begin() + static_cast<std::string::difference_type>(bol),
                           '\n') + 1;
            // May be npos; substr() clamps the length to the end of the input.
            auto const eol = input.find_first_of("\r\n", pos);
            std::cerr << " -> EXPECTED " << ef.what_ << " in line:" << line << "\n"
                      << input.substr(bol, eol - bol) << "\n"
                      << std::setw(static_cast<int>(pos - bol)) << ""
                      << "^--- here" << std::endl;
        }
    }
}
I didn't bother to complete the XML generation step, so it's limited:
*** Sample #1
<task>
<item>
<conditional>
<expression>
<bioperator>&&</bioperator>
</expression>
<true_branch>
<block>
<verify>
<word>Word</word>
<num>32</num>
<obj>Objective</obj>
</verify>
<assign>
<variant>VarName</variant>
<literal>Value2</literal>
</assign>
</block>
</true_branch>
</conditional>
</item>
</task>
*** Sample #2
<task>
<item>
<conditional>
<expression>
<bioperator>&&</bioperator>
</expression>
<true_branch>
<block>
<verify>
<word>Word</word>
<num>32</num>
<obj>Objective</obj>
</verify>
</block>
</true_branch>
<false_branch>
<block>
<block>
<assign>
<variant>VarName</variant>
<literal>Value2</literal>
</assign>
</block>
</block>
</false_branch>
</conditional>
</item>
</task>
*** Sample #3
<task>
<item>
<verify>
<word>Word</word>
<num>32</num>
<obj>Objective</obj>
</verify>
</item>
<item>
<conditional>
<expression>
<bioperator>==</bioperator>
</expression>
<true_branch>
<block>
<assign>
<variant>VarName</variant>
<literal>Value1</literal>
</assign>
<assign>
<variant>Var2</variant>
<num>10</num>
</assign>
</block>
</true_branch>
<false_branch>
<block>
<conditional>
<expression>
<bioperator>==</bioperator>
</expression>
<true_branch>
<block>
<assign>
<variant>VarName</variant>
<literal>SomeValue</literal>
</assign>
</block>
</true_branch>
</conditional>
</block>
</false_branch>
</conditional>
</item>
</task>
*** Sample #4
<task>
<item>
<conditional>
<expression>
<bioperator>&&</bioperator>
</expression>
<true_branch>
<block>
<verify>
<word>Word</word>
<num>32</num>
<obj>Objective</obj>
</verify>
<conditional>
<expression>
<bioperator>||</bioperator>
</expression>
<true_branch>
<block>
<assign>
<variant>VarName</variant>
<literal>Value1</literal>
</assign>
<assign>
<variant>Var2</variant>
<num>10</num>
</assign>
</block>
</true_branch>
<false_branch>
<block>
<conditional>
<expression>
<bioperator>==</bioperator>
</expression>
<true_branch>
<block>
<assign>
<variant>VarName</variant>
<literal>SomeValue</literal>
</assign>
</block>
</true_branch>
</conditional>
</block>
</false_branch>
</conditional>
</block>
</true_branch>
<false_branch>
<block>
<block>
<assign>
<variant>VarName</variant>
<literal>Value2</literal>
</assign>
</block>
</block>
</false_branch>
</conditional>
</item>
</task>
I glossed over a ton of things. Note, e.g. that you were gearing up to put Conditional
inside Expression
. All your examples imply that you need it to act like nesting statements. You might need a conditional expression (popularly known as the ternary operator
) as well, like ?:
in C or CASE WHEN in SQL or a if cond else b
in Python. I'll leave it because none of your examples use it, and also your expression syntax needs a lot of work generally.
It all keeps coming down to this: Combat your complexity by being accurate about your domain semantics. As always, you can only solve the problem that you understand, and as long as (small) confusions creep in, they compound to insurmountable accidental complexity.
I think sometimes the professional thing to do is to stop the wild goose chase, ask for complete specs and wait for them before implementing anything.
I can almost guarantee a complete specification exists, as usually complicated grammars don't happen by accident (as you are actually finding out the hard way) and the apparent need for automated tooling reinforces the idea that this infrastructure is central to some part of a software development process. Ask for it, don't just keep working in the blind.