I have a working parser + AST based on Spirit that parses parameter initialization - everything works fine.
Sadly, there is an (old) database that wants parts of the AST as the original string (I'm on my way to replacing that thing, but want to make the change step by step).
(trivialized) example: a=10,b=30,c=A *B +10,d={1,A*4,3}*4
KeyValue-Rule = Identifier >> '=' >> (Number|Expression)
Expression is a complete Sub-AST with +-* and Numbers (in this trivialized example)
std::vector<KeyValue> Parameters;
The database wants the "values" of the parameters as strings; for c
that is A *B +10
- ideally the exact string (including blanks etc.).
Is that something Spirit can preserve (or at least the begin/end positions)?
The example above is a super-trivialized version of my real parser/AST; expressions can be deeply nested with ()
etc. - so it's not a simple split-by-,
thing - this is just to explain what I am trying to achieve.
something like: https://astexplorer.net/ - i need start/end-points of my logical AST parts in the string
Yes. You could use semantic actions:
KeyValue-Rule = raw [ (Identifier >> '=' >> (Number|Expression))[assign_ast] ][assign_raw_input_sequence];
You can use automatic attribute propagation in combination with semantic actions, using the %=
rule initialization instead of regular =
assignment.
You can also potentially automate things by using on_success
"error" handlers. See some examples of that in my older answers: https://stackoverflow.com/search?q=user%3A85371+qi+on_success
Coming up with the simplest possible grammar that doesn't have unary operators or operator precedence:
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/io.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
// Simple expression AST.
// Declaration order matters: BinaryOp and Expr are forward-declared because
// ExprV names BinaryOp before BinaryOp is complete.
namespace Ast {
// Leaf value types.
using Number = double;
using Variable = std::string;
using SimpleExpr = boost::variant<Number, Variable>;
struct BinaryOp;
struct Expr;
// recursive_wrapper allows the variant to hold the still-incomplete BinaryOp.
using ExprV = boost::variant<SimpleExpr, boost::recursive_wrapper<BinaryOp>>;
// Expr is ExprV under its own name; it re-uses the variant's constructors and
// assignment operators.
struct Expr : ExprV {
using ExprV::ExprV;
using ExprV::operator=;
};
// A binary operation `left op right`.
struct BinaryOp {
char op;
Expr left, right;
};
// Makes Fusion's stream operator visible in this namespace (presumably so the
// Fusion-adapted BinaryOp can be streamed through ADL).
using boost::fusion::operator<<;
} // namespace Ast
// Adapted as (left, op, right) - not declaration order - so the Fusion sequence
// lines up with `simple_ >> char_("+*/-") >> expr_` in the grammar.
BOOST_FUSION_ADAPT_STRUCT(Ast::BinaryOp, left, op, right)
// Simple expression grammar.
//
// Parses numbers, alphabetic variable names, parenthesised sub-expressions and
// the binary operators + * / -. There are no unary operators and no operator
// precedence: binary_ recurses through expr_ on its right-hand side, so
// operators nest to the right ("1 + 2 * 3" becomes (1 + (2 * 3))).
//
// The space skipper is installed inside `start`, so the grammar itself is
// skipper-less and is used with qi::parse. The whole input must be consumed
// (`eoi`).
template <typename It> struct SimpleExprGrammar : qi::grammar<It, Ast::Expr()> {
    SimpleExprGrammar() : SimpleExprGrammar::base_type(start) {
        using namespace qi;

        start = skip(space)[expr_ >> eoi];

        binary_ = simple_ >> char_("+*/-") >> expr_;

        // NOTE: when no operator follows, binary_ fails only after simple_ has
        // already matched, and simple_ is then parsed a second time. For nested
        // parentheses this re-parsing compounds per nesting level.
        expr_ = binary_ | simple_;

        simple_ = double_ | var_ | ('(' >> expr_ >> ')');

        var_ = +alpha;

        // Each rule is listed exactly once.
        BOOST_SPIRIT_DEBUG_NODES((expr_)(binary_)(simple_)(var_))
    }

  private:
    qi::rule<It, Ast::Expr()> start;

    using Skipper = qi::space_type;
    qi::rule<It, Ast::Expr(), Skipper>     expr_;
    qi::rule<It, Ast::Expr(), Skipper>     simple_;
    qi::rule<It, Ast::BinaryOp(), Skipper> binary_;

    // lexemes (declared without a skipper, so no whitespace inside a name)
    qi::rule<It, Ast::Variable()> var_;
};
int main() {
SimpleExprGrammar<std::string_view::const_iterator> const p;
for (std::string_view input : {
"1 + 2",
"1 + 2 * 3",
"(1 + 2) * 3",
"1 + 2 * (3 + 4)",
"1 + 2 * (3 + 4) / 5",
"1 + 2 * (3 + 4) / 5 - 6",
}) {
std::cout << " -- Parsing: " << quoted(input) << "\n";
auto f = begin(input), l = end(input);
if (Ast::Expr expr; qi::parse(f, l, p, expr))
std::cout << " -> " << expr << "\n";
else
std::cout << " FAILED\n";
}
}
Prints
-- Parsing: "1 + 2"
-> (1 + 2)
-- Parsing: "1 + 2 * 3"
-> (1 + (2 * 3))
-- Parsing: "(1 + 2) * 3"
-> ((1 + 2) * 3)
-- Parsing: "1 + 2 * (3 + 4)"
-> (1 + (2 * (3 + 4)))
-- Parsing: "1 + 2 * (3 + 4) / 5"
-> (1 + (2 * ((3 + 4) / 5)))
-- Parsing: "1 + 2 * (3 + 4) / 5 - 6"
-> (1 + (2 * ((3 + 4) / (5 - 6))))
Extend Expr with a member that records the matched source text:
// Expr extended with the slice of input it was parsed from.
struct Expr : ExprV {
using ExprV::ExprV;
using ExprV::operator=;
// Non-owning view of the matched input; only valid while the parsed input
// buffer is alive.
std::string_view source;
// Access to the underlying variant, so a visitor can be applied to the
// variant part without the Expr wrapper.
ExprV& base() { return *this; }
ExprV const& base() const { return *this; }
};
The source member will contain the matched source text. Craft a semantic action that sets it from the raw input iterators:
// Lazy (Phoenix) function that stores a source view on an AST node.
phx::function source_ = [](Ast::Expr& ast, std::string_view sv) { ast.source = sv; };
// Semantic action: builds a string_view from the begin/end of _1 (the iterator
// range when attached to raw[]) and stores it on the rule's attribute (_val).
auto set_raw_ = source_(_val, phx::construct<std::string_view>(phx::begin(_1), phx::end(_1)));
Sprinkle semantic actions across the rules that should propagate both the parsed Ast and the source:
// Inside raw[] each alternative assigns the rule attribute explicitly via
// [_val = _1]; the action on raw[] itself (set_raw_) then records the matched
// source range on that same attribute.
expr_ = raw[(binary_[_val = _1] | simple_[_val = _1])][set_raw_];
// Same pattern for the leaf/parenthesised alternatives.
simple_ = raw[ //
double_[_val = _1] //
| var_[_val = _1] //
| ('(' >> expr_ >> ')')[_val = _1] //
][set_raw_];
As a bonus, add an AnnotatedPrint
visitor:
// Visitor that prints an expression tree one node per line. Every Ast::Expr
// prints its recorded source text, followed by its content at a deeper indent.
//
// All output goes to `os_`; previously the stream was stored and passed down
// to nested visitors but every write went to std::cout.
struct AnnotatedPrint {
    std::ostream&    os_;         // destination stream for all output
    std::string_view input;       // not read by the visitor; kept so existing initializers still compile
    std::string      indent = ""; // prefix written before each line at this depth

    // Entry point, also invoked by boost::apply_visitor for variant alternatives.
    template <typename T> void operator()(T const& v) const { apply(v); }

  private:
    // An expression: print its source text, then its content one level deeper.
    void apply(Ast::Expr const& e) const {
        os_ << indent << "Source: " << std::quoted(e.source) << "\n";
        AnnotatedPrint{os_, input, indent + " "}(e.base());
    }

    // Any variant: dispatch to the overload for the currently held alternative.
    template <typename... Ts> void apply(boost::variant<Ts...> const& v) const {
        boost::apply_visitor(*this, v);
    }

    void apply(Ast::Number const& e) const { os_ << indent << "Number: " << e << "\n"; }

    void apply(Ast::Variable const& e) const { os_ << indent << "Variable: " << e << "\n"; }

    // A binary operation: print the operator, then both operands one level deeper.
    void apply(Ast::BinaryOp const& e) const {
        os_ << indent << "BinaryOp: " << e.op << "\n";
        AnnotatedPrint{os_, input, indent + " "}(e.left);
        AnnotatedPrint{os_, input, indent + " "}(e.right);
    }
};
Now the output becomes (Live On Coliru):
-- Parsing: "1 + 2"
-> (1 + 2)
-- AST: Source: "1 + 2"
-- AST: BinaryOp: +
-- AST: Source: "1"
-- AST: Number: 1
-- AST: Source: "2"
-- AST: Number: 2
-- Parsing: "1 + 2 * 3"
-> (1 + (2 * 3))
-- AST: Source: "1 + 2 * 3"
-- AST: BinaryOp: +
-- AST: Source: "1"
-- AST: Number: 1
-- AST: Source: "2 * 3"
-- AST: BinaryOp: *
-- AST: Source: "2"
-- AST: Number: 2
-- AST: Source: "3"
-- AST: Number: 3
-- Parsing: "(1 + 2) * 3"
-> ((1 + 2) * 3)
-- AST: Source: "(1 + 2) * 3"
-- AST: BinaryOp: *
-- AST: Source: "(1 + 2)"
-- AST: BinaryOp: +
-- AST: Source: "1"
-- AST: Number: 1
-- AST: Source: "2"
-- AST: Number: 2
-- AST: Source: "3"
-- AST: Number: 3
-- Parsing: "1 + 2 * (3 + 4)"
-> (1 + (2 * (3 + 4)))
-- AST: Source: "1 + 2 * (3 + 4)"
-- AST: BinaryOp: +
-- AST: Source: "1"
-- AST: Number: 1
-- AST: Source: "2 * (3 + 4)"
-- AST: BinaryOp: *
-- AST: Source: "2"
-- AST: Number: 2
-- AST: Source: "(3 + 4)"
-- AST: BinaryOp: +
-- AST: Source: "3"
-- AST: Number: 3
-- AST: Source: "4"
-- AST: Number: 4
-- Parsing: "1 + 2 * (3 + 4) / 5"
-> (1 + (2 * ((3 + 4) / 5)))
-- AST: Source: "1 + 2 * (3 + 4) / 5"
-- AST: BinaryOp: +
-- AST: Source: "1"
-- AST: Number: 1
-- AST: Source: "2 * (3 + 4) / 5"
-- AST: BinaryOp: *
-- AST: Source: "2"
-- AST: Number: 2
-- AST: Source: "(3 + 4) / 5"
-- AST: BinaryOp: /
-- AST: Source: "(3 + 4)"
-- AST: BinaryOp: +
-- AST: Source: "3"
-- AST: Number: 3
-- AST: Source: "4"
-- AST: Number: 4
-- AST: Source: "5"
-- AST: Number: 5
-- Parsing: "1 + 2 * (3 + 4) / 5 - 6"
-> (1 + (2 * ((3 + 4) / (5 - 6))))
-- AST: Source: "1 + 2 * (3 + 4) / 5 - 6"
-- AST: BinaryOp: +
-- AST: Source: "1"
-- AST: Number: 1
-- AST: Source: "2 * (3 + 4) / 5 - 6"
-- AST: BinaryOp: *
-- AST: Source: "2"
-- AST: Number: 2
-- AST: Source: "(3 + 4) / 5 - 6"
-- AST: BinaryOp: /
-- AST: Source: "(3 + 4)"
-- AST: BinaryOp: +
-- AST: Source: "3"
-- AST: Number: 3
-- AST: Source: "4"
-- AST: Number: 4
-- AST: Source: "5 - 6"
-- AST: BinaryOp: -
-- AST: Source: "5"
-- AST: Number: 5
-- AST: Source: "6"
-- AST: Number: 6
Without further comment:
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Simple expression AST, extended with source tracking and definitions.
// Declaration order matters: BinaryOp and Expr are forward-declared because
// ExprV names BinaryOp before BinaryOp is complete.
namespace Ast {
// Leaf value types.
using Number = double;
using Variable = std::string;
using SimpleExpr = boost::variant<Number, Variable>;
struct BinaryOp;
struct Expr;
// recursive_wrapper allows the variant to hold the still-incomplete BinaryOp.
using ExprV = boost::variant<SimpleExpr, boost::recursive_wrapper<BinaryOp>>;
// An expression node plus the slice of input it was parsed from.
struct Expr : ExprV {
using ExprV::ExprV;
using ExprV::operator=;
// Non-owning view of the matched input; only valid while the parsed input
// buffer is alive.
std::string_view source;
// Access to the underlying variant part.
ExprV& base() { return *this; }
ExprV const& base() const { return *this; }
};
// A binary operation `left op right`.
struct BinaryOp {
char op;
Expr left, right;
};
// One `name = expression` definition, and the collection of all of them.
// Defs is a std::map, so iteration is ordered by variable name rather than by
// position in the input.
using Def = std::pair<Variable, Expr>;
using Defs = std::map<Variable, Expr>;
// Makes Fusion's stream operator visible in this namespace (presumably so the
// Fusion-adapted BinaryOp can be streamed through ADL).
using boost::fusion::operator<<;
} // namespace Ast
// Adapted as (left, op, right) - not declaration order - so the Fusion sequence
// lines up with `simple_ >> char_("+*/-") >> expr_` in the grammar.
BOOST_FUSION_ADAPT_STRUCT(Ast::BinaryOp, left, op, right)
// Grammar for a comma-separated list of definitions: `name = expr, name = expr, ...`.
//
// Expressions consist of numbers, alphabetic variable names, parenthesised
// sub-expressions and the binary operators + * / - (no unary operators, no
// precedence; operators nest to the right). Every Ast::Expr produced by expr_
// or simple_ also records the slice of input it matched in Expr::source.
//
// The space skipper is installed inside `start`, so the grammar itself is
// skipper-less and is used with qi::parse. The whole input must be consumed
// (`eoi`).
template <typename It> struct DefParser : qi::grammar<It, Ast::Defs()> {
    DefParser() : DefParser::base_type(start) {
        using namespace qi;

        // Lazy (Phoenix) function that stores a source view on an AST node.
        phx::function source_ = [](Ast::Expr& ast, std::string_view sv) { ast.source = sv; };

        // Semantic action for raw[]: build a string_view from the begin/end of
        // the matched iterator range (_1) and store it on the rule attribute.
        auto set_raw_ = source_(_val, phx::construct<std::string_view>(phx::begin(_1), phx::end(_1)));

        start = skip(space)[defs_ >> eoi];

        defs_ = def_ % ',';
        def_  = var_ >> '=' >> expr_;

        binary_ = simple_ >> char_("+*/-") >> expr_;

        // Inside raw[] each alternative assigns _val explicitly; set_raw_ then
        // records the matched source range on that same attribute.
        // NOTE: when no operator follows, binary_ fails only after simple_ has
        // already matched, and simple_ is then parsed a second time.
        expr_ = raw[(binary_[_val = _1] | simple_[_val = _1])][set_raw_];

        simple_ = raw[                          //
            double_[_val = _1]                  //
            | var_[_val = _1]                   //
            | ('(' >> expr_ >> ')')[_val = _1]  //
        ][set_raw_];

        var_ = +alpha;

        // Each rule is listed exactly once.
        BOOST_SPIRIT_DEBUG_NODES((expr_)(binary_)(simple_)(var_)(def_)(defs_))
    }

  private:
    qi::rule<It, Ast::Defs()> start;

    using Skipper = qi::space_type;
    qi::rule<It, Ast::Def(), Skipper>      def_;
    qi::rule<It, Ast::Defs(), Skipper>     defs_;
    qi::rule<It, Ast::Expr(), Skipper>     expr_;
    qi::rule<It, Ast::Expr(), Skipper>     simple_;
    qi::rule<It, Ast::BinaryOp(), Skipper> binary_;

    // lexemes (declared without a skipper, so no whitespace inside a name)
    qi::rule<It, Ast::Variable()> var_;
};
// Parses a sample definition list and, for every definition, prints the parsed
// expression followed by the input with a `^----` marker underneath the slice
// of input that the expression was parsed from.
int main() {
    using It = std::string_view::const_iterator;
    DefParser<It> const p;

    std::string_view input = "a=10,b=30,c=A *B +10, PI = TAU / (((2)))";
    auto f = begin(input), l = end(input);

    if (Ast::Defs defs; qi::parse(f, l, p, defs)) {
        // The marker line is padded by the label width so the caret lines up
        // with the echoed input instead of relying on a hand-counted literal.
        static constexpr std::string_view label = "Input: ";

        for (auto const& [v, e] : defs) {
            std::cout << " -> " << v << " = " << e << "\n";

            // Offset of the expression within the input. Pointers are used
            // rather than iterators because the two views are distinct
            // objects, which checked iterator implementations reject.
            auto const pos = e.source.data() - input.data();

            // Caret plus size-1 dashes; guard against wrap-around on an empty view.
            auto const n = e.source.empty() ? 0 : e.source.size() - 1;

            std::cout << label << input << "\n";
            std::cout << std::string(label.size() + static_cast<std::size_t>(pos), ' ') //
                      << "^" << std::string(n, '-') << "\n";
        }
    } else {
        std::cout << "FAILED\n";
    }
}
Prints
-> PI = (TAU / 2)
Input: a=10,b=30,c=A *B +10, PI = TAU / (((2)))
^------------
-> a = 10
Input: a=10,b=30,c=A *B +10, PI = TAU / (((2)))
^-
-> b = 30
Input: a=10,b=30,c=A *B +10, PI = TAU / (((2)))
^-
-> c = (A * (B + 10))
Input: a=10,b=30,c=A *B +10, PI = TAU / (((2)))
^-------