The examples in the Boost.Spirit documentation seem to fall in two cases:
1/ Define a parser in a function: semantic actions can access local variables and data as they are local lambdas. Like push_back
here: https://www.boost.org/doc/libs/master/libs/spirit/doc/x3/html/spirit_x3/tutorials/number_list___stuffing_numbers_into_a_std__vector.html
2/ Define a parser in a namespace, like here: https://www.boost.org/doc/libs/1_69_0/libs/spirit/doc/x3/html/spirit_x3/tutorials/minimal.html
which seems to be necessary to be able to invoke BOOST_SPIRIT_DEFINE
.
My question is: how to combine both (properly, without globals) ? My dream API would be to pass some argument to phrase_parse
and then do some x3::_arg(ctx)
but I couldn't find anything like this.
Here is for instance my parser: for now the actions are writing to std::cerr
. What if I wanted to write to a custom std::ostream&
instead, that would be passed to the parse
function?
using namespace boost::spirit;
using namespace boost::spirit::x3;
rule<struct id_action> action = "action";
rule<struct id_array> array = "array";
rule<struct id_empty_array> empty_array = "empty_array";
rule<struct id_atom> atom = "atom";
rule<struct id_sequence> sequence = "sequence";
rule<struct id_root> root = "root";
auto access_index_array = [] (const auto& ctx) { std::cerr << "access_array: " << x3::_attr(ctx) << "\n" ;};
auto access_empty_array = [] (const auto& ctx) { std::cerr << "access_empty_array\n" ;};
auto access_named_member = [] (const auto& ctx) { std::cerr << "access_named_member: " << x3::_attr(ctx) << "\n" ;};
auto start_action = [] (const auto& ctx) { std::cerr << "start action\n" ;};
auto finish_action = [] (const auto& ctx) { std::cerr << "finish action\n" ;};
auto create_array = [] (const auto& ctx) { std::cerr << "create_array\n" ;};
const auto action_def = +(lit('.')[start_action]
>> -((+alnum)[access_named_member])
>> *(('[' >> x3::int_ >> ']')[access_index_array] | lit("[]")[access_empty_array]));
const auto sequence_def = (action[finish_action] % '|');
const auto array_def = ('[' >> sequence >> ']')[create_array];
const auto root_def = array | action;
BOOST_SPIRIT_DEFINE(action)
BOOST_SPIRIT_DEFINE(array)
BOOST_SPIRIT_DEFINE(sequence)
BOOST_SPIRIT_DEFINE(root)
bool parse(std::string_view str)
{
using ascii::space;
auto first = str.begin();
auto last = str.end();
bool r = phrase_parse(
first, last,
parser::array_def | parser::sequence_def,
ascii::space
);
if (first != last)
return false;
return r;
}
About the approaches:
1/ Yes, this is viable for small, contained parsers. Typically only used in a single TU, and exposed via non-generic interface.
2/ This is the approach for (much) larger grammars, that you might wish to spread across TUs, and/or are instantiated across several TU's generically.
Note that you do NOT need BOOST_SPIRIT_DEFINE
unless you
My question is: how to combine both (properly, without globals) ?
You can't combine something with namespace level declarations, if one of the requiremenents is "without globals".
My dream API would be to pass some argument to phrase_parse and then do some x3::_arg(ctx) but I couldn't find anything like this.
I don't know what you think x3::_arg(ctx)
would do, in that particular dream :)
Here is for instance my parser: for now the actions are writing to std::cerr. What if I wanted to write to a custom std::ostream& instead, that would be passed to the parse function?
Now that's a concrete question. I'd say: use the context.
You could make it so that you can use x3::get<ostream>(ctx)
returns the stream:
struct ostream{};
auto access_index_array = [] (const auto& ctx) { x3::get<ostream>(ctx) << "access_array: " << x3::_attr(ctx) << "\n" ;};
auto access_empty_array = [] (const auto& ctx) { x3::get<ostream>(ctx) << "access_empty_array\n" ;};
auto access_named_member = [] (const auto& ctx) { x3::get<ostream>(ctx) << "access_named_member: " << x3::_attr(ctx) << "\n" ;};
auto start_action = [] (const auto& ctx) { x3::get<ostream>(ctx) << "start action\n" ;};
auto finish_action = [] (const auto& ctx) { x3::get<ostream>(ctx) << "finish action\n" ;};
auto create_array = [] (const auto& ctx) { x3::get<ostream>(ctx) << "create_array\n";};
Now you need to put the tagged param in the context during parsing:
bool r = phrase_parse(
f, l,
x3::with<parser::ostream>(std::cerr)[parser::array_def | parser::sequence_def],
x3::space);
Live Demo: http://coliru.stacked-crooked.com/a/a26c8eb0af6370b9
Prints
start action
access_named_member: a
finish action
start action
access_named_member: b
start action
start action
access_array: 2
start action
access_named_member: foo
start action
access_empty_array
finish action
start action
access_named_member: c
finish action
create_array
true
Intermixed with the standard X3 debug output:
<sequence>
<try>.a|.b..[2].foo.[]|.c</try>
<action>
<try>.a|.b..[2].foo.[]|.c</try>
<success>|.b..[2].foo.[]|.c]</success>
</action>
<action>
<try>.b..[2].foo.[]|.c]</try>
<success>|.c]</success>
</action>
<action>
<try>.c]</try>
<success>]</success>
</action>
<success>]</success>
</sequence>
It looks like you're parsing something similar to JSON Pointer or jq
syntax. In the case that you wanted to provide a callback-interface (SAX-events), why not bind the callback interface instead of the actions:
struct handlers {
using N = x3::unused_type;
virtual void index(int) {}
virtual void index(N) {}
virtual void property(std::string) {}
virtual void start(N) {}
virtual void finish(N) {}
virtual void create_array(N) {}
};
#define EVENT(e) ([](auto& ctx) { x3::get<handlers>(ctx).e(x3::_attr(ctx)); })
const auto action_def =
+(x3::lit('.')[EVENT(start)] >> -((+x3::alnum)[EVENT(property)]) >>
*(('[' >> x3::int_ >> ']')[EVENT(index)] | x3::lit("[]")[EVENT(index)]));
const auto sequence_def = action[EVENT(finish)] % '|';
const auto array_def = ('[' >> sequence >> ']')[EVENT(create_array)];
const auto root_def = array | action;
Now you can implement all handlers neatly in one interface:
struct default_handlers : parser::handlers {
std::ostream& os;
default_handlers(std::ostream& os) : os(os) {}
void index(int i) override { os << "access_array: " << i << "\n"; };
void index(N) override { os << "access_empty_array\n" ; };
void property(std::string n) override { os << "access_named_member: " << n << "\n" ; };
void start(N) override { os << "start action\n" ; };
void finish(N) override { os << "finish action\n" ; };
void create_array(N) override { os << "create_array\n"; };
};
auto f = str.begin(), l = str.end();
bool r = phrase_parse(f, l,
x3::with<parser::handlers>(default_handlers{std::cout}) //
[parser::array_def | parser::sequence_def],
x3::space);
See it Live On Coliru once again:
start action
access_named_member: a
finish action
start action
access_named_member: b
start action
start action
access_array: 2
start action
access_named_member: foo
start action
access_empty_array
finish action
start action
access_named_member: c
finish action
create_array
true
The natural way to expose attributes would be to build an AST. See also Boost Spirit: "Semantic actions are evil"?
Without further ado:
namespace AST {
using Id = std::string;
using Index = int;
struct Member {
std::optional<Id> name;
};
struct Indexer {
std::optional<int> index;
};
struct Action {
Member member;
std::vector<Indexer> indexers;
};
using Actions = std::vector<Action>;
using Sequence = std::vector<Actions>;
struct ArrayCtor {
Sequence actions;
};
using Root = boost::variant<ArrayCtor, Actions>;
}
Of course, I'm making some assumptions. The rules can be much simplified:
namespace parser {
template <typename> struct Tag {};
#define AS(T, p) (x3::rule<Tag<AST::T>, AST::T>{#T} = p)
auto id = AS(Id, +x3::alnum);
auto member = AS(Member, x3::lit('.') >> -id);
auto indexer = AS(Indexer,'[' >> -x3::int_ >> ']');
auto action = AS(Action, member >> *indexer);
auto actions = AS(Actions, +action);
auto sequence = AS(Sequence, actions % '|');
auto array = AS(ArrayCtor, '[' >> -sequence >> ']'); // covers empty array
auto root = AS(Root, array | actions);
} // namespace parser
And the parsing function returns the AST:
AST::Root parse(std::string_view str) {
auto f = str.begin(), l = str.end();
AST::Root parsed;
phrase_parse(f, l, x3::expect[parser::root >> x3::eoi], x3::space, parsed);
return parsed;
}
(Note that it now throws x3::expection_failure
if the input is invalid or not completely parsed)
int main() {
std::cout << parse("[.a|.b..[2].foo.[]|.c]");
}
Now prints:
[.a|.b./*none*/./*none*/[2].foo./*none*/[/*none*/]|.c]
See it Live On Coliru
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <ostream>
#include <optional>
namespace x3 = boost::spirit::x3;
namespace AST {
using Id = std::string;
using Index = int;
struct Member {
std::optional<Id> name;
};
struct Indexer {
std::optional<int> index;
};
struct Action {
Member member;
std::vector<Indexer> indexers;
};
using Actions = std::vector<Action>;
using Sequence = std::vector<Actions>;
struct ArrayCtor {
Sequence actions;
};
using Root = boost::variant<ArrayCtor, Actions>;
}
BOOST_FUSION_ADAPT_STRUCT(AST::Member, name)
BOOST_FUSION_ADAPT_STRUCT(AST::Indexer, index)
BOOST_FUSION_ADAPT_STRUCT(AST::Action, member, indexers)
BOOST_FUSION_ADAPT_STRUCT(AST::ArrayCtor, actions)
namespace parser {
template <typename> struct Tag {};
#define AS(T, p) (x3::rule<Tag<AST::T>, AST::T>{#T} = p)
auto id = AS(Id, +x3::alnum);
auto member = AS(Member, x3::lit('.') >> -id);
auto indexer = AS(Indexer,'[' >> -x3::int_ >> ']');
auto action = AS(Action, member >> *indexer);
auto actions = AS(Actions, +action);
auto sequence = AS(Sequence, actions % '|');
auto array = AS(ArrayCtor, '[' >> -sequence >> ']'); // covers empty array
auto root = AS(Root, array | actions);
} // namespace parser
AST::Root parse(std::string_view str) {
auto f = str.begin(), l = str.end();
AST::Root parsed;
phrase_parse(f, l, x3::expect[parser::root >> x3::eoi], x3::space, parsed);
return parsed;
}
// for debug output
#include <iostream>
#include <iomanip>
namespace AST {
static std::ostream& operator<<(std::ostream& os, Member const& m) {
return os << "." << m.name.value_or("/*none*/");
}
static std::ostream& operator<<(std::ostream& os, Indexer const& i) {
if (i.index)
return os << "[" << *i.index << "]";
else
return os << "[/*none*/]";
}
static std::ostream& operator<<(std::ostream& os, Action const& a) {
os << a.member;
for (auto& i : a.indexers)
os << i;
return os;
}
static std::ostream& operator<<(std::ostream& os, Actions const& aa) {
for (auto& a : aa)
os << a;
return os;
}
static std::ostream& operator<<(std::ostream& os, Sequence const& s) {
bool first = true;
for (auto& a : s)
os << (std::exchange(first, false) ? "" : "|") << a;
return os;
}
static std::ostream& operator<<(std::ostream& os, ArrayCtor const& ac) {
return os << "[" << ac.actions << "]";
}
}
int main() {
std::cout << parse("[.a|.b..[2].foo.[]|.c]");
}