Disclaimer: this is an slice of an bigger parser - reduced down to just explain my problem
i got the situation that i can haven 3 types of Identifier-starting rhs value - the .
and :
is only an example for rules starting with an identifier and then differ someway - also the struct copies are just for this example
so it can be just id
, id.id
or id:id
the problem is that the id rule comes first and dictates what is needed at that point - do i need to combine my rules into one to get the different outputs? until now i though that the 3 rules starting with id will be handle as some sort of rule-composit - but it don't seem so
// #define BOOST_SPIRIT_DEBUG
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <variant>
namespace qi = boost::spirit::qi;
namespace Ast
{
struct Identifier : std::string
{
};
struct QualifiedId1
{
Identifier a, b;
};
struct QualifiedId2
{
Identifier a, b;
};
using Variants = std::variant<Identifier, QualifiedId1, QualifiedId2>;
} // namespace Ast
BOOST_FUSION_ADAPT_STRUCT( Ast::QualifiedId1, a, b )
BOOST_FUSION_ADAPT_STRUCT( Ast::QualifiedId2, a, b )
using It = std::string_view::const_iterator;
template <typename Attr, typename Skipper>
void run_tests( qi::rule<It, Attr(), Skipper> const& rule_, std::vector<std::string> const& tests )
{
std::cout << "======[ " << boost::core::demangle( typeid( Attr ).name() ) << " ]======" << std::endl;
for( std::string_view test : tests )
{
It f = test.begin(), l = test.end();
try
{
std::cout << "Parsing: " << quoted( test );
Attr v;
bool ok;
if constexpr( std::is_same_v<Skipper, qi::unused_type> )
ok = qi::parse( f, l, qi::eps > rule_ > qi::eoi, v );
else
ok = qi::phrase_parse( f, l, qi::eps > rule_ > qi::eoi, Skipper{}, v );
std::cout << ( ok ? "OK" : "FAIL" ) << " Remaining: " << quoted( std::string( f, l ) ) << "\n";
}
catch( qi::expectation_failure<It> const& ef )
{
auto p = ef.first - test.begin();
auto bol = test.find_last_of( "\r\n", p ) + 1;
auto line = std::count( f, f + bol, '\n' ) + 1;
auto eol = test.find_first_of( "\r\n", p );
std::cout << " -> EXPECTED " << ef.what_ << " in line:" << line << " col:" << ( p - bol ) << "\n"
<< " " << test.substr( bol, eol - bol ) << "\n"
<< " " << std::setw( p - bol ) << "" << "^--- here\n";
}
std::cout << "--------------------\n";
}
}
int main()
{
qi::rule<It, Ast::Identifier()> id = qi::char_( "a-zA-Z_" ) >> *qi::char_( "a-zA-Z0-9_" );
qi::rule<It, Ast::QualifiedId1(), qi::space_type> qualId1 = id > "." > id;
qi::rule<It, Ast::QualifiedId2(), qi::space_type> qualId2 = id > ":" > id;
qi::rule<It, Ast::Variants(), qi::space_type> variants = ( id ) | ( qualId1 ) | ( qualId1 );
run_tests( variants, { "abc", "abc.def", "abc:def" } );
}
Output - the id rule gets handled first and then the expectations fail
======[ class std::variant<struct Ast::Identifier,struct Ast::QualifiedId1,struct Ast::QualifiedId2> ]======
Parsing: "abc"OK Remaining: ""
--------------------
Parsing: "abc.def" -> EXPECTED <eoi> in line:1 col:3
abc.def
^--- here
--------------------
Parsing: "abc:def" -> EXPECTED <eoi> in line:1 col:3
abc:def
^--- here
--------------------
i tried to combine the 3 rules in something like
qi::rule<It, Ast::Variants(), qi::space_type> variants2 = id > *( ( "." > id ) | ( ":" > id ) );
that parses but only give the id type for all 3 in the variant
the following id is only a example normally there are different rules behind (like only numbers or a fixed string etc.) - so not only .
and :
as difference
PEG grammars are left-to-right greedy. (a | ab)
will never parse ab.
Also, expectation points fail the surrounding rule, not just the containing expression. If backtracking is required, expectation points should not be used.
Fixing these things:
// #define BOOST_SPIRIT_DEBUG
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <variant>
namespace qi = boost::spirit::qi;
namespace Ast {
struct Identifier : std::string {
using std::string::string;
using std::string::operator=;
};
struct QualifiedId1 { Identifier a, b; };
struct QualifiedId2 { Identifier a, b; };
using Variants = std::variant<Identifier, QualifiedId1, QualifiedId2>;
} // namespace Ast
BOOST_FUSION_ADAPT_STRUCT(Ast::QualifiedId1, a, b)
BOOST_FUSION_ADAPT_STRUCT(Ast::QualifiedId2, a, b)
using It = std::string_view::const_iterator;
template <typename Attr, typename Skipper>
void run_tests(qi::rule<It, Attr(), Skipper> const& rule_, std::vector<std::string> const& tests) {
using boost::core::demangle;
std::cout << "======[ " << demangle(typeid(Attr).name()) << " ]======" << std::endl;
for (std::string_view test : tests) {
It f = test.begin(), l = test.end();
try {
std::cout << "Parsing: " << quoted(test) << " -> ";
Attr v;
bool ok;
if constexpr (std::is_same_v<Skipper, qi::unused_type>)
ok = parse(f, l, qi::eps > rule_ > qi::eoi, v);
else
ok = phrase_parse(f, l, qi::eps > rule_ > qi::eoi, Skipper{}, v);
std::cout << (ok ? "OK" : "FAIL") //
<< " Remaining: " << quoted(std::string(f, l)) //
<< " Variant #" << v.index() << "\n";
} catch (qi::expectation_failure<It> const& ef) {
auto p = ef.first - test.begin();
auto bol = test.find_last_of( "\r\n", p ) + 1;
auto line = std::count( f, f + bol, '\n' ) + 1;
auto eol = test.find_first_of( "\r\n", p );
std::cout << "EXPECTED " << ef.what_ << " in line:" << line << " col:" << ( p - bol ) << "\n"
<< " " << test.substr( bol, eol - bol ) << "\n"
<< " " << std::setw( p - bol ) << "" << "^--- here\n";
}
}
}
template <typename Attr> using Rule = qi::rule<It, Attr(), qi::space_type>;
int main() {
qi::rule<It, Ast::Identifier()> id = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_");
Rule<Ast::QualifiedId1> qualId1 = id >> ("." > id);
Rule<Ast::QualifiedId2> qualId2 = id >> (":" > id);
Rule<Ast::Variants> variants = qualId1 | qualId2 | id;
run_tests( variants, { "abc", "abc.def", "abc:def" } );
}
Prints
======[ std::variant<Ast::Identifier, Ast::QualifiedId1, Ast::QualifiedId2> ]======
Parsing: "abc" -> OK Remaining: "" Variant #0
Parsing: "abc.def" -> OK Remaining: "" Variant #1
Parsing: "abc:def" -> OK Remaining: "" Variant #2
Sometimes you can mix lookahead assertions to allow for more stringent expectations:
keywordXYZ >>
(!qualId2 >> id) // will not parse qualid2
In general, if b
matches a subset of a
the tricky case (a | bc)
can be made to work with (a - b | bc)
, or equivalently (!b >> a | bc)
In practice this kind of grammar often appears with expression parsing (where this expression denotes namespace/object member access). Then usually the syntax is recursive:
using IdExpr = boost::variant< //
Id, // boost::recursive_wrapper<QualId1>, //
boost::recursive_wrapper<QualId2>>;
In that case you can often avoid backtracking and incrementally build the attribute using semantic actions:
// #define BOOST_SPIRIT_DEBUG
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace Ast {
struct Id : std::string {
using std::string::string;
using std::string::operator=;
};
struct QualId1;
struct QualId2;
using IdExpr = boost::variant< //
Id, //
boost::recursive_wrapper<QualId1>, //
boost::recursive_wrapper<QualId2>>;
struct QualId1 { IdExpr a, b; };
struct QualId2 { IdExpr a, b; };
// for demo output
std::ostream& operator<<(std::ostream& os, Id const& id) { return os << "Id(" << quoted(id) << ")"; }
std::ostream& operator<<(std::ostream& os, QualId1 const& id) { return os << "QualId1(" << id.a << "." << id.b << ")"; }
std::ostream& operator<<(std::ostream& os, QualId2 const& id) { return os << "QualId2(" << id.a << ":" << id.b << ")"; }
} // namespace Ast
using It = std::string_view::const_iterator;
template <typename Attr, typename Skipper>
void run_tests(qi::rule<It, Attr(), Skipper> const& rule_, std::vector<std::string> const& tests) {
using boost::core::demangle;
std::cout << "======[ " << demangle(typeid(Attr).name()) << " ]======" << std::endl;
for (std::string_view test : tests) {
It f = test.begin(), l = test.end();
try {
std::cout << "Parsing: " << quoted(test) << " -> ";
Attr v;
bool ok;
if constexpr (std::is_same_v<Skipper, qi::unused_type>)
ok = parse(f, l, qi::eps > rule_ > qi::eoi, v);
else
ok = phrase_parse(f, l, qi::eps > rule_ > qi::eoi, Skipper{}, v);
std::cout << (ok ? "OK" : "FAIL") //
<< " Variant #" << v.which() << "\n -> " << v << "\n";
} catch (qi::expectation_failure<It> const& ef) {
auto p = ef.first - test.begin();
auto bol = test.find_last_of( "\r\n", p ) + 1;
auto line = std::count( f, f + bol, '\n' ) + 1;
auto eol = test.find_first_of( "\r\n", p );
std::cout << "EXPECTED " << ef.what_ << " in line:" << line << " col:" << ( p - bol ) << "\n"
<< " " << test.substr( bol, eol - bol ) << "\n"
<< " " << std::setw( p - bol ) << "" << "^--- here\n";
}
}
}
template <typename Attr> using Rule = qi::rule<It, Attr(), qi::space_type>;
int main() {
using namespace qi::labels;
using boost::phoenix::construct;
qi::rule<It, Ast::Id()> id = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_");
qi::rule<It, Ast::IdExpr(), qi::space_type> expr;
expr = id[_val = _1] //
>> *(('.' >> expr)[_val = construct<Ast::QualId1>(_val, _1)] |
(':' >> expr)[_val = construct<Ast::QualId2>(_val, _1)]);
run_tests(expr,
{
"abc",
"abc.def",
"abc:def",
"abc.def.ghi",
"abc:def:ghi",
"abc:def.ghi:jkl",
"abc:def:ghi:jkl",
});
}
Printing
======[ boost::variant<Ast::Id, boost::recursive_wrapper<Ast::QualId1>, boost::recursive_wrapper<Ast::QualId2> > ]======
Parsing: "abc" -> OK Variant #0
-> Id("abc")
Parsing: "abc.def" -> OK Variant #1
-> QualId1(Id("abc").Id("def"))
Parsing: "abc:def" -> OK Variant #2
-> QualId2(Id("abc"):Id("def"))
Parsing: "abc.def.ghi" -> OK Variant #1
-> QualId1(Id("abc").QualId1(Id("def").Id("ghi")))
Parsing: "abc:def:ghi" -> OK Variant #2
-> QualId2(Id("abc"):QualId2(Id("def"):Id("ghi")))
Parsing: "abc:def.ghi:jkl" -> OK Variant #2
-> QualId2(Id("abc"):QualId1(Id("def").QualId2(Id("ghi"):Id("jkl"))))
Parsing: "abc:def:ghi:jkl" -> OK Variant #2
-> QualId2(Id("abc"):QualId2(Id("def"):QualId2(Id("ghi"):Id("jkl"))))