c++ boost boost-spirit-x3

boost spirit x3 - parse tokens in any order


This is basically a follow-up to a question I asked earlier, which @sehe so graciously answered!

Question: How do I parse multiple command parsers using Boost Spirit X3? Here is the code given by @sehe: https://coliru.stacked-crooked.com/a/5879831b11c51f84

The follow up question is how to parse the command arguments in any order:

i.e. both of the following should parse successfully:

cmd1 param1=<value> param2=<value> OR
cmd1 param2=<value> param1=<value>

and so on


Solution

  • I feel I have to mention we're not ChatGPT. You're in luck though, I like doing X3 finger exercises.

    First, let's observe that Spirit Qi has a parser operator that comes close out of the box: Permutation Parser

    Live On Coliru

    #include <boost/fusion/adapted.hpp>
    #include <boost/fusion/include/io.hpp>
    #include <boost/optional/optional_io.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    #include <iostream>
    
    namespace qi = boost::spirit::qi;
    
    namespace ast {
        // Arguments of the `cmd1` command: two floating-point parameters.
        struct cmd1 {
            double param1;
            double param2;
        };
    
        // Arguments of the `cmd2` command: a single string parameter.
        struct cmd2 {
            std::string param1;
        };
    
        // Make Fusion's stream operator visible in this namespace so the
        // adapted command structs can be written to an ostream.
        using boost::fusion::operator<<;
    
        // Result of parsing one line: exactly one of the known commands.
        using Command = boost::variant<cmd1, cmd2>;
    } // namespace ast
    
    // Adapt the command structs as Fusion sequences (members listed in
    // declaration order); the Qi rules below use these structs as attributes.
    BOOST_FUSION_ADAPT_STRUCT(ast::cmd1, param1, param2)
    BOOST_FUSION_ADAPT_STRUCT(ast::cmd2, param1)
    
    // Qi grammar that recognises one command line and yields an ast::Command.
    // `It` is the iterator type of the input being parsed.
    template <typename It> struct CommandParser : qi::grammar<It, ast::Command()> {
        CommandParser() : CommandParser::base_type(start) {
            using namespace qi;
            // lexeme[] switches skipping off inside the quotes, so embedded
            // whitespace is kept as part of the string.
            quoted_string = lexeme['"' >> *~char_('"') >> '"'];
    
            // `a ^ b` is Qi's permutation operator: the two `name = value` pairs
            // may appear in either order, each at most once.
            // NOTE(review): the permutation parser is documented as matching
            // "one or more" operands, so a line giving only one of the two
            // parameters is presumably accepted too -- confirm if both are mandatory.
            cmd1  = lit("cmd1") >> ((lit("param1") >> '=' >> double_) ^ //
                                   (lit("param2") >> '=' >> double_));
            cmd2  = lit("cmd2") >> ((lit("param1") >> '=' >> quoted_string));
            // `start` declares no skipper of its own; it installs qi::space for
            // the command rules, which are declared with Skipper below.
            start = qi::skip(qi::space)[cmd1 | cmd2];
    
            // Names the listed rules for debug output (quoted_string is not listed).
            BOOST_SPIRIT_DEBUG_NODES((cmd1)(cmd2)(start))
        }
    
      private:
        using Skipper = qi::space_type;
        qi::rule<It, ast::Command()>          start;
        qi::rule<It, ast::cmd1(), Skipper>    cmd1;
        qi::rule<It, ast::cmd2(), Skipper>    cmd2;
        qi::rule<It, std::string(), Skipper>  quoted_string;
    };
    
    // Parses the range [first, last) as a single command.
    // Returns the parsed command, or an empty optional when the input does not
    // match or is not consumed completely (qi::eoi has to succeed).
    template <typename It> boost::optional<ast::Command> parse_line(It first, It last) {
        // One grammar instance per iterator type, shared by all calls.
        static CommandParser<It> const grammar;
    
        ast::Command result;
        bool const matched = phrase_parse(first, last, grammar >> qi::eoi, qi::space, result);
    
        if (!matched)
            return {};
        return result;
    }
    
    // Convenience overload: parses the characters viewed by `input`.
    auto parse_line(std::string_view input) {
        return parse_line(input.begin(), input.end());
    }
    
    // Runs the parser over a fixed list of sample lines and prints, per line,
    // the quoted input followed by the parse result ("--" for no match).
    int main() {
        // To read the lines from stdin instead, loop like this:
        //   for (std::string line; getline(std::cin, line) && !line.empty();) {
        char const* const samples[] = {
            R"()",
            R"(cmd1 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param2 = 8e-9 param1 = 3.14)", // flipped order
            R"(cmd1 param1 = 3.14 param2 = -inf)",
            R"(cmd1 param2 = -inf param1 = 3.14)", // flipped order
            R"(cmd2 param1 = " hello world " )",
    
            // things that would not have parsed with question code:
            R"(cmd2 param1 = "" )",
    
            // things that should not parse
            R"(cmd2 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param1 = " hello world " )",
            R"(cmd2 param1 = "" trailing rubbish)",
            R"(trailing rubbish)",
        };
    
        for (std::string const line : samples) {
            std::cout << std::left << std::setw(40) << quoted(line);
            try {
                // Parse first, print afterwards: nothing is written for this
                // line's result if parsing throws.
                auto const result = parse_line(line);
                std::cout << " -> " << result << std::endl;
            } catch (std::exception const& e) {
                std::cout << " -> ERROR " << e.what() << std::endl;
            }
        }
    }
    

    Printing

    ""                                       -> --
    "cmd1 param1 = 3.14 param2 = 8e-9"       ->  (3.14 8e-09)
    "cmd1 param2 = 8e-9 param1 = 3.14"       ->  (3.14 8e-09)
    "cmd1 param1 = 3.14 param2 = -inf"       ->  (3.14 -inf)
    "cmd1 param2 = -inf param1 = 3.14"       ->  (3.14 -inf)
    "cmd2 param1 = \" hello world \" "       ->  ( hello world )
    "cmd2 param1 = \"\" "                    ->  ()
    "cmd2 param1 = 3.14 param2 = 8e-9"       -> --
    "cmd1 param1 = \" hello world \" "       -> --
    "cmd2 param1 = \"\" trailing rubbish"    -> --
    "trailing rubbish"                       -> --
    

    You might consider staying with Qi for this.

    Other Approaches

    To get similar things done in X3, some heroics will be required. Let me try.

    Note: this builds on the ideas developed in "Boost Spirit x3: parse into structs", and specifically on the workaround mentioned in the last comment there.

    Here are the quick-and-dirty heroics:

    namespace detail {
        // Value parser for a struct member of type Attr. The primary template is
        // only a placeholder (x3::eps); supported member types are specialised below.
        template <typename Attr> auto member_parser = x3::eps;
        // std::string members: a double-quoted string; lexeme[] keeps inner whitespace.
        template <>
        auto member_parser<std::string> = x3::rule<struct quoted_string, std::string>{"quoted_string"} =
            x3::lexeme['"' >> *~x3::char_('"') >> '"'];
    
        // double members: any floating-point literal accepted by x3::double_.
        template <> auto member_parser<double> = x3::double_;
    
        // Builds the parser for member number II of T, i.e. `<member-name> = <value>`.
        // `tied` (the result of boost::pfr::structure_tie) is used only to obtain
        // the member's type. On a match the parsed value is stored into member II
        // of the T object whose address make_parser() put into the x3::with<T> slot.
        template <size_t II, typename T, typename Tuple> auto handle_member(Tuple const& tied) {
            // x3::rule keeps just the `char const*` it is given, so the name has to
            // outlive the returned parser. A function-local static (one per
            // <II, T, Tuple> instantiation) guarantees that; a plain local
            // std::string would leave the rule holding a dangling pointer.
            static std::string const name{boost::pfr::get_name<II, T>()};
    
            using Attr = std::decay_t<decltype(std::get<II>(tied))>;
    
            auto assign = [](auto& ctx) { boost::pfr::get<II>(*x3::get<T>(ctx)) = _attr(ctx); };
            return x3::rule<struct _>{name.c_str()} = (x3::lit(name) >> '=' >> member_parser<Attr>)[assign];
        }
    
        // Combines the member parsers of T as `*(m0 | m1 | ...)`: members are
        // accepted in any order. Being a Kleene star, this also accepts omitted
        // and repeated members.
        template <typename T, typename Tuple, size_t... I>
        auto params_impl(Tuple const& tied, std::integer_sequence<size_t, I...>) {
            return *(handle_member<I, T, Tuple>(tied) | ...);
        }
    } // namespace detail
    
    // Builds the parser for command T: the command keyword (T's unqualified type
    // name) followed by its `member = value` arguments in any order.
    // `v` is used only to obtain the member types of T via structure_tie.
    template <typename T> auto make_parser(T const& v = {}) {
        // x3::rule keeps just the `char const*` it is given, so the name must
        // outlive the returned parser: keep it in a per-T function-local static
        // instead of a local std::string that dies when this function returns.
        static std::string const tname = [] {
            std::string const full = boost::typeindex::type_id<T>().pretty_name();
            // Strip any namespace qualification (rfind == npos yields the whole name).
            return full.substr(full.rfind(':') + 1);
        }();
    
        // Publish the address of the rule's attribute so that the member
        // parsers can assign into it through x3::get<T>(ctx).
        auto set_context = [](auto& ctx) { x3::get<T>(ctx) = &_val(ctx); };
    
        return x3::rule<struct _, T>{tname.c_str()} = //
            x3::with<T>(static_cast<T*>(nullptr))     //
                [x3::eps[set_context]                 //
                 >> x3::lit(tname)                    //
                 >> detail::params_impl<T>(boost::pfr::structure_tie(v),
                                           std::make_index_sequence<boost::pfr::tuple_size<T>::value>{})];
    }
    

    I would probably clean it up to use static type info instead of requiring default-constructability, but in the interest of speed let's keep it as that. Now, use it:

    namespace parser {
        // A command line is either a cmd1 or a cmd2; each alternative is
        // generated from the corresponding struct by make_parser.
        auto const command = make_parser<ast::cmd1>() | make_parser<ast::cmd2>();
    } // namespace parser
    

    Or indeed, with some more factory help:

    // Combines the parsers of all listed command types into one alternative
    // by folding make_parser<Cmd>() over `|`.
    template <typename... Cmd> auto commands() { return (make_parser<Cmd>() | ...); }
    
    // The alternative of the make_parser results for ast::cmd1 and ast::cmd2.
    auto const command = commands<ast::cmd1, ast::cmd2>();
    

    Integrating in the example test cases:

    Live On Coliru

    #include <boost/pfr.hpp>
    #include <boost/spirit/home/x3.hpp>
    #include <boost/type_index.hpp>
    #include <iomanip>
    #include <iostream>
    #include <optional>
    
    namespace x3 = boost::spirit::x3;
    
    namespace ast {
        // Arguments of the `cmd1` command: two floating-point parameters.
        struct cmd1 {
            double param1;
            double param2;
        };
    
        // Arguments of the `cmd2` command: a single string parameter.
        struct cmd2 {
            std::string param1;
        };
    
        // Result of parsing one line: exactly one of the known commands.
        using Command = boost::variant<cmd1, cmd2>;
    } // namespace ast
    
    namespace parser {
        namespace detail {
            template <typename Attr> auto member_parser = x3::eps;
            template <>
            auto member_parser<std::string> = x3::rule<struct quoted_string, std::string>{"quoted_string"} =
                x3::lexeme['"' >> *~x3::char_('"') >> '"'];
    
            template <> auto member_parser<double> = x3::double_;
    
            template <size_t II, typename T, typename Tuple> auto handle_member(Tuple const& tied) {
                auto&&      val = std::get<II>(tied);
                std::string name{boost::pfr::get_name<II, T>()};
    
                using Attr = std::decay_t<decltype(val)>;
    
                auto assign = [name](auto& ctx) { boost::pfr::get<II>(*x3::get<T>(ctx)) = _attr(ctx); };
                return x3::rule<struct _>{name.c_str()} = (x3::lit(name) >> '=' >> member_parser<Attr>)[assign];
            }
    
            template <typename T, typename Tuple, size_t... I>
            auto params_impl(Tuple const& tied, std::integer_sequence<size_t, I...>) {
                return *(handle_member<I, T, Tuple>(tied) | ...);
            }
        } // namespace detail
    
        template <typename T> auto make_parser(T const& v = {}) {
            std::string tname = boost::typeindex::type_id<T>().pretty_name();
            tname             = tname.substr(tname.find_last_of(":") + 1);
    
            auto set_context = [](auto& ctx) { x3::get<T>(ctx) = &_val(ctx); };
    
            return x3::rule<struct _, T>{tname.c_str()} = //
                x3::with<T>(static_cast<T*>(nullptr))     //
                    [x3::eps[set_context]                 //
                     >> x3::lit(tname)                    //
                     >> detail::params_impl<T>(boost::pfr::structure_tie(v),
                                               std::make_index_sequence<boost::pfr::tuple_size<T>::value>{})];
        }
    
        template <typename... Cmd> auto commands() { return (make_parser<Cmd>() | ...); }
    
        auto const command = commands<ast::cmd1, ast::cmd2>();
    } // namespace parser
    
    template <typename It> std::optional<ast::Command> parse_line(It first, It last) {
        ast::Command attr;
    
        // if (phrase_parse(first, last, x3::expect[parser::command >> x3::eoi], x3::space, attr))
        if (phrase_parse(first, last, parser::command >> x3::eoi, x3::space, attr))
            return attr;
        return std::nullopt;
    }
    
    // Convenience overload: parses the characters viewed by `input`.
    auto parse_line(std::string_view input) {
        return parse_line(input.begin(), input.end());
    }
    
    // Runs the parser over a fixed list of sample lines and prints, per line,
    // the quoted input followed by the parsed command ("--" for no match).
    int main() {
        // To read the lines from stdin instead, loop like this:
        //   for (std::string line; getline(std::cin, line) && !line.empty();) {
        char const* const samples[] = {
            R"()",
            R"(cmd1 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param2 = 8e-9 param1 = 3.14)", // flipped
            R"(cmd1 param1 = 3.14 param2 = -inf)",
            R"(cmd1 param2 = -inf param1 = 3.14)", // flipped
            R"(cmd2 param1 = " hello world " )",
    
            // things that would not have parsed with question code:
            R"(cmd2 param1 = "" )",
    
            // things that should not parse
            R"(cmd2 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param1 = " hello world " )",
            R"(cmd2 param1 = "" trailing rubbish)",
            R"(trailing rubbish)",
        };
    
        for (std::string const line : samples) {
            std::cout << std::left << std::setw(37) << quoted(line);
            try {
                auto parsed = parse_line(line);
                if (!parsed) {
                    std::cout << " -> --" << std::endl;
                    continue;
                }
    
                // Print the active alternative as `<type name><members>`.
                apply_visitor(
                    [](auto const& cmd) {
                        std::cout << " -> " << boost::typeindex::type_id_runtime(cmd).pretty_name()
                                  << boost::pfr::io(cmd) << std::endl;
                    },
                    *parsed);
            } catch (std::exception const& e) {
                std::cout << " -> ERROR " << e.what() << std::endl;
            }
        }
    }
    

    Printing

    ""                                    -> --
    "cmd1 param1 = 3.14 param2 = 8e-9"    -> ast::cmd1{3.14, 8e-09}
    "cmd1 param2 = 8e-9 param1 = 3.14"    -> ast::cmd1{3.14, 8e-09}
    "cmd1 param1 = 3.14 param2 = -inf"    -> ast::cmd1{3.14, -inf}
    "cmd1 param2 = -inf param1 = 3.14"    -> ast::cmd1{3.14, -inf}
    "cmd2 param1 = \" hello world \" "    -> ast::cmd2{" hello world "}
    "cmd2 param1 = \"\" "                 -> ast::cmd2{""}
    "cmd2 param1 = 3.14 param2 = 8e-9"    -> --
    "cmd1 param1 = \" hello world \" "    -> --
    "cmd2 param1 = \"\" trailing rubbish" -> --
    "trailing rubbish"                    -> --
    

    Summarizing

    I would probably make a general grammar and AST like

    // Sketch only: the `...` stands for further command kinds and is not
    // valid C++ as written.
    enum class CmdType { cmd1, cmd2, ... };
    using Param = std::string;
    // A value is either a number or a string.
    using Value = variant<double, std::string>;
    // multimap: the same Param key may be stored more than once.
    using Args  = std::multimap<Param, Value>;
    
    // One parsed command: which command it is, plus its arguments as parsed.
    struct Cmd {
        CmdType cmd;
        Args    args;
    };
    

    And create a validator function that validates the correctness of the commands after parsing. This way you get a very simple grammar that's easy to maintain, and way more flexibility regarding validation logic.