Tags: boost, uuid, boost-spirit, boost-spirit-qi, boost-uuid

Parse boost uuids with boost spirit


I'm trying to write a boost::uuids::uuid parser with boost::spirit::qi in order to use it in a nice way with other qi parsers and to have a nice unified parser api.

My first idea was to write a custom qi::grammar which would use boost::conversion::try_lexical_convert<boost::uuids::uuid &, const std::string &>. However, this makes it hard to set the begin iterator correctly to the consumed position, since boost::conversion::try_lexical_convert<boost::uuids::uuid &, const std::string &> would not only match a 16 character long input but also input with enclosing braces or without dashes.

My second approach is to use a boost::spirit::qi::rule (or a grammar CRTP derived from boost::spirit::qi::grammar::base_type if you will), but then I got compile errors, probably from the BOOST_FUSION_ADAPT_STRUCT expression:

    #include <iostream>
    #include <string>
    #include <cstdint>
    #include <boost/uuid/uuid.hpp>
    #include <boost/spirit/include/qi.hpp>


    // Attempts to adapt boost::uuids::uuid as a Fusion sequence of sixteen
    // uint8_t members, one entry per element of its `data` array.
    // NOTE(review): BOOST_FUSION_ADAPT_STRUCT takes member *names*; `data[0]`
    // is an array-element expression, so this adaptation is presumably one
    // source of the compile errors described in the text - confirm against the
    // Boost.Fusion documentation.
    BOOST_FUSION_ADAPT_STRUCT(
            boost::uuids::uuid,
            (uint8_t, data[0])
            (uint8_t, data[1])
            (uint8_t, data[2])
            (uint8_t, data[3])
            (uint8_t, data[4])
            (uint8_t, data[5])
            (uint8_t, data[6])
            (uint8_t, data[7])
            (uint8_t, data[8])
            (uint8_t, data[9])
            (uint8_t, data[10])
            (uint8_t, data[11])
            (uint8_t, data[12])
            (uint8_t, data[13])
            (uint8_t, data[14])
            (uint8_t, data[15])
    )

    // Namespace-scope variable template holding a rule that is meant to match
    // the body of a UUID: sixteen hexadecimal uint_parsers arranged in groups
    // of 4-2-2-2-6, with an optional "-" literal between the groups.
    //
    // NOTE(review): uint_parser's template arguments are
    // <T, Radix, MinDigits, MaxDigits>, so <uint8_t, 16, 1, 1> consumes exactly
    // ONE hex digit per parser; sixteen of them cover 16 hex digits, not the
    // 32 of a full UUID. <uint8_t, 16, 2, 2> is presumably what was intended.
    // NOTE(review): the attribute signature here is `boost::uuids::uuid`
    // without the trailing `()` used by the rules inside the grammar struct
    // further down - confirm which form is intended.
    // NOTE(review): the grammar struct further down declares its own member
    // with this same name, so this variable template looks unused.
    template<typename Iterator>
    boost::spirit::qi::rule<Iterator, boost::uuids::uuid>
            uuid_internal_{
            boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    //time-low
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> -boost::spirit::qi::lit("-")
                    //time-mid
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> -boost::spirit::qi::lit("-")
                    //time-high-and-version
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> -boost::spirit::qi::lit("-")
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>() //clock-seq-and-reserved
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>() //clock-seq-low
                    >> -boost::spirit::qi::lit("-")
                    //node
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                    >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
    };

    // Qi grammar whose synthesized attribute is a boost::uuids::uuid.
    // The start rule accepts the UUID body either wrapped in "{" ... "}" or
    // bare. The body itself is the member rule `uuid_internal_`, set up through
    // a default member initializer.
    //
    // NOTE(review): this is the code that triggers the "no member named
    // 'insert' in 'boost::uuids::uuid'" error quoted in the text; it is kept
    // as posted.
    template<typename Iterator>
    struct uuid_
            : ::boost::spirit::qi::grammar<Iterator, boost::uuids::uuid()>{
        // Registers `start` as the grammar's entry rule.
        uuid_() : uuid_::base_type(start) {

            // `%=` is Qi's auto-rule assignment: the attribute of the
            // right-hand side is propagated to the rule's attribute.
            start %= (boost::spirit::qi::lit("{") >> uuid_internal_ >> boost::spirit::qi::lit("}")) |
                     uuid_internal_ ;
        }

        // Entry rule: braced or bare UUID.
        boost::spirit::qi::rule<Iterator, boost::uuids::uuid()> start;

        // UUID body: sixteen hex uint_parsers grouped 4-2-2-2-6 with an
        // optional "-" between groups.
        // NOTE(review): <uint8_t, 16, 1, 1> means Radix 16, MinDigits 1,
        // MaxDigits 1, i.e. one hex digit per parser - presumably 2, 2 was
        // intended so that each parser yields one full byte.
        boost::spirit::qi::rule<Iterator, boost::uuids::uuid()>
                uuid_internal_{
                boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        //time-low
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> -boost::spirit::qi::lit("-")
                        //time-mid
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> -boost::spirit::qi::lit("-")
                        //time-high-and-version
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> -boost::spirit::qi::lit("-")
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>() //clock-seq-and-reserved
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>() //clock-seq-low
                        >> -boost::spirit::qi::lit("-")
                        //node
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
                        >> boost::spirit::qi::uint_parser<uint8_t, 16, 1, 1>()
        };

    };

    int main() {
        std::string input;
        std::cin >> input;
        uuid_<std::string::const_iterator> uuid_{};
        boost::uuids::uuid uuid{};
        auto begin = input.begin(), end = input.end();

        const bool success = boost::spirit::qi::parse(begin, end, uuid_, uuid);
        if (!success || begin != end)
            throw std::runtime_error("Parsing failed");

        return 0;


    }

/opt/local/include/boost/spirit/home/support/container.hpp:292:15: error: no member named 'insert' in 'boost::uuids::uuid' c.insert(c.end(), val);

This seems to be an issue generated by boost::spirit::qi::detail::pass_through_container; however, my approach with BOOST_FUSION_ADAPT_ADT in combination with *(obj.begin()+n) also failed with multiple errors.


Solution

  • You can use the builtin qi::stream directive to get 90% of the way:

    // Excerpt from the full listing below. qi::stream extracts the attribute
    // through its stream operator>>, which for boost::uuids::uuid is declared
    // in <boost/uuid/uuid_io.hpp>.
    uuid_ = qi::stream;
    start = '{' >> uuid_ >> '}' | uuid_;
    

    See it Live On Coliru

    #include <boost/spirit/include/qi.hpp>
    #include <boost/uuid/uuid_io.hpp>
    #include <iomanip>
    
    namespace qi = boost::spirit::qi;
    
    template <typename Iterator> struct uuid_type : ::qi::grammar<Iterator, boost::uuids::uuid()> {
        uuid_type() : uuid_type::base_type(start) {
    
            start = '{' >> uuid_ >> '}' | uuid_;
            uuid_ = qi::stream;
        }
      private:
        qi::rule<Iterator, boost::uuids::uuid()> start, uuid_;
    };
    
    int main() {
        uuid_type<std::string::const_iterator> uuid_{};
    
        for (std::string const input : {
                "2bc69ead-4aba-4a39-92c0-9565f4d464b4",
                "2BC69EAD-4ABA-4A39-92C0-9565F4D464B4",
                "{2bc69ead-4aba-4a39-92c0-9565f4d464b4}",
                "{2BC69EAD-4ABA-4A39-92C0-9565F4D464B4}",
                //"{2bc69ead--4aba--4a39----92c0--9565f4d464b4}",
                //"{2BC69EAD--4ABA--4A39----92C0--9565F4D464B4}",
                })
        {
            boost::uuids::uuid uuid{};
    
            std::cout << "==== Input " << std::quoted(input) << "\n";
    
            if (qi::parse(input.begin(), input.end(), uuid_ >> qi::eoi, uuid))
                std::cout << "Parsed " << uuid << "\n";
            else
                std::cout << "Parsing failed\n";
        }
    }
    

    Prints

    ==== Input "2bc69ead-4aba-4a39-92c0-9565f4d464b4"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4
    ==== Input "2BC69EAD-4ABA-4A39-92C0-9565F4D464B4"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4
    ==== Input "{2bc69ead-4aba-4a39-92c0-9565f4d464b4}"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4
    ==== Input "{2BC69EAD-4ABA-4A39-92C0-9565F4D464B4}"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4
    

    The Remaining 10%

    As per a modified Pareto principle the remaining 10% are the hard part.

    I'm not even sure you want this, but +qi::lit("-") implies that the commented test cases should also be accepted (?!):

            //"{2bc69ead--4aba--4a39----92c0--9565f4d464b4}",
            //"{2BC69EAD--4ABA--4A39----92C0--9565F4D464B4}",
    

    If that's /really/ what you want, I'd indeed suggest a two-phase parse operation with the lexical cast implementation:

    Ok, since it's now more than an hour later, this means that it's more like "10% of the functionality will take 900% of the effort" - I hope you really wanted it :)

    Live On Coliru

    #include <boost/spirit/include/qi.hpp>
    #include <boost/lexical_cast.hpp>
    #include <boost/uuid/uuid_io.hpp>
    #include <iomanip>
    
    using Uuid = boost::uuids::uuid;
    
    namespace boost::spirit::traits {
        template <> struct is_container<Uuid> : mpl::false_ {};
    
        template <> struct assign_to_attribute_from_value<Uuid, std::string> {
            static void call(std::string const& s, Uuid& v) { v = lexical_cast<Uuid>(s); }
        };
    }
    
    namespace qi = boost::spirit::qi;
    
    /// Qi grammar producing a Uuid from text of the shape 8-4-4-4-12 hex
    /// digits, optionally wrapped in curly braces, where every separator may
    /// be one or more '-' characters. The matched text is collected into a
    /// std::string (runs of dashes collapsed to a single '-') and then
    /// assigned to the Uuid attribute.
    template <typename Iterator>
    struct uuid_type : qi::grammar<Iterator, Uuid()> {
        uuid_type() : uuid_type::base_type(start) {
            // One or more dashes in the input, exposed as exactly one '-'.
            auto dashes = qi::copy(+qi::lit('-') >> qi::attr('-'));

            // Building blocks named after the number of BYTES they cover:
            // two bytes are four hex digits.
            auto bytes2 = qi::copy(qi::xdigit >> qi::xdigit >> qi::xdigit >> qi::xdigit);
            auto bytes4 = qi::copy(bytes2 >> bytes2);
            auto bytes6 = qi::copy(bytes4 >> bytes2);

            // 8-4-4-4-12 hex digits.
            auto layout = qi::copy(
                bytes4 >> dashes >> bytes2 >> dashes >> bytes2 >> dashes >> bytes2 >> dashes >> bytes6
            );

            start = qi::as_string[(qi::lit('{') >> layout >> qi::lit('}')) | layout];
        }

      private:
        qi::rule<Iterator, Uuid()> start;
    };
    
    int main() {
        uuid_type<std::string::const_iterator> uuid_{};
    
        for (std::string const input : {
                "2bc69ead-4aba-4a39-92c0-9565f4d464b4",
                "2BC69EAD-4ABA-4A39-92C0-9565F4D464B4",
                "{2bc69ead-4aba-4a39-92c0-9565f4d464b4}",
                "{2BC69EAD-4ABA-4A39-92C0-9565F4D464B4}",
                "{2bc69ead--4aba--4a39----92c0--9565f4d464b4}",
                "{2BC69EAD--4ABA--4A39----92C0--9565F4D464B4}",
                })
        {
            Uuid uuid{};
    
            std::cout << "==== Input " << std::quoted(input) << "\n";
    
            auto f = input.begin(), l = input.end();
            if (qi::parse(f, l, uuid_ >> qi::eoi, uuid))
                std::cout << "Parsed " << uuid << "\n";
            else
                std::cout << "Parsing failed\n";
        }
    }
    

    Prints

    ==== Input "2bc69ead-4aba-4a39-92c0-9565f4d464b4"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4
    ==== Input "2BC69EAD-4ABA-4A39-92C0-9565F4D464B4"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4
    ==== Input "{2bc69ead-4aba-4a39-92c0-9565f4d464b4}"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4
    ==== Input "{2BC69EAD-4ABA-4A39-92C0-9565F4D464B4}"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4
    ==== Input "{2bc69ead--4aba--4a39----92c0--9565f4d464b4}"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4
    ==== Input "{2BC69EAD--4ABA--4A39----92C0--9565F4D464B4}"
    Parsed 2bc69ead-4aba-4a39-92c0-9565f4d464b4