c++ c++17 boost-spirit

Updating a boost::spirit parser


In the past I made a boost::spirit parser to read a log, where only one type of line contained useful information while the rest could be discarded.

The format of the line is [2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000; and the information to be stored is located after the equals sign (=). It consists of ";"-separated fields standing for id(uint);position(double);telegram(hexchars); (the date, by the way, can be discarded).

In the new log format, a line may contain one or more such data records after the equals sign, as in [2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;2;253.9;A0027FC384DF40010C8092A041FF83FFF;3;449.9;A1027FC38F361210BF080007DF;

I must update the following code to support this new multi-record-per-line format, but I have been unable to do so after trying several combinations of the % and * parser operators:

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace structs {
    // One record taken from an "insert = id;position;hex;" log entry.
    // Every member has a default initializer so a default-constructed Message
    // never holds indeterminate values. The type is still an aggregate in
    // C++14 and later, so Message{id, position, hex} keeps working.
    struct Message
    {
        unsigned id = 0;        // first field of the record
        double position = 0.0;  // second field of the record
        std::string hex;        // third field: the run of hex digits, kept as text
    };

    // Sequence of records collected from one log.
    using Messages = std::vector<Message>;
}

namespace qi = boost::spirit::qi;

namespace parser {
// Boost.Spirit Qi reader that extracts `insert = id;position;hex;` records
// from log text and lets every other line fall through to `ignore`.
class log
{
    // Iterator type every rule below is instantiated with (see read()).
    using It = boost::spirit::istream_iterator;

public:
    // Wires up the three rules: tcl (one record line), ignore (any other
    // line) and start (the whole text, line by line).
    log()
    {
        using namespace qi;
        using namespace structs;
        using boost::phoenix::push_back;

        // One "insert" line: discard everything up to and including the first
        // '>', require the literal "insert =", then read exactly ONE
        // uint ';' double ';' hex-digits ';' triple and hand it to fill().
        // Further triples on the same line are not consumed by this rule.
        tcl = (omit[raw[*~char_('>')]] >> '>'
            >> "insert ="
            >> uint_ >> ';' >> double_ >> ';' >> lexeme[raw[+xdigit]] >> ';')[fill(_1, _2, _3, _val)];

        // Fallback for lines tcl rejects: everything up to the next CR/LF.
        ignore = *~char_("\r\n");

        // eol-separated list of lines. A line matching tcl is appended to the
        // result vector; anything else is matched by ignore. `blank` is the
        // skipper used inside the list.
        start = skip(blank)[(tcl[push_back(_val, _1)] | ignore) % eol];
    }

    // Parses `file`, which holds the log TEXT itself (it is copied into a
    // std::stringstream, not opened as a path). Returns the collected
    // messages, or an empty vector when qi::parse reports failure.
    structs::Messages read(const std::string& file)
    {
        structs::Messages messages;

        std::stringstream in{ file };
        //std::ifstream in(ss, std::ios_base::in);
        in >> std::noskipws;// the stream must not drop whitespace itself; the grammar's blank skipper handles it

        auto ok = qi::parse(boost::spirit::istream_iterator{ in }, boost::spirit::istream_iterator{}, start, messages);
        return ok ? messages : structs::Messages{};
    }

private:
    // Body of the semantic action attached to tcl: copies the three parsed
    // fields into `message`. `tlg` is the iterator range over the matched
    // hex digits, which is materialized into message.hex.
    struct deferred_fill
    {
        void operator() (unsigned id, double position, boost::iterator_range<It> const& tlg, structs::Message& message) const
        {
            message.id       = id;
            message.position = position;
            message.hex.assign(tlg.begin(), tlg.end());
        }
    };

    // ignore and tcl are declared with the blank skipper; start declares none
    // and installs it itself through skip(blank)[...].
    qi::rule<It, qi::blank_type>   ignore;
    qi::rule<It, structs::Messages> start;
    qi::rule<It, structs::Message, qi::blank_type> tcl;
    // Lazy (Phoenix) wrapper so deferred_fill can be called from a semantic action.
    boost::phoenix::function<deferred_fill> fill;
};

}

// Sample log in the old format: every "insert" line carries a single record.
const char* file1 =
R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
[2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
[2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
[2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
[2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id:  command: free
[2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;
[2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)";

// Sample log in the new format: an "insert" line may carry several records.
const char* file2 =
R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
[2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
[2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
[2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;2;253.9;A0027FC384DF40010C8092A041FF83FFF;3;449.9;A1027FC38F361210BF080007DF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
[2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id:  command: free
[2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;10;2261.9;A0127FC384E3C00115412C80008B1007F4;
[2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)";

int main()
{
    parser::log log;
    auto messages1 = log.read(file1);
    if (messages1.size() != 3) return 1;// Error parsing!!!

    auto messages2 = log.read(file2);
    if (messages2.size() != 6) return 1;// Error parsing!!!

    return 0;
}

The code must comply with C++17 (nothing newer, please) and must parse both the old (file1 string) and the new (file2 string) log formats without errors.

Thank you in advance


Solution

  • Review and simplification, making it self-contained: Live On Coliru, printing:

    Parsed
     - { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
     - { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
     - { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
    Parsed
     - { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
    

    Now, let's extract a msg rule for clarity:

    // msg parses exactly one `id;position;hex;` record into a Message.
    qi::rule<It, Message(), qi::blank_type>  msg;
    msg = (qi::uint_ >> ';'                            //
           >> qi::double_ >> ';'                       //
           >> qi::raw[qi::lexeme[+qi::xdigit]] >> ';') //
        [fill(_1, _2, _3, _val)];                      // fill() stores the three fields in _val
    

    Now it becomes a lot easier to see how to write the tcl rule to return Messages() instead of Message():

    // tcl now yields every record on an "insert" line: +msg repeats msg one
    // or more times and each result is appended to the rule's Messages.
    qi::rule<It, Messages(), qi::blank_type> tcl;
    tcl = qi::omit[*~qi::char_('>') >> '>'] >> "insert =" >> +msg[push_back(_val, _1)];
    

    Even better, don't state the default propagation:

    // No semantic action: the container synthesized by +msg is propagated to the rule's Messages attribute.
    tcl = qi::omit[*~qi::char_('>') >> '>'] >> "insert =" >> +msg;
    

    Now the entire demo reads:

    Live On Coliru

    #include <boost/phoenix.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <fmt/ranges.h>
    #include <sstream>
    
    namespace structs {
        // One record taken from an "insert = id;position;hex;" log entry.
        // Every member has a default initializer so a default-constructed
        // Message never holds indeterminate values. The type is still an
        // aggregate in C++14 and later, so Message{id, position, hex} and
        // `m = {id, position, hex}` keep working.
        struct Message {
            unsigned id = 0;        // first field of the record
            double position = 0.0;  // second field of the record
            std::string hex;        // third field: the run of hex digits, kept as text
        };

        // Sequence of records collected from one log.
        using Messages = std::vector<Message>;
    }
    
    namespace parser {
        namespace qi = boost::spirit::qi;
        using namespace structs;

        // Boost.Spirit Qi reader that collects every `id;position;hex;` record
        // found on "insert =" lines of a log; all other lines go to `ignore`.
        class log {
            // Iterator type every rule below is instantiated with (see read()).
            using It = boost::spirit::istream_iterator;

          public:
            // Wires up the grammar: msg (one record), tcl (one "insert" line),
            // ignore (any other line) and start (the whole text, line by line).
            log() {
                using namespace qi::labels;

                // One record: uint ';' double ';' hex-digits ';'. The hex run
                // is captured as an iterator range and passed to fill().
                msg = (qi::uint_ >> ';'                            //
                       >> qi::double_ >> ';'                       //
                       >> qi::raw[qi::lexeme[+qi::xdigit]] >> ';') //
                    [fill(_1, _2, _3, _val)];                      //

                // One "insert" line: discard everything up to and including
                // the first '>', require "insert =", then one or more records.
                tcl = qi::omit[*~qi::char_('>') >> '>'] >> "insert =" >> +msg;

                // Fallback for lines tcl rejects: everything up to the next CR/LF.
                ignore = *~qi::char_("\r\n");
                // eol-separated list of lines, each either tcl or ignore, with
                // `blank` installed as the skipper.
                start  = qi::skip(qi::blank)[ (tcl | ignore) % qi::eol ];
            }

            // Parses the log text available from `in` and returns the records
            // found; returns an empty vector when parse() reports failure.
            // NOTE: `in` is switched to std::noskipws and is not restored.
            // NOTE: both iterators are temporaries, so the caller cannot tell
            // how much of the input was consumed.
            Messages read(std::istream& in) const {
                Messages r;

                in >> std::noskipws; // the stream must not drop whitespace itself; the blank skipper handles it

                auto ok = parse(boost::spirit::istream_iterator{in}, {}, start, r);
                if (!ok)
                    r.clear();
                return r;
            }

          private:
            // Body of the semantic action attached to msg: builds a Message
            // from the two numbers and the text spanned by the range `tlg`.
            struct deferred_fill {
                void operator()(unsigned id, double position, boost::iterator_range<It> tlg, Message& m) const {
                    m = {id, position, std::string(tlg.begin(), tlg.end())};
                }
            };

            // start declares no skipper and installs `blank` itself via
            // qi::skip; the other rules are declared with the blank skipper.
            qi::rule<It, qi::blank_type>             ignore;
            qi::rule<It, Messages()>                 start;
            qi::rule<It, Messages(), qi::blank_type> tcl;
            qi::rule<It, Message(), qi::blank_type>  msg;
            // Lazy (Phoenix) wrapper so deferred_fill can be called from a semantic action.
            boost::phoenix::function<deferred_fill>  fill;
        };
    } // namespace parser
    
    // Sample log in the old format: every "insert" line carries a single record.
    static constexpr char const* file1 =
        R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
    [2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
    [2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
    [2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;
    [2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
    [2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
    [2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id:  command: free
    [2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;
    [2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)";
    
    // Sample log in the new format: an "insert" line may carry several records.
    static constexpr char const* file2 =
        R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
    [2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
    [2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
    [2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;2;253.9;A0027FC384DF40010C8092A041FF83FFF;3;449.9;A1027FC38F361210BF080007DF;
    [2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
    [2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
    [2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id:  command: free
    [2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;10;2261.9;A0127FC384E3C00115412C80008B1007F4;
    [2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)";
    
    // Lets fmt print a structs::Message as "{ id: ..., position: ..., hex: ... }".
    // Everything except format() is inherited from the std::string formatter.
    template <>
    struct fmt::formatter<structs::Message> : fmt::formatter<std::string> {
        template <typename Context>
        auto format(structs::Message const& msg, Context& context) const {
            auto out = context.out();
            return fmt::format_to(out, "{{ id: {}, position: {}, hex: {} }}", msg.id, msg.position, msg.hex);
        }
    };
    
    int main() {
        parser::log const p;
        for (auto txt : {file1, file2}) {
            std::istringstream s{txt};
            auto messages = p.read(s);
    
            fmt::print("Parsed\n - {}\n", fmt::join(messages, "\n - "));
        }
    }
    

    Printing

    Parsed
     - { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
     - { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
     - { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
    Parsed
     - { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
     - { id: 2, position: 253.9, hex: A0027FC384DF40010C8092A041FF83FFF }
     - { id: 3, position: 449.9, hex: A1027FC38F361210BF080007DF }
     - { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
     - { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
     - { id: 10, position: 2261.9, hex: A0127FC384E3C00115412C80008B1007F4 }
    

    Bonus Modernizations/Speed-ups

    Since you already have only stateless rules and almost no automatic attribute propagation, your grammar is trivial to migrate to Spirit X3 if you wish to cut down on compile times.

    Down 26 lines of code:

    Live On Coliru

    #include <boost/fusion/include/at_c.hpp>
    #include <boost/phoenix/fusion/at.hpp>
    #include <boost/spirit/home/x3.hpp>
    #include <boost/spirit/include/support_istream_iterator.hpp>
    #include <fmt/ranges.h>
    #include <sstream>
    
    namespace structs {
        // One record taken from an "insert = id;position;hex;" log entry.
        // Every member has a default initializer so a default-constructed
        // Message never holds indeterminate values. The type is still an
        // aggregate in C++14 and later, so brace-initialization from
        // (id, position, hex) keeps working.
        struct Message {
            unsigned id = 0;        // first field of the record
            double position = 0.0;  // second field of the record
            std::string hex;        // third field: the run of hex digits, kept as text
        };

        // Sequence of records collected from one log.
        using Messages = std::vector<Message>;
    }
    
    // Parses a whole log from `in` with Spirit X3 and returns every
    // `id;position;hex;` record found on "insert =" lines, in input order.
    // Lines that do not match are consumed by `ignore`. Returns an empty
    // vector when phrase_parse reports failure.
    // NOTE: `in` is switched to std::noskipws and is not restored.
    static inline structs::Messages readLog(std::istream& in) {
        namespace x3 = boost::spirit::x3;
        structs::Messages r;

        // Semantic action for one record: appends it to `r`. The elements of
        // the msg attribute are read with the eager boost::fusion::at_c.
        // boost::phoenix::at_c builds a lazy Phoenix expression rather than
        // returning the element, so it is not the accessor to name here.
        auto fill = [&r](auto&& ctx) {
            using boost::fusion::at_c;
            auto& a = _attr(ctx);
            r.push_back({at_c<0>(a), at_c<1>(a), at_c<2>(a)});
        };

        // One record: uint ';' double ';' hex-digits ';'.
        auto msg = (x3::uint_ >> ';'                   //
                    >> x3::double_ >> ';'              //
                    >> x3::lexeme[+x3::xdigit] >> ';') //
            [fill];                                    //

        // One "insert" line: seek past the first '>', require "insert =",
        // then one or more records.
        auto tcl    = x3::seek['>'] >> "insert =" >> +msg;
        // Fallback for lines tcl rejects: everything up to the next CR/LF.
        auto ignore = *~x3::char_("\r\n");
        // eol-separated list of lines, each either tcl or ignore.
        auto start  = (tcl | ignore) % x3::eol;

        using It = boost::spirit::istream_iterator;
        in >> std::noskipws; // the stream must not drop whitespace itself; x3::blank is the skipper

        if (!phrase_parse(It{in}, {}, start, x3::blank))
            r.clear();
        return r;
    }
    
    // Sample log in the old format: every "insert" line carries a single record.
    static constexpr char const* file1 =
        R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
    [2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
    [2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
    [2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;
    [2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
    [2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
    [2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id:  command: free
    [2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;
    [2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)";
    
    // Sample log in the new format: an "insert" line may carry several records.
    static constexpr char const* file2 =
        R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
    [2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
    [2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
    [2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;2;253.9;A0027FC384DF40010C8092A041FF83FFF;3;449.9;A1027FC38F361210BF080007DF;
    [2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
    [2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
    [2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id:  command: free
    [2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;10;2261.9;A0127FC384E3C00115412C80008B1007F4;
    [2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)";
    
    // Lets fmt print a structs::Message as "{ id: ..., position: ..., hex: ... }".
    // Everything except format() is inherited from the std::string formatter.
    template <>
    struct fmt::formatter<structs::Message> : fmt::formatter<std::string> {
        template <typename Context>
        auto format(structs::Message const& msg, Context& context) const {
            auto out = context.out();
            return fmt::format_to(out, "{{ id: {}, position: {}, hex: {} }}", msg.id, msg.position, msg.hex);
        }
    };
    
    // Runs readLog over both sample logs and prints the records found in each.
    int main() {
        for (char const* text : {file1, file2}) {
            std::istringstream input{text};
            fmt::print("Parsed\n - {}\n", fmt::join(readLog(input), "\n - "));
        }
    }
    

    Printing

    Parsed
     - { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
     - { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
     - { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
    Parsed
     - { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
     - { id: 2, position: 253.9, hex: A0027FC384DF40010C8092A041FF83FFF }
     - { id: 3, position: 449.9, hex: A1027FC38F361210BF080007DF }
     - { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
     - { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
     - { id: 10, position: 2261.9, hex: A0127FC384E3C00115412C80008B1007F4 }