c++ boost boost-spirit boost-spirit-qi boost-spirit-karma

How do I parse datetime with boost::karma/qi?


I'm parsing a log file with the following format:

<line id>,<time>,<data_1>,<data_2>,<event_description>

The time is in the format dd-MM-yy HH:mm:ss.fff.

I need to extract parsed time, data_1 and data_2.

Here's what I'm doing for each line:

    // Field parser: one or more characters other than ','. lexeme[]
    // (presumably qi::lexeme) keeps the skipper from running inside the field.
    auto unquoted_string = lexeme[+(boost::spirit::qi::char_ - ',')];

    // Both start at -1 and are overwritten by the semantic actions below.
    double data_1=-1, data_2=-1;
    // Assigned from the <time> field by the semantic action on unquoted_string.
    boost::fusion::vector<char> datestr;

    // Line layout: int ',' field ',' double ',' double ',' field, with
    // boost::spirit::qi::space passed as the skipper.
    bool r = phrase_parse(
        std::begin(line), 
        std::end(line),
        int_>>','>>unquoted_string[ref(datestr)=_1]>>',' >> double_[ref(data_1) = _1] >> ',' >> double_[ref(data_2) = _1] >>','>>unquoted_string,
        boost::spirit::qi::space
    );

Now I'm left with boost::fusion::vector<char> containing the datetime string that needs to be parsed. How do I convert it to std::string? Is there a better way to parse time within boost::karma/qi?


Solution

  • Spirit Karma is meant for generating output, not for parsing, so no, you cannot use it for that.

    For a job like this I'd suggest not parsing the entire date format, but instead the general form of the line as you gave it:

    <line id>,<time>,<data_1>,<data_2>,<event_description>
    

    Let's define a recipient type:

    // One parsed log line of the form
    // <line id>,<time>,<data_1>,<data_2>,<event_description>.
    // Everything after the id is stored as text.
    struct Event {
        size_t      id;           // <line id>
        std::string date;         // <time>, kept as text
        std::string data1;        // <data_1>
        std::string data2;        // <data_2>
        std::string description;  // <event_description>
    };
    

    Adapt it for automatic attribute propagation:

    // Adapts Event as a Fusion sequence of its members in the listed order,
    // so a parser attribute can be propagated into an Event automatically.
    BOOST_FUSION_ADAPT_STRUCT(Event, id, date, data1, data2, description)
    

    A simple rule for it:

    // Parses one line into an Event. The skipper type is qi::blank_type, so
    // newlines are not skipped and stay visible to the grammar.
    // Each text field is every character up to the next comma; the description
    // runs to the end of the line.
    qi::rule<boost::spirit::istream_iterator, Event(), qi::blank_type> rule;
    rule =
        qi::ulong_long >> ',' >>            // id
        qi::raw[*~qi::char_(',')] >> ',' >> // date
        qi::raw[*~qi::char_(',')] >> ',' >> // data1
        qi::raw[*~qi::char_(',')] >> ',' >> // data2
        qi::raw[*(qi::char_ - qi::eol)]     // description
        ;
    

    And here we go:

    // rule % qi::eol: a list of events separated by line ends, collected into events.
    if (qi::phrase_parse(f, l, rule % qi::eol, qi::blank, events)) {
        for (auto& event : events) {
            std::cout << event << "\n----\n";
        }
    } else {
        std::cout << "Parse failed\n";
    }
    
    // Show whatever input is still left between f and l after the parse.
    if (f != l) {
        std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
    }
    

    Prints: Live On Coliru

             id:11886
           date:"05/09/20 01:01:06.338053260"
          data1:"26168"
          data2:"5374"
    description:"if (_mode & full_match) {"
    
    ----
             id:30215
           date:"05/09/20 01:01:15.391796323"
          data1:"23936"
          data2:"15742"
    description:"auto const& shape = shapes.at(id);"
    
    ----
             id:7386
           date:"05/09/20 01:01:15.463584888"
          data1:"26798"
          data2:"13486"
    description:"into.emplace_back();"
    
    ----
             id:24377
           date:"05/09/20 01:01:15.531308865"
          data1:"11735"
          data2:"15257"
    description:"auto pattern = _formats.begin();"
    
    ----
             id:11744
           date:"05/09/20 01:01:15.590114069"
          data1:"3451"
          data2:"17645"
    description:"auto bounds = field.bounds();"
    
    ----
             id:20148
           date:"05/09/20 01:01:15.652360522"
          data1:"12228"
          data2:"29033"
    description:"if ((_mode & mru) && pattern != _formats.begin()) {"
    
    ----
             id:9196
           date:"05/09/20 01:01:15.699402632"
          data1:"6639"
          data2:"27448"
    description:"#include <boost/archive/text_oarchive.hpp>"
    
    ----
             id:7341
           date:"05/09/20 01:01:15.754603212"
          data1:"21142"
          data2:"30650"
    description:"namespace attrs = boost::log::attributes;"
    
    ----
             id:14990
           date:"05/09/20 01:01:15.802583615"
          data1:"18421"
          data2:"10623"
    description:"BOOST_LOG_GLOBAL_LOGGER_INIT(logger, src::severity_logger_mt) {"
    
    ----
             id:19490
           date:"05/09/20 01:01:15.860306470"
          data1:"2883"
          data2:"848"
    description:"void Server::getNextSamples(std::vector<sf::Int16> oSamples) {"
    
    ----
             id:30360
           date:"05/09/20 01:01:15.918505128"
          data1:"4369"
          data2:"1998"
    description:"case shape::circle:  return os << \"circle\";"
    
    ----
    Remaining unparsed: "
    "
    

    Full Listing

    Live On Coliru

    #include <boost/spirit/include/qi.hpp>
    #include <boost/fusion/adapted/struct.hpp>
    #include <iomanip>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>
    
    // One parsed log line of the form
    // <line id>,<time>,<data_1>,<data_2>,<event_description>.
    // Everything after the id is stored as text.
    struct Event {
        size_t      id;           // <line id>
        std::string date;         // <time>, kept as text
        std::string data1;        // <data_1>
        std::string data2;        // <data_2>
        std::string description;  // <event_description>
    };
    
    // for parsing: adapts Event as a Fusion sequence of its members in the
    // listed order, so the Qi rule's attribute can be propagated into an Event
    BOOST_FUSION_ADAPT_STRUCT(Event, id, date, data1, data2, description)
    
    // Debug printer for Event: one "label:value" line per member, labels padded
    // to a common width, string members written through std::quoted.
    // Returns the stream that was passed in.
    static std::ostream& operator<<(std::ostream& os, Event const& evt) {
        return os << "         id:" << evt.id << "\n"
                  << "       date:" << std::quoted(evt.date) << "\n"
                  << "      data1:" << std::quoted(evt.data1) << "\n"
                  << "      data2:" << std::quoted(evt.data2) << "\n"
                  << "description:" << std::quoted(evt.description) << "\n";
    }
    
    int main() {
        //<line id>,<time>,<data_1>,<data_2>,<event_description>
        std::istringstream iss(R"(11886,05/09/20 01:01:06.338053260,26168,5374,            if (_mode & full_match) {
    30215,05/09/20 01:01:15.391796323,23936,15742,                    auto const& shape = shapes.at(id);
    7386,05/09/20 01:01:15.463584888,26798,13486,        into.emplace_back();
    24377,05/09/20 01:01:15.531308865,11735,15257,        auto pattern = _formats.begin();
    11744,05/09/20 01:01:15.590114069,3451,17645,        auto bounds = field.bounds();
    20148,05/09/20 01:01:15.652360522,12228,29033,            if ((_mode & mru) && pattern != _formats.begin()) {
    9196,05/09/20 01:01:15.699402632,6639,27448,#include <boost/archive/text_oarchive.hpp>
    7341,05/09/20 01:01:15.754603212,21142,30650,namespace attrs = boost::log::attributes;
    14990,05/09/20 01:01:15.802583615,18421,10623,BOOST_LOG_GLOBAL_LOGGER_INIT(logger, src::severity_logger_mt) {
    19490,05/09/20 01:01:15.860306470,2883,848,void Server::getNextSamples(std::vector<sf::Int16> oSamples) {
    30360,05/09/20 01:01:15.918505128,4369,1998,            case shape::circle:  return os << "circle";
    )");
    
        boost::spirit::istream_iterator f(iss >> std::noskipws), l;
    
        std::vector<Event> events;
    
        namespace qi = boost::spirit::qi;
    
        qi::rule<boost::spirit::istream_iterator, Event(), qi::blank_type> rule;
        rule =
            qi::ulong_long >> ',' >>            // id
            qi::raw[*~qi::char_(',')] >> ',' >> // date
            qi::raw[*~qi::char_(',')] >> ',' >> // data1
            qi::raw[*~qi::char_(',')] >> ',' >> // data2
            qi::raw[*(qi::char_ - qi::eol)]     // description
            ;
    
        if (qi::phrase_parse(f, l, rule % qi::eol, qi::blank, events)) {
            for (auto& event : events) {
                std::cout << event << "\n----\n";
            }
        } else {
            std::cout << "Parse failed\n";
        }
    
        if (f != l) {
            std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
        }
    }
    

    BONUS

    To actually parse the date-times, I'd suggest using Boost DateTime. Alternatively, for something based on strptime that's really versatile, look at this answer: "C++ boost date_input_facet seems to parse dates unexpectedly with incorrect formats passed to the facet constructor".