I'm parsing a log file with the following format:
<line id>,<time>,<data_1>,<data_2>,<event_description>
The time is in the format dd-MM-yy HH:mm:ss.fff.
I need to extract parsed time, data_1 and data_2.
Here's what I'm doing for each line:
// Field parser: one or more characters that are not a comma.
// NOTE(review): keeping a Qi expression in a plain `auto` variable is widely
// reported to leave dangling references to temporaries; confirm, and consider
// wrapping the expression in qi::copy(...).
auto unquoted_string = lexeme[+(boost::spirit::qi::char_ - ',')];
// Both values start at -1 and are overwritten by the semantic actions below.
double data_1=-1, data_2=-1;
// Receives the attribute of the time field through the semantic action below.
boost::fusion::vector<char> datestr;
// Parse one line laid out as <int>,<text>,<double>,<double>,<text>,
// using qi::space as the skipper.
bool r = phrase_parse(
std::begin(line),
std::end(line),
int_>>','>>unquoted_string[ref(datestr)=_1]>>',' >> double_[ref(data_1) = _1] >> ',' >> double_[ref(data_2) = _1] >>','>>unquoted_string,
boost::spirit::qi::space
);
Now I'm left with a boost::fusion::vector<char>
containing the datetime string that needs to be parsed. How do I convert it to std::string? Is there a better way to parse time within boost::karma/qi?
Spirit Karma is meant for generating output, not for parsing, so no, you cannot use it for that.
For a job like this I'd suggest not parsing the entire date format, but instead the general form of the line as you gave it:
<line id>,<time>,<data_1>,<data_2>,<event_description>
Let's define a recipient type:
/// One parsed log line: <line id>,<time>,<data_1>,<data_2>,<event_description>.
/// Every field except the id is kept as the raw text found in its column.
struct Event {
    size_t id = 0; // initialised so a default-constructed Event never holds an indeterminate id
    std::string date, data1, data2, description;
};
Adapt it for automatic attribute propagation:
BOOST_FUSION_ADAPT_STRUCT(Event, id, date, data1, data2, description)
A simple rule for it:
// Grammar for a single log line, producing an Event and using a qi::blank_type skipper.
// The date, data1 and data2 columns are captured as raw text up to the next comma;
// the description is captured as raw text up to (not including) the end of line.
qi::rule<boost::spirit::istream_iterator, Event(), qi::blank_type> rule;
rule =
qi::ulong_long >> ',' >> // id
qi::raw[*~qi::char_(',')] >> ',' >> // date
qi::raw[*~qi::char_(',')] >> ',' >> // data1
qi::raw[*~qi::char_(',')] >> ',' >> // data2
qi::raw[*(qi::char_ - qi::eol)] // description
;
And here we go:
// Parse a list of events separated by end-of-line (rule % qi::eol) into `events`,
// with qi::blank as the skipper, then print each parsed event.
if (qi::phrase_parse(f, l, rule % qi::eol, qi::blank, events)) {
for (auto& event : events) {
std::cout << event << "\n----\n";
}
} else {
std::cout << "Parse failed\n";
}
// Show whatever input is left between the iterators after parsing, quoted.
if (f != l) {
std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
}
Prints: Live On Coliru
id:11886
date:"05/09/20 01:01:06.338053260"
data1:"26168"
data2:"5374"
description:"if (_mode & full_match) {"
----
id:30215
date:"05/09/20 01:01:15.391796323"
data1:"23936"
data2:"15742"
description:"auto const& shape = shapes.at(id);"
----
id:7386
date:"05/09/20 01:01:15.463584888"
data1:"26798"
data2:"13486"
description:"into.emplace_back();"
----
id:24377
date:"05/09/20 01:01:15.531308865"
data1:"11735"
data2:"15257"
description:"auto pattern = _formats.begin();"
----
id:11744
date:"05/09/20 01:01:15.590114069"
data1:"3451"
data2:"17645"
description:"auto bounds = field.bounds();"
----
id:20148
date:"05/09/20 01:01:15.652360522"
data1:"12228"
data2:"29033"
description:"if ((_mode & mru) && pattern != _formats.begin()) {"
----
id:9196
date:"05/09/20 01:01:15.699402632"
data1:"6639"
data2:"27448"
description:"#include <boost/archive/text_oarchive.hpp>"
----
id:7341
date:"05/09/20 01:01:15.754603212"
data1:"21142"
data2:"30650"
description:"namespace attrs = boost::log::attributes;"
----
id:14990
date:"05/09/20 01:01:15.802583615"
data1:"18421"
data2:"10623"
description:"BOOST_LOG_GLOBAL_LOGGER_INIT(logger, src::severity_logger_mt) {"
----
id:19490
date:"05/09/20 01:01:15.860306470"
data1:"2883"
data2:"848"
description:"void Server::getNextSamples(std::vector<sf::Int16> oSamples) {"
----
id:30360
date:"05/09/20 01:01:15.918505128"
data1:"4369"
data2:"1998"
description:"case shape::circle: return os << \"circle\";"
----
Remaining unparsed: "
"
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <iostream>
#include <iomanip>
/// One parsed log line: <line id>,<time>,<data_1>,<data_2>,<event_description>.
/// Every field except the id is kept as the raw text found in its column.
struct Event {
    size_t id = 0; // initialised so a default-constructed Event never holds an indeterminate id
    std::string date, data1, data2, description;
};
// For parsing: adapt Event as a Fusion sequence, listing its members in the
// order the grammar produces them, so attributes propagate into it automatically.
BOOST_FUSION_ADAPT_STRUCT(Event, id, date, data1, data2, description)
// Debug printer: writes every Event field on its own labelled line and
// returns the stream; the string fields are emitted through std::quoted.
static std::ostream& operator<<(std::ostream& os, Event const& evt) {
    return os << " id:" << evt.id << "\n"
              << " date:" << std::quoted(evt.date) << "\n"
              << " data1:" << std::quoted(evt.data1) << "\n"
              << " data2:" << std::quoted(evt.data2) << "\n"
              << "description:" << std::quoted(evt.description) << "\n";
}
int main() {
//<line id>,<time>,<data_1>,<data_2>,<event_description>
std::istringstream iss(R"(11886,05/09/20 01:01:06.338053260,26168,5374, if (_mode & full_match) {
30215,05/09/20 01:01:15.391796323,23936,15742, auto const& shape = shapes.at(id);
7386,05/09/20 01:01:15.463584888,26798,13486, into.emplace_back();
24377,05/09/20 01:01:15.531308865,11735,15257, auto pattern = _formats.begin();
11744,05/09/20 01:01:15.590114069,3451,17645, auto bounds = field.bounds();
20148,05/09/20 01:01:15.652360522,12228,29033, if ((_mode & mru) && pattern != _formats.begin()) {
9196,05/09/20 01:01:15.699402632,6639,27448,#include <boost/archive/text_oarchive.hpp>
7341,05/09/20 01:01:15.754603212,21142,30650,namespace attrs = boost::log::attributes;
14990,05/09/20 01:01:15.802583615,18421,10623,BOOST_LOG_GLOBAL_LOGGER_INIT(logger, src::severity_logger_mt) {
19490,05/09/20 01:01:15.860306470,2883,848,void Server::getNextSamples(std::vector<sf::Int16> oSamples) {
30360,05/09/20 01:01:15.918505128,4369,1998, case shape::circle: return os << "circle";
)");
boost::spirit::istream_iterator f(iss >> std::noskipws), l;
std::vector<Event> events;
namespace qi = boost::spirit::qi;
qi::rule<boost::spirit::istream_iterator, Event(), qi::blank_type> rule;
rule =
qi::ulong_long >> ',' >> // id
qi::raw[*~qi::char_(',')] >> ',' >> // date
qi::raw[*~qi::char_(',')] >> ',' >> // data1
qi::raw[*~qi::char_(',')] >> ',' >> // data2
qi::raw[*(qi::char_ - qi::eol)] // description
;
if (qi::phrase_parse(f, l, rule % qi::eol, qi::blank, events)) {
for (auto& event : events) {
std::cout << event << "\n----\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
}
}
To actually parse the date-times, I'd suggest using Boost DateTime. Alternatively, look here for something based on strptime
that's really versatile: C++ boost date_input_facet seems to parse dates unexpectedly with incorrect formats passed to the facet constructor