In the past I wrote a `boost::spirit` parser to read a log in which only one type of line contains useful information, while the rest can be discarded.
The format of the line is [2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;
and the information to be stored is located after the equals sign (=) and consists of ";"-separated data standing for id(uint);position(double);telegram(hexchars);
(by the way, the date is discardable).
In the new log format, one or more data records may follow the equals sign, as in [2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;2;253.9;A0027FC384DF40010C8092A041FF83FFF;3;449.9;A1027FC38F361210BF080007DF;
I must update the following code to support multiple records per line, but I have been unable to do so after trying several combinations of the % and * parser operators:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace structs {
/// One record taken from an "insert =" log line.
///
/// The numeric members are value-initialized so that a default-constructed
/// Message never holds indeterminate values; the type is still an aggregate
/// (C++14 and later), so brace-initialization keeps working.
struct Message
{
    unsigned id{};      ///< first field of the record
    double position{};  ///< second field of the record
    std::string hex;    ///< third field: the telegram, kept as its hex-digit text
};
/// All records extracted from a log, in order of appearance.
using Messages = std::vector<Message>;
}
namespace qi = boost::spirit::qi;
namespace parser {
// Qi-based reader that extracts the records of "insert =" lines from a log
// text and lets every other line fall through to a catch-all rule.
class log
{
using It = boost::spirit::istream_iterator;
public:
// Builds the grammar once; read() then parses with the `start` rule.
log()
{
using namespace qi;
using namespace structs;
using boost::phoenix::push_back;
// tcl: consume everything up to and including the first '>', require the
// literal "insert =", then parse exactly ONE id;position;hex; triple and hand
// the three values to fill(), which stores them in the rule's Message attribute.
tcl = (omit[raw[*~char_('>')]] >> '>'
>> "insert ="
>> uint_ >> ';' >> double_ >> ';' >> lexeme[raw[+xdigit]] >> ';')[fill(_1, _2, _3, _val)];
// ignore: consume the rest of the current line (any character except CR/LF).
ignore = *~char_("\r\n");
// start: an eol-separated list of lines parsed with a blank skipper; a line
// matching tcl appends its Message to the result, any other line is ignored.
start = skip(blank)[(tcl[push_back(_val, _1)] | ignore) % eol];
}
// Parses the log text held in `file` (the text itself is loaded into a
// std::stringstream) and returns the collected messages, or an empty vector
// when qi::parse reports failure.
structs::Messages read(const std::string& file)
{
structs::Messages messages;
std::stringstream in{ file };
//std::ifstream in(ss, std::ios_base::in);
in >> std::noskipws;// keep whitespace in the stream; skipping is left to the grammar's blank skipper
auto ok = qi::parse(boost::spirit::istream_iterator{ in }, boost::spirit::istream_iterator{}, start, messages);
return ok ? messages : structs::Messages{};
}
private:
// Callable wrapped by `fill` below: copies the three parsed fields of one
// record into the given Message.
struct deferred_fill
{
void operator() (unsigned id, double position, boost::iterator_range<It> const& tlg, structs::Message& message) const
{
message.id = id;
message.position = position;
// tlg is the iterator range matched by raw[+xdigit]; copy its characters.
message.hex.assign(tlg.begin(), tlg.end());
}
};
// Rest-of-line consumer; no attribute, uses the blank skipper.
qi::rule<It, qi::blank_type> ignore;
// Top-level rule; declared without a skipper because it applies skip(blank) itself.
qi::rule<It, structs::Messages> start;
// One "insert =" line producing one Message.
qi::rule<It, structs::Message, qi::blank_type> tcl;
// Phoenix function object so that fill(...) can be used inside a semantic action.
boost::phoenix::function<deferred_fill> fill;
};
}
// Sample log in the old format: three "insert =" lines, each carrying a single
// id;position;hex; record.
auto file1{
R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
[2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
[2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
[2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
[2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id: command: free
[2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;
[2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)" };
// Sample log in the new format: the three "insert =" lines carry 3, 1 and 2
// records respectively (6 records in total).
auto file2{
R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
[2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
[2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
[2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;2;253.9;A0027FC384DF40010C8092A041FF83FFF;3;449.9;A1027FC38F361210BF080007DF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
[2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id: command: free
[2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;10;2261.9;A0127FC384E3C00115412C80008B1007F4;
[2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)" };
int main()
{
parser::log log;
auto messages1 = log.read(file1);
if (messages1.size() != 3) return 1;// Error parsing!!!
auto messages2 = log.read(file2);
if (messages2.size() != 6) return 1;// Error parsing!!!
return 0;
}
The code must compile as C++17 (nothing newer, please) and must parse both the old (file1 string) and the new (file2 string) log formats without errors.
Thank you in advance
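First, a review and simplification of the original code, making it self-contained: Live On Coliru. Note that for the second log only the first message is returned: `tcl` accepts a single record, the remainder of that line is not an end-of-line, so the `% eol` list stops there while the parse still reports success. It prints: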
Parsed
- { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
- { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
- { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
Parsed
- { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
Now, let's extract a `msg` rule for clarity:
// One id;position;hex; record; the rule's attribute is a single Message.
qi::rule<It, Message(), qi::blank_type> msg;
msg = (qi::uint_ >> ';' // id
>> qi::double_ >> ';' // position
>> qi::raw[qi::lexeme[+qi::xdigit]] >> ';') // telegram: the matched run of hex digits
[fill(_1, _2, _3, _val)]; // store the three fields in the rule's attribute
Now it becomes a lot easier to see how to write the `tcl` rule to return `Messages()` instead of `Message()`:
// An "insert =" line: the rule's attribute is now a whole vector of messages.
qi::rule<It, Messages(), qi::blank_type> tcl;
// Drop the prefix up to and including the first '>', require "insert =", then
// append each of the one-or-more records to the rule's attribute explicitly.
tcl = qi::omit[*~qi::char_('>') >> '>'] >> "insert =" >> +msg[push_back(_val, _1)];
Even better, rely on automatic attribute propagation instead of spelling out the `push_back`:
// Same rule without the semantic action: the records matched by +msg are
// propagated into the rule's Messages attribute automatically.
tcl = qi::omit[*~qi::char_('>') >> '>'] >> "insert =" >> +msg;
Now the entire demo reads:
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fmt/ranges.h>
#include <sstream>
namespace structs {
    /// One record taken from an "insert =" log line.
    ///
    /// The numeric members are value-initialized so that a default-constructed
    /// Message never holds indeterminate values; the type is still an aggregate
    /// (C++14 and later), so `Message{id, position, hex}` keeps working.
    struct Message {
        unsigned id{};      ///< first field of the record
        double position{};  ///< second field of the record
        std::string hex;    ///< third field: the telegram, kept as its hex-digit text
    };
    /// All records extracted from a log, in order of appearance.
    using Messages = std::vector<Message>;
}
namespace parser {
    namespace qi = boost::spirit::qi;
    using namespace structs;

    /// Qi-based log reader: collects every id;position;hex; record found on
    /// "insert =" lines and skips all other lines.
    class log {
        using It = boost::spirit::istream_iterator;

      public:
        /// Builds the grammar once; read() only uses the finished rules.
        log() {
            using namespace qi::labels;

            // One "id;position;hex;" record.
            msg = (qi::uint_ >> ';'                            // id
                   >> qi::double_ >> ';'                       // position
                   >> qi::raw[qi::lexeme[+qi::xdigit]] >> ';') // telegram hex digits
                [fill(_1, _2, _3, _val)];

            // Rest of the current line, whatever it contains.
            ignore = *~qi::char_("\r\n");

            // Prefix up to the first '>' (never scanning past the end of the
            // current line), the "insert =" marker and one or more records.
            // The trailing `ignore` swallows anything left on the line: without
            // it, an unparsed tail would make the `% eol` list in `start` stop
            // there, silently dropping every following line while parse()
            // still reports success.
            tcl = qi::omit[*~qi::char_(">\r\n") >> '>'] >> "insert =" >> +msg >> ignore;

            // Every line is either an "insert =" line or something to ignore.
            start = qi::skip(qi::blank)[(tcl | ignore) % qi::eol];
        }

        /// Parses the whole stream `in` and returns the records found.
        /// @param in  stream positioned at the start of the log; its skipws
        ///            flag is cleared so that the grammar sees all whitespace.
        /// @return the records in order of appearance, or an empty vector if
        ///         the parse reports failure.
        Messages read(std::istream& in) const {
            Messages r;
            in >> std::noskipws; // whitespace handling is left to the blank skipper
            auto ok = parse(boost::spirit::istream_iterator{in}, {}, start, r);
            if (!ok)
                r.clear();
            return r;
        }

      private:
        /// Callable behind `fill`: builds a Message from the three parsed fields.
        struct deferred_fill {
            void operator()(unsigned id, double position, boost::iterator_range<It> tlg, Message& m) const {
                // tlg is the range matched by raw[...]; copy its characters into the string.
                m = {id, position, std::string(tlg.begin(), tlg.end())};
            }
        };

        qi::rule<It, qi::blank_type>             ignore; ///< rest-of-line consumer, no attribute
        qi::rule<It, Messages()>                 start;  ///< whole log; applies the blank skipper itself
        qi::rule<It, Messages(), qi::blank_type> tcl;    ///< one "insert =" line
        qi::rule<It, Message(), qi::blank_type>  msg;    ///< one record
        boost::phoenix::function<deferred_fill>  fill;   ///< lazy wrapper usable in semantic actions
    };
} // namespace parser
// Sample log in the old format: three "insert =" lines, each carrying a single
// id;position;hex; record.
static constexpr auto file1{
R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
[2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
[2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
[2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
[2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id: command: free
[2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;
[2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)"};
// Sample log in the new format: the three "insert =" lines carry 3, 1 and 2
// records respectively (6 records in total).
static constexpr auto file2{
R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
[2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
[2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
[2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;2;253.9;A0027FC384DF40010C8092A041FF83FFF;3;449.9;A1027FC38F361210BF080007DF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
[2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id: command: free
[2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;10;2261.9;A0127FC384E3C00115412C80008B1007F4;
[2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)"};
/// {fmt} support for structs::Message: renders a message as
/// "{ id: <id>, position: <position>, hex: <hex> }".
template <>
struct fmt::formatter<structs::Message> : fmt::formatter<std::string> {
    /// Writes the three members of `message` to the context's output iterator.
    template <typename FormatContext>
    auto format(structs::Message const& message, FormatContext& ctx) const {
        auto sink = ctx.out();
        return fmt::format_to(sink, "{{ id: {}, position: {}, hex: {} }}", //
                              message.id, message.position, message.hex);
    }
};
int main() {
parser::log const p;
for (auto txt : {file1, file2}) {
std::istringstream s{txt};
auto messages = p.read(s);
fmt::print("Parsed\n - {}\n", fmt::join(messages, "\n - "));
}
}
Printing
Parsed
- { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
- { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
- { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
Parsed
- { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
- { id: 2, position: 253.9, hex: A0027FC384DF40010C8092A041FF83FFF }
- { id: 3, position: 449.9, hex: A1027FC38F361210BF080007DF }
- { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
- { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
- { id: 10, position: 2261.9, hex: A0127FC384E3C00115412C80008B1007F4 }
Since you now have only stateless rules and almost no automatic attribute propagation, the grammar is trivial to migrate to Spirit X3 if you wish to cut down on compile times.
The X3 version is also 26 lines shorter:
#include <boost/fusion/include/at_c.hpp>
#include <boost/phoenix/fusion/at.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <fmt/ranges.h>
#include <sstream>
namespace structs {
    /// One record taken from an "insert =" log line.
    ///
    /// The numeric members are value-initialized so that a default-constructed
    /// Message never holds indeterminate values; the type is still an aggregate
    /// (C++14 and later), so `Message{id, position, hex}` keeps working.
    struct Message {
        unsigned id{};      ///< first field of the record
        double position{};  ///< second field of the record
        std::string hex;    ///< third field: the telegram, kept as its hex-digit text
    };
    /// All records extracted from a log, in order of appearance.
    using Messages = std::vector<Message>;
}
/// Reads a log from `in` and returns every id;position;hex; record found on
/// its "insert =" lines; all other lines are skipped.
/// @param in  stream positioned at the start of the log; its skipws flag is
///            cleared so that the grammar sees all whitespace.
/// @return the records in order of appearance, or an empty vector if the
///         parse reports failure.
static inline structs::Messages readLog(std::istream& in) {
    namespace x3     = boost::spirit::x3;
    namespace fusion = boost::fusion;
    using It         = boost::spirit::istream_iterator;

    structs::Messages r;

    // Semantic action run once per completed record. The record's attribute
    // is a Fusion sequence (id, position, hex digits), so its elements are
    // read with fusion::at_c, which accesses them immediately (phoenix::at_c
    // is meant for building lazy expressions instead).
    auto fill = [&r](auto&& ctx) {
        auto& a = x3::_attr(ctx);
        r.push_back({fusion::at_c<0>(a), fusion::at_c<1>(a), fusion::at_c<2>(a)});
    };

    auto msg = (x3::uint_ >> ';'                   // id
                >> x3::double_ >> ';'              // position
                >> x3::lexeme[+x3::xdigit] >> ';') // telegram hex digits
        [fill];

    // Rest of the current line, whatever it contains.
    auto ignore = *~x3::char_("\r\n");

    // Prefix up to the first '>' (never scanning past the end of the current
    // line), the "insert =" marker and one or more records. The trailing
    // `ignore` swallows anything left on the line: without it, an unparsed
    // tail would make the `% eol` list below stop there, silently dropping
    // every following line while phrase_parse still reports success.
    auto tcl = *~x3::char_(">\r\n") >> '>' >> "insert =" >> +msg >> ignore;

    // Every line is either an "insert =" line or something to ignore.
    auto start = (tcl | ignore) % x3::eol;

    in >> std::noskipws; // whitespace handling is left to the blank skipper
    if (!x3::phrase_parse(It{in}, {}, start, x3::blank))
        r.clear();
    return r;
}
// Sample log in the old format: three "insert =" lines, each carrying a single
// id;position;hex; record.
static constexpr auto file1{
R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
[2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
[2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
[2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
[2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id: command: free
[2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;
[2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)"};
// Sample log in the new format: the three "insert =" lines carry 3, 1 and 2
// records respectively (6 records in total).
static constexpr auto file2{
R"([2025-06-06 09:33:30.002155] - 0.000 s => begins to load XML file
[2025-06-06 09:33:30.250151] - 0.000 s => Received -> id: 1
[2025-06-06 09:33:30.253154] - 0.000 s => End initial Mnemonics
[2025-06-06 09:33:36.163154] - 0.020 s => insert = 1;250.9;A0127FC384DF400115406C800011806C031A0000;2;253.9;A0027FC384DF40010C8092A041FF83FFF;3;449.9;A1027FC38F361210BF080007DF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> id: 2
[2025-06-06 09:35:05.444160] - 89.360 s => insert = 7;1655.9;A0027FC384E240010C8092A041FF83FFF;
[2025-06-06 09:33:36.164151] - 0.020 s => Received -> element_id: 1 connection_id: command: free
[2025-06-06 09:35:54.648162] - 138.560 s => insert = 9;2258.9;A002054384E3C0010C8092A041;10;2261.9;A0127FC384E3C00115412C80008B1007F4;
[2025-06-06 09:36:07.028155] - 150.940 s => Received -> Value: 1)"};
/// {fmt} support for structs::Message: renders a message as
/// "{ id: <id>, position: <position>, hex: <hex> }".
template <>
struct fmt::formatter<structs::Message> : fmt::formatter<std::string> {
    /// Writes the three members of `message` to the context's output iterator.
    template <typename FormatContext>
    auto format(structs::Message const& message, FormatContext& ctx) const {
        auto sink = ctx.out();
        return fmt::format_to(sink, "{{ id: {}, position: {}, hex: {} }}", //
                              message.id, message.position, message.hex);
    }
};
/// Parses each sample log with readLog and prints the messages found.
int main() {
    for (char const* sample : {file1, file2}) {
        std::istringstream stream{sample};
        auto const parsed = readLog(stream);
        fmt::print("Parsed\n - {}\n", fmt::join(parsed, "\n - "));
    }
}
Printing
Parsed
- { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
- { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
- { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
Parsed
- { id: 1, position: 250.9, hex: A0127FC384DF400115406C800011806C031A0000 }
- { id: 2, position: 253.9, hex: A0027FC384DF40010C8092A041FF83FFF }
- { id: 3, position: 449.9, hex: A1027FC38F361210BF080007DF }
- { id: 7, position: 1655.9, hex: A0027FC384E240010C8092A041FF83FFF }
- { id: 9, position: 2258.9, hex: A002054384E3C0010C8092A041 }
- { id: 10, position: 2261.9, hex: A0127FC384E3C00115412C80008B1007F4 }