Can I make split_regex work based on groups instead of using lookbehind?
The code I'm using is as follows:
string data = "xyz: 111.222: k.44.4: 12345";
vector<string> data_vec;
// A raw string literal is required here: in an ordinary literal, \s and \d are
// invalid escape sequences, so the backslashes would not reach the regex engine.
boost::algorithm::split_regex( data_vec, data, boost::regex(R"((:\s*)\d)"));
My expected result is:
xyz
111.222: k.44.4
12345
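In case you are open to other solutions, one using std::regex
would be to split on the
:\s*
separator and then append every token that does not start with a digit back onto the previous token (together with the separator that preceded it).
#include <cctype> // isdigit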
#include <fmt/ranges.h>
#include <regex>
#include <string>
#include <vector>
// Adds `token` to `tokens`, either as a new element or merged into the last one.
//
//  - An empty `token` is ignored.
//  - If `tokens` is empty, or `token` begins with a decimal digit, `token` is
//    pushed as a new element.
//  - Otherwise `separator` followed by `token` is appended to the last element.
void add_token(std::vector<std::string>& tokens, const std::string& token,
               const std::string& separator) {
    if (token.empty()) {
        return;
    }

    // std::isdigit has undefined behaviour for negative arguments (other than
    // EOF), so a possibly-signed char must be converted to unsigned char first.
    const bool starts_with_digit =
        std::isdigit(static_cast<unsigned char>(token.front())) != 0;

    if (tokens.empty() or starts_with_digit) {
        tokens.push_back(token);
        return;
    }

    // Not the start of a new field: glue it back onto the previous one.
    tokens.back() += separator;
    tokens.back() += token;
}
// Splits `data` on every match of the separator pattern `:\s*` and hands each
// piece to add_token, together with the separator text that preceded it.
// Returns the resulting vector of tokens.
auto split_regex(std::string data) {
    // Compiling a std::regex is costly; build it once and reuse it across calls.
    static const std::regex pattern{R"(:\s*)"};

    std::vector<std::string> tokens{};
    std::string last_separator{};
    // Start of the text that follows the most recent separator match.
    auto rest_begin = data.cbegin();

    // Walk the matches over the unmodified input instead of re-copying the
    // remaining suffix into `data` after every match.
    for (std::sregex_iterator it{data.cbegin(), data.cend(), pattern}, end{};
         it != end; ++it) {
        const std::smatch& match = *it;
        last_separator = match.str();
        // For a regex_iterator, prefix() spans from the end of the previous
        // match (or the start of the input) up to the current match.
        add_token(tokens, match.prefix().str(), last_separator);
        rest_begin = match[0].second;
    }

    // Whatever follows the final separator (or the whole input if none matched).
    add_token(tokens, std::string{rest_begin, data.cend()}, last_separator);
    return tokens;
}
// Demo: splits the sample string and prints one token per line.
int main() {
    const std::string sample{"xyz: 111.222: k.44.4: 12345"};
    const auto pieces = split_regex(sample);
    fmt::print("{}", fmt::join(pieces, "\n"));
}
// Outputs:
//
// xyz
// 111.222: k.44.4
// 12345