c++ boost boost-regex

Does split_regex support groups?


Can I make split_regex split based on groups instead of using lookbehind?

The code I'm using is as follows:

string data = "xyz: 111.222: k.44.4: 12345";
vector<string> data_vec;

// Raw string literal: in an ordinary literal "\s" and "\d" are not valid
// escape sequences, so the backslashes would not reach the regex engine.
boost::algorithm::split_regex( data_vec, data, boost::regex(R"((:\s*)\d)"));

My expected result is:

xyz
111.222: k.44.4
12345

Solution

  • In case you are open to other solutions, one using std::regex would be:

    [Demo]

    #include <cctype>  // isdigit
    #include <fmt/ranges.h>
    #include <regex>
    #include <string>
    #include <vector>
    
    /// Adds `token` to `tokens`, either as a new entry or merged into the last one.
    ///
    /// Empty tokens are ignored. A token becomes a new entry when `tokens` is
    /// empty or the token begins with a decimal digit; otherwise `separator`
    /// followed by the token is appended to the last entry.
    ///
    /// @param tokens    Output list, modified in place.
    /// @param token     Candidate piece of text.
    /// @param separator Text re-inserted in front of `token` when it is merged.
    void add_token(std::vector<std::string>& tokens, const std::string& token,
        const std::string& separator) {
        if (token.empty()) {
            return;
        }
        // std::isdigit requires a value representable as unsigned char (or EOF);
        // a plain char may be negative for non-ASCII bytes, so convert first.
        const bool starts_with_digit =
            std::isdigit(static_cast<unsigned char>(token.front())) != 0;
        if (tokens.empty() or starts_with_digit) {
            tokens.push_back(token);
        } else {
            tokens.back() += separator;
            tokens.back() += token;
        }
    }
    
    /// Splits `data` at every ':' followed by optional whitespace and hands each
    /// piece to add_token together with the separator text that was matched.
    ///
    /// The text before each match is passed with that match as separator; the
    /// text after the final match is passed with the last separator seen (an
    /// empty string when nothing matched).
    ///
    /// @param data Text to split.
    /// @return The tokens collected by add_token, as a std::vector<std::string>.
    auto split_regex(std::string data) {
        std::vector<std::string> tokens{};
        const std::regex pattern{R"(:\s*)"};
        std::smatch matches{};
        std::string last_separator{};
        // Walk a cursor through `data` so the remaining text does not have to
        // be copied into a new string after every match.
        auto cursor = data.cbegin();
        while (std::regex_search(cursor, data.cend(), matches, pattern)) {
            last_separator = matches.str();
            // prefix() spans from the cursor up to the start of this match.
            add_token(tokens, matches.prefix(), last_separator);
            cursor = matches[0].second;  // resume right after the separator
        }
        add_token(tokens, std::string(cursor, data.cend()), last_separator);
        return tokens;
    }
    
    /// Demo entry point: splits a sample string and prints one token per line.
    int main() {
        const std::string input{ "xyz: 111.222: k.44.4: 12345" };
        const auto tokens = split_regex(input);
        fmt::print("{}", fmt::join(tokens, "\n"));
    }
    
    // Outputs:
    //
    //   xyz
    //   111.222: k.44.4
    //   12345