parsing, boost, boost-spirit

Find isolated word in string with Boost spirit


I'm a newbie, so this may well be something easy. What is the best way to write a boost::spirit parser that matches a word within a string only if the word is isolated?

Example:

I would like to find the word "ma" (case-insensitively), but only when it appears as an isolated word rather than as part of a longer word.


Solution

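  • Where you'd write "ma" to match "ma" anywhere, write e.g. x3::no_case["ma"] >> !x3::graph instead, i.e. follow the literal with a negative lookahead asserting that no further graphical (non-whitespace) character comes directly after it.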

    Detecting distinct starts of identifiers is usually implicit in the surrounding grammar.

    Simple Demo

    Live On Coliru Using X3

    #include <boost/spirit/home/x3.hpp>
    #include <iomanip>
    #include <iostream>
    #include <string_view>
    
    // Searches each sample sentence for the parser `p` and reports the outcome.
    //
    // `p` is wrapped in x3::seek, so a match may begin at any offset of the
    // input (no skipper is involved). One line is printed per sample: the
    // quoted text, right-aligned to 16 columns, followed by `true` or `false`.
    void run_tests(auto p) {
        char const* const samples[] = {
            "mare",          // -> no match
            "altamarea",     // -> no match
            "Roma",          // -> no match
            "ma",            // ???
            "no ma va bene", // -> match!
            "Ma anche no!",  // -> match
        };

        // The searching parser is identical for every sample; build it once.
        auto const anywhere = boost::spirit::x3::seek[p];

        for (std::string_view text : samples) {
            auto       cursor = text.begin(); // advanced by parse()
            auto const finish = text.end();
            bool const hit    = parse(cursor, finish, anywhere);

            std::cout << std::setw(16) << std::quoted(text);
            std::cout << " -> " << std::boolalpha << hit << "\n";
        }
    }
    
    // Runs the sample inputs against four variants of a case-insensitive "ma":
    //   - the bare literal,
    //   - the literal followed by whitespace (positive lookahead),
    //   - the literal not followed by a graph character (negative lookahead),
    //   - the literal followed by whitespace or end of input (hybrid).
    int main() {
        namespace x3 = boost::spirit::x3;

        auto const word = x3::no_case["ma"];

        // Prints a section banner, then the results for one parser variant.
        auto const section = [](char const* banner, auto variant) {
            std::cout << banner;
            run_tests(variant);
        };

        section(" ------- Naive:\n", word);
        section(" ------- Positive lookahead:\n", word >> &x3::space);
        section(" ------- Negative lookahead:\n", word >> !x3::graph);
        section(" ------- Hybrid:\n", word >> &(x3::space | x3::eoi));
    }
    

    Printing

     ------- Naive:
              "mare" -> true
         "altamarea" -> true
              "Roma" -> true
                "ma" -> true
     "no ma va bene" -> true
      "Ma anche no!" -> true
     ------- Positive lookahead:
              "mare" -> false
         "altamarea" -> false
              "Roma" -> false
                "ma" -> false
     "no ma va bene" -> true
      "Ma anche no!" -> true
     ------- Negative lookahead:
              "mare" -> false
         "altamarea" -> false
              "Roma" -> true
                "ma" -> true
     "no ma va bene" -> true
      "Ma anche no!" -> true
     ------- Hybrid:
              "mare" -> false
         "altamarea" -> false
              "Roma" -> true
                "ma" -> true
     "no ma va bene" -> true
      "Ma anche no!" -> true
    

    Demonstrating arbitrary surrounding grammar:

    Live On Coliru

    #include <boost/spirit/home/x3.hpp>
    #include <iomanip>
    #include <iostream>
    #include <string_view>
    namespace x3 = boost::spirit::x3;
    
    // Parses each sample sentence as "any number of other tokens, then `p`",
    // skipping whitespace between tokens, and reports whether that succeeded.
    //
    // One line is printed per sample: the quoted text, right-aligned to 16
    // columns, followed by `true` or `false`.
    void run_tests(auto p) {
        char const* const samples[] = {
            "mare",          // -> no match
            "altamarea",     // -> no match
            "Roma",          // -> no match
            "ma",            // ???
            "no ma va bene", // -> match!
            "Ma anche no!",  // -> match
        };

        // A token that is skipped over: a run of graph characters at whose
        // start `p` does not match. lexeme keeps the skipper out of the token.
        auto const other_token = x3::lexeme[!p >> +x3::graph];
        auto const sentence    = *other_token >> p;

        for (std::string_view text : samples) {
            auto       cursor = text.begin(); // advanced by phrase_parse()
            auto const finish = text.end();
            bool const hit    = phrase_parse(cursor, finish, sentence, x3::space);

            std::cout << std::setw(16) << std::quoted(text);
            std::cout << " -> " << std::boolalpha << hit << "\n";
        }
    }
    
    // Runs the sample inputs against two lexeme-wrapped variants of a
    // case-insensitive "ma": one that must not be followed by a graph
    // character, and one that must be followed by whitespace or end of input.
    int main() {
        auto const word = x3::no_case["ma"];

        // Prints a section banner, then the results for one parser variant.
        auto const section = [](char const* banner, auto variant) {
            std::cout << banner;
            run_tests(variant);
        };

        section(" ------- Negative lookahead:\n", x3::lexeme[word >> !x3::graph]);
        section(" ------- Hybrid:\n", x3::lexeme[word >> &(x3::space | x3::eoi)]);
    }
    

    Printing

     ------- Negative lookahead:
              "mare" -> false
         "altamarea" -> false
              "Roma" -> false
                "ma" -> true
     "no ma va bene" -> true
      "Ma anche no!" -> true
     ------- Hybrid:
              "mare" -> false
         "altamarea" -> false
              "Roma" -> false
                "ma" -> true
     "no ma va bene" -> true
      "Ma anche no!" -> true