c++boostqi

boost::spirit::qi::parse grammar not working as expected


I try to write a grammar to parse the following syntax:

// - command
// - command value0 ... valueN
// - command -arg0 ... -argN
// - command -arg0 value0 ... valueN ... -argN value0 ... valueN

I added my current grammar within a short example here

My problem:

The vector contains more entries than available values because blanks are also interpreted as values


Solution

  • It's not exactly clear how you want the grammar to function¹, but from the target data structure I get the impression things could be simplified vastly by:

    1. using a skipper (see Boost spirit skipper issues for background)
    2. using automatic attribute propagation instead of phoenix (see also Boost Spirit: "Semantic actions are evil"?).

      // A token is one or more characters that are not CR, LF, space or '-'.
      token  = +~char_("\r\n -");
      // One or more tokens.
      values = +token;

      // An entry is either "-name" with an optional list of values, or a bare
      // list of values that is stored under the fixed name "empty".
      entry  = (lexeme['-' >> token] >> -values | attr("empty") >> values);
      // Zero or more entries.
      args   = *entry;

      // The whole line: a command token followed by its arguments, skipping blanks.
      data   = skip(qi::blank) [ token >> args ];
      

    In the sample below I've used Fusion adaptation to enable automatic attribute propagation (which, at once, enables debug output when the following is defined):

    #define BOOST_SPIRIT_DEBUG
    

    Live On Coliru

    //#define BOOST_SPIRIT_DEBUG
    #include <boost/fusion/adapted.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>
    
    // Holds the result of parsing one command line: the command name plus
    // every argument together with the values that were given for it.
    struct CmdData
    {
        using Name    = std::string;   // name of the command
        using ArgName = std::string;   // name of an argument
        using Value   = std::string;   // a single value

        using Values = std::vector<Value>;         // a list of values
        using Args   = std::map<ArgName, Values>;  // maps an argument to its values

        Name cmd;  // The command name as a string.
        Args arg;  // The arguments and their corresponding values as strings.
    };
    
    // Adapt CmdData as a Fusion sequence with the members (cmd, arg), in that
    // order, so that the grammar's attributes can be propagated into it
    // automatically instead of being assigned through semantic actions.
    BOOST_FUSION_ADAPT_STRUCT(CmdData, (CmdData::Name, cmd)(CmdData::Args, arg))
    
    namespace Grammar
    {
        namespace qi = boost::spirit::qi;

        // Spirit Qi grammar for a single command line; its attribute is a CmdData.
        // Accepted shapes:
        //   command
        //   command value0 ... valueN
        //   command -arg0 ... -argN
        //   command -arg0 value0 ... valueN ... -argN value0 ... valueN
        template <typename It>
        struct decode : qi::grammar<It, CmdData()>
        {
            decode() : decode::base_type(data)
            {
                // A token is a run of characters that contains no CR, LF, space or '-'.
                token = +~qi::char_("\r\n -");

                // One or more tokens in a row.
                values = +token;

                // Either "-name" followed by an optional value list, or a bare value
                // list that is stored under the fixed name "empty".
                entry = (qi::lexeme['-' >> token] >> -values)
                      | (qi::attr("empty") >> values);

                // Any number of entries.
                args = *entry;

                // The command name followed by its arguments, with blanks skipped.
                data = qi::skip(qi::blank)[token >> args];

                BOOST_SPIRIT_DEBUG_NODES( (token)(values)(entry)(args)(data) )
            }

          private:
            // Start rule; declared without a skipper, skipping is enabled inside it.
            qi::rule<It, CmdData()> data;

            // Rules declared with the blank skipper:
            typedef std::pair<CmdData::ArgName, CmdData::Values> Entry;
            qi::rule<It, CmdData::Values(), qi::blank_type> values;
            qi::rule<It, Entry(),           qi::blank_type> entry;
            qi::rule<It, CmdData::Args(),   qi::blank_type> args;

            // Lexeme rule (declared without a skipper):
            qi::rule<It, std::string()> token;
        };

    }   // namespace Grammar
    
    // Parses one command line with Grammar::decode and reports the outcome on std::cout.
    //
    // in: the text to parse.
    // Returns the result of qi::parse, i.e. whether the grammar matched. A match
    // does not have to consume all of `in`; when it does not, the part that was
    // left over is printed so that a partial match cannot go unnoticed.
    bool parse(const std::string& in)
    {
        CmdData data;

        // Create an instance of the grammar used for parsing:
        Grammar::decode<std::string::const_iterator> gr;

        // Named iterators are used (instead of temporaries) so that the position
        // at which parsing stopped can be inspected afterwards.
        std::string::const_iterator       first = in.begin();
        const std::string::const_iterator last  = in.end();

        // Parse the input according to the grammar and store the result in `data`:
        bool b = boost::spirit::qi::parse(first, last, gr, data);

        std::cout << "Parsing: '" << in << "' ok: " << std::boolalpha << b << "\n";
        if (b)
        {
            std::cout << "Entries parsed: " << data.arg.size() << "\n";

            // A successful match may still have stopped before the end of the input.
            if (first != last)
                std::cout << "Remaining unparsed: '" << std::string(first, last) << "'\n";
        }

        return b;
    }
    
    // Runs parse() on one sample line for each supported command-line shape.
    int main()
    {
        const char* const samples[] = {
            "   cmd0",
            "   cmd0  -23.0 value0  value1  value2",
            "   cmd0  -arg0  -arg1  -arg2",
            "   cmd0  -arg0  value0  -arg1  value0  value1  -arg2  value0  value1  value2",
        };

        for (const char* line : samples)
            parse(line);
    }
    

    Prints

    Parsing: '   cmd0' ok: true
    Entries parsed: 0
    Parsing: '   cmd0  -23.0 value0  value1  value2' ok: true
    Entries parsed: 1
    Parsing: '   cmd0  -arg0  -arg1  -arg2' ok: true
    Entries parsed: 3
    Parsing: '   cmd0  -arg0  value0  -arg1  value0  value1  -arg2  value0  value1  value2' ok: true
    Entries parsed: 3
    

    (with debug output disabled)


    ¹ (e.g. is -23.0 expressly an option or not)