c++ boost boost-spirit boost-spirit-qi boost-spirit-lex

Conversion to token_def's value type


When I define the following in a lexical analyzer:

typedef boost::mpl::vector<std::string, unsigned int, bool>
            token_value_types;
lex::token_def<unsigned int> lit_uint("[0-9]+", token_ids::lit_uint);

and then use it in a grammar as

primary_expr =
        lexer.lit_uint
    |   lexer.true_or_false
    |   identifier
    |   '(' > expr > ')'
    ;

how is the string converted to a value of the correct token value type (unsigned int in this case)? What happens if you specify a custom type or a floating-point type as the token value type? And where does the conversion routine live (I suspect something like a boost::iterator_range to double conversion)?


Solution

  • The way to accomplish what you want is to specialize assign_to_attribute_from_iterators. You can find an example with a custom type here. If you use double as the attribute of your token definition, Spirit internally uses qi::double_ to parse the value (you can find here the specializations for double and the rest of the fundamental types). A sketch of such a custom-type specialization is shown at the end of this answer.

    A silly example, where I define the real token as anything that is not a , or a ;, to show the parsing of doubles:

    #define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/mpl/vector.hpp>
    #include <iostream>
    #include <string>
    #include <vector>
    
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;
    namespace mpl = boost::mpl;
    
    
    template <typename Lexer>
    struct my_lexer : lex::lexer<Lexer>
    {
        my_lexer()
        {
            real = "[^,;]*"; //anything that is not a , or ; is a real number
    
            this->self=lex::token_def<lex::omit>(',')| ';';
    
            this->self.add(real);
        }
        lex::token_def<double> real;
    };
    
    
    int main()
    {
        // the token type needs to know the iterator type of the underlying
        // input and the set of used token value types
        typedef lex::lexertl::token<std::string::iterator,
            mpl::vector<double> > token_type;
    
        // use actor_lexer<> here if your token definitions have semantic
        // actions
        typedef lex::lexertl::lexer<token_type> lexer_type;
    
        // this is the iterator exposed by the lexer, we use this for parsing
        typedef lexer_type::iterator_type iterator_type;
    
        // create a lexer instance
        std::string input("3.4,2,.4,4.,infinity,NaN,-3.8,1e2,1.5E3;");
        std::string::iterator s = input.begin();
    
        my_lexer<lexer_type> lex;
        iterator_type b = lex.begin(s, input.end());
    
        // use the embedded token_def as a parser, it exposes its token value type
        // as its parser attribute type
        std::vector<double> result;
        qi::rule<iterator_type, double()> number = lex.real;
        qi::rule<iterator_type, std::vector<double>()> sequence = number >> *(',' >> number) >> ';';
        BOOST_SPIRIT_DEBUG_NODE(number);
        BOOST_SPIRIT_DEBUG_NODE(sequence);
        if (!qi::parse(b, lex.end(), sequence, result))
        {
            std::cerr << "Parsing failed!" << std::endl;
            return -1;
        }
    
        std::cout << "Parsing succeeded:"  << std::endl;
        for (auto& n : result)
            std::cout << n << std::endl;
        return 0;
    }
    

    Edit: I have very little experience with regular expressions, but I believe the token definition equivalent to the grammar linked in the comment (which, I think, should have fractional_constant >> -exponent_part rather than fractional_constant >> !exponent_part) would be:

    template <typename Lexer>
    struct my_lexer : lex::lexer<Lexer>
    {
        my_lexer()
        {
            this->self.add_pattern("SIGN","[\\+\\-]");
            this->self.add_pattern("NAN","(1\\.0#)?(?i:nan)(\\([^\\)]\\))?");
            this->self.add_pattern("INF","(?i:inf(inity)?)");
            this->self.add_pattern("DIGIT","[0-9]");
            this->self.add_pattern("FRACT_CONST","{DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.?");
            this->self.add_pattern("EXP","[eE]{SIGN}?{DIGIT}+");
    
            real = "{SIGN}?({NAN}|{INF}|{FRACT_CONST}{EXP}?|{DIGIT}+{EXP})";
    
            this->self = lex::token_def<lex::omit>(',') | ';';
    
            this->self.add(real);
        }
        lex::token_def<double> real;
    };
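
    For a custom (non-fundamental) token value type you specialize the same customization point yourself. Here is a minimal sketch, assuming a made-up value type scaled_int whose member name is purely illustrative; only the shape of the traits specialization reflects the actual customization point Spirit uses:

    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/qi.hpp>
    
    // hypothetical user-defined token value type, for illustration only
    struct scaled_int
    {
        unsigned int value; // payload filled from the matched character range
    };
    
    namespace boost { namespace spirit { namespace traits
    {
        // invoked when the token's value is first accessed; [first, last)
        // is the character range matched by the token definition
        template <typename Iterator>
        struct assign_to_attribute_from_iterators<scaled_int, Iterator>
        {
            static void call(Iterator const& first, Iterator const& last, scaled_int& attr)
            {
                Iterator f = first;
                boost::spirit::qi::parse(f, last, boost::spirit::qi::uint_, attr.value);
            }
        };
    }}}

    A lex::token_def<scaled_int> would then convert its matched iterator range through this specialization, just as the built-in specializations do for double and the other fundamental types.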