c++boost-spirit

Is it possible to get the string that was parsed?


I have a working parser + AST based on Spirit that parses parameter initialization - everything works fine.

Sadly, there is an (old) database that wants parts of the AST as the original string (I'm on my way to replacing that thing, but want to make the change step by step).

(trivialized) example: a=10,b=30,c=A *B +10,d={1,A*4,3}*4

KeyValue-Rule = Identifier >> '=' >> Number|Expression

Expression is a complete Sub-AST with +-* and Numbers (in this trivialized example)

std::vector<KeyValue> Parameters;

The database wants the "values" of the parameters as strings; for c that is "A *B +10" - ideally the exact string (with blanks etc.).

is that something Spirit can preserve (or the begin/end positions)?

The example above is a heavily trivialized version of my real parser/AST; expressions can be deeply nested with () etc., so it's not a simple split-by-"," problem - this is just to explain what I'm trying to achieve.

something like: https://astexplorer.net/ - i need start/end-points of my logical AST parts in the string


Solution

  • Yes. You could use semantic actions:

    KeyValue-Rule = raw [ (Identifier >> '=' >> Number|Expression)[assign_ast] ][assign_raw_input_sequence];
    

    You can use automatic attribute propagation in combination with semantic actions, using the %= rule initialization instead of regular = assignment.

    You can also potentially automate things by using on_success "error" handlers. See some examples of that in my older answers: https://stackoverflow.com/search?q=user%3A85371+qi+on_success

    Imagine A Grammar

    Coming up with the simplest possible grammar that doesn't have unary operators or operator precedence:

    Live On Coliru

    // #define BOOST_SPIRIT_DEBUG
    #include <boost/fusion/include/io.hpp>
    #include <boost/phoenix.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    namespace qi = boost::spirit::qi;
    
    // simple expression AST
    //
    // An expression is either a leaf (number or variable name) or a binary
    // operation whose operands are expressions themselves.
    namespace Ast {
        // Leaf node types.
        using Number     = double;
        using Variable   = std::string;
        using SimpleExpr = boost::variant<Number, Variable>;
    
        struct BinaryOp;
        struct Expr;
        // BinaryOp is still incomplete at this point and itself contains Expr
        // members, hence the recursive_wrapper around it.
        using ExprV = boost::variant<SimpleExpr, boost::recursive_wrapper<BinaryOp>>;
    
        // Named type on top of the variant; constructors and assignment are
        // inherited so an Expr can be built from / assigned any alternative.
        struct Expr : ExprV {
            using ExprV::ExprV;
            using ExprV::operator=;
        };
    
        // One binary operation: `left op right`.
        struct BinaryOp {
            char op;
            Expr left, right;
        };
    
        // Make Fusion's stream-insertion operator visible inside this namespace,
        // so that Fusion-adapted types declared here can be printed with <<.
        using boost::fusion::operator<<;
    } // namespace Ast
    
    // Adapt BinaryOp as a Fusion sequence in the order (left, op, right). Note that
    // this differs from the member declaration order (op, left, right).
    BOOST_FUSION_ADAPT_STRUCT(Ast::BinaryOp, left, op, right)
    
    // simple expression grammar
    //
    // Parses a whole input into an Ast::Expr. Whitespace between tokens is
    // skipped, and the complete input must be consumed for the parse to succeed.
    template <typename It> struct SimpleExprGrammar : qi::grammar<It, Ast::Expr()> {
        SimpleExprGrammar() : SimpleExprGrammar::base_type(start) {
            using namespace qi;

            // The skipper is installed here, so callers use plain qi::parse.
            start   = skip(space)[expr_ >> eoi];

            // The right operand is a full expression again, so a chain of
            // operators nests to the right and there is no operator precedence.
            binary_ = simple_ >> char_("+*/-") >> expr_;
            // Try the longer form first; fall back to a lone operand.
            expr_   = binary_ | simple_;
            simple_ = double_ | var_ | ('(' >> expr_ >> ')');
            var_    = +alpha;

            // Each rule is registered exactly once.
            BOOST_SPIRIT_DEBUG_NODES((expr_)(binary_)(simple_)(var_))
        }

      private:
        qi::rule<It, Ast::Expr()> start;
        using Skipper = qi::space_type;
        qi::rule<It, Ast::Expr(), Skipper>     expr_;
        qi::rule<It, Ast::Expr(), Skipper>     simple_;
        qi::rule<It, Ast::BinaryOp(), Skipper> binary_;
        // lexemes (declared without a skipper, so no whitespace inside a name)
        qi::rule<It, Ast::Variable()>   var_;
    };
    
    int main() {
        SimpleExprGrammar<std::string_view::const_iterator> const p;
    
        for (std::string_view input : {
                 "1 + 2",
                 "1 + 2 * 3",
                 "(1 + 2) * 3",
                 "1 + 2 * (3 + 4)",
                 "1 + 2 * (3 + 4) / 5",
                 "1 + 2 * (3 + 4) / 5 - 6",
             }) {
            std::cout << " -- Parsing: " << quoted(input) << "\n";
            auto f = begin(input), l = end(input);
    
            if (Ast::Expr expr; qi::parse(f, l, p, expr))
                std::cout << "   -> " << expr << "\n";
            else
                std::cout << "   FAILED\n";
        }
    }
    

    Prints

     -- Parsing: "1 + 2"
       -> (1 + 2)
     -- Parsing: "1 + 2 * 3"
       -> (1 + (2 * 3))
     -- Parsing: "(1 + 2) * 3"
       -> ((1 + 2) * 3)
     -- Parsing: "1 + 2 * (3 + 4)"
       -> (1 + (2 * (3 + 4)))
     -- Parsing: "1 + 2 * (3 + 4) / 5"
       -> (1 + (2 * ((3 + 4) / 5)))
     -- Parsing: "1 + 2 * (3 + 4) / 5 - 6"
       -> (1 + (2 * ((3 + 4) / (5 - 6))))
    

    Adding Source Annotation

    Extend Expr:

    // Expression node: the variant of alternatives plus an annotation holding
    // the text the node came from.
    struct Expr : ExprV {
        using ExprV::ExprV;
        using ExprV::operator=;
    
        // Non-owning view of the source text for this node. Being a string_view,
        // it is only valid while the buffer it points into is still alive.
        std::string_view source;
    
        // Access to the underlying variant (without the annotation), e.g. for visiting.
        ExprV&       base() { return *this; }
        ExprV const& base() const { return *this; }
    };
    

    The `source` member will hold the slice of input text that the node was parsed from. Craft a semantic action to set it from the raw input iterators:

    // Lazy (Phoenix) function that stores the given text slice in an Expr's `source` member.
    phx::function source_ = [](Ast::Expr& ast, std::string_view sv) { ast.source = sv; };
    // Semantic action: builds a string_view from the begin/end of the attribute `_1`
    // and records it on the rule's value `_val`. Meant to be attached to a parser
    // whose attribute is an iterator range (such as a raw[...] directive).
    auto set_raw_         = source_(_val, phx::construct<std::string_view>(phx::begin(_1), phx::end(_1)));
    

    Sprinkle semantic actions across the rules that should propagate both the parsed Ast and the source:

        // Every alternative assigns its result to `_val` explicitly; the action attached
        // to the enclosing raw[...] (set_raw_) then runs with the matched input range.
        expr_   = raw[(binary_[_val = _1] | simple_[_val = _1])][set_raw_];
        simple_ = raw[                         //
            double_[_val = _1]                 //
            | var_[_val = _1]                  //
            | ('(' >> expr_ >> ')')[_val = _1] //
        ][set_raw_];
    

    As a bonus, add an AnnotatedPrint visitor:

    // Visitor that prints an AST as an indented tree, showing for every Expr the
    // source text recorded on it.
    //
    // All output is written to `os_`. `indent` is the prefix emitted before each
    // line and grows by two spaces per nesting level. `input` is only handed on
    // to nested visitors; it is not otherwise read here.
    struct AnnotatedPrint {
        std::ostream&    os_;
        std::string_view input;
        std::string      indent = "";

        // Entry point; also the call operator invoked by boost::apply_visitor.
        template <typename T> void operator()(T const& v) const { apply(v); }

      private:
        // Visitor for child nodes: same stream and input, one level deeper.
        AnnotatedPrint nested() const { return AnnotatedPrint{os_, input, indent + "  "}; }

        // Annotated node: print its source, then descend into the underlying variant.
        void apply(Ast::Expr const& e) const {
            os_ << indent << "Source: " << quoted(e.source) << "\n";
            nested()(e.base());
        }

        // Any variant: dispatch on the currently held alternative.
        template <typename... Ts> void apply(boost::variant<Ts...> const& v) const {
            boost::apply_visitor(*this, v);
        }

        void apply(Ast::Number const& e) const { os_ << indent << "Number: " << e << "\n"; }
        void apply(Ast::Variable const& e) const { os_ << indent << "Variable: " << e << "\n"; }
        void apply(Ast::BinaryOp const& e) const {
            os_ << indent << "BinaryOp: " << e.op << "\n";
            nested()(e.left);
            nested()(e.right);
        }
    };
    

    Now the output becomes Live On Coliru

     -- Parsing: "1 + 2"
       -> (1 + 2)
     -- AST: Source: "1 + 2"
     -- AST:   BinaryOp: +
     -- AST:     Source: "1"
     -- AST:       Number: 1
     -- AST:     Source: "2"
     -- AST:       Number: 2
     -- Parsing: "1 + 2 * 3"
       -> (1 + (2 * 3))
     -- AST: Source: "1 + 2 * 3"
     -- AST:   BinaryOp: +
     -- AST:     Source: "1"
     -- AST:       Number: 1
     -- AST:     Source: "2 * 3"
     -- AST:       BinaryOp: *
     -- AST:         Source: "2"
     -- AST:           Number: 2
     -- AST:         Source: "3"
     -- AST:           Number: 3
     -- Parsing: "(1 + 2) * 3"
       -> ((1 + 2) * 3)
     -- AST: Source: "(1 + 2) * 3"
     -- AST:   BinaryOp: *
     -- AST:     Source: "(1 + 2)"
     -- AST:       BinaryOp: +
     -- AST:         Source: "1"
     -- AST:           Number: 1
     -- AST:         Source: "2"
     -- AST:           Number: 2
     -- AST:     Source: "3"
     -- AST:       Number: 3
     -- Parsing: "1 + 2 * (3 + 4)"
       -> (1 + (2 * (3 + 4)))
     -- AST: Source: "1 + 2 * (3 + 4)"
     -- AST:   BinaryOp: +
     -- AST:     Source: "1"
     -- AST:       Number: 1
     -- AST:     Source: "2 * (3 + 4)"
     -- AST:       BinaryOp: *
     -- AST:         Source: "2"
     -- AST:           Number: 2
     -- AST:         Source: "(3 + 4)"
     -- AST:           BinaryOp: +
     -- AST:             Source: "3"
     -- AST:               Number: 3
     -- AST:             Source: "4"
     -- AST:               Number: 4
     -- Parsing: "1 + 2 * (3 + 4) / 5"
       -> (1 + (2 * ((3 + 4) / 5)))
     -- AST: Source: "1 + 2 * (3 + 4) / 5"
     -- AST:   BinaryOp: +
     -- AST:     Source: "1"
     -- AST:       Number: 1
     -- AST:     Source: "2 * (3 + 4) / 5"
     -- AST:       BinaryOp: *
     -- AST:         Source: "2"
     -- AST:           Number: 2
     -- AST:         Source: "(3 + 4) / 5"
     -- AST:           BinaryOp: /
     -- AST:             Source: "(3 + 4)"
     -- AST:               BinaryOp: +
     -- AST:                 Source: "3"
     -- AST:                   Number: 3
     -- AST:                 Source: "4"
     -- AST:                   Number: 4
     -- AST:             Source: "5"
     -- AST:               Number: 5
     -- Parsing: "1 + 2 * (3 + 4) / 5 - 6"
       -> (1 + (2 * ((3 + 4) / (5 - 6))))
     -- AST: Source: "1 + 2 * (3 + 4) / 5 - 6"
     -- AST:   BinaryOp: +
     -- AST:     Source: "1"
     -- AST:       Number: 1
     -- AST:     Source: "2 * (3 + 4) / 5 - 6"
     -- AST:       BinaryOp: *
     -- AST:         Source: "2"
     -- AST:           Number: 2
     -- AST:         Source: "(3 + 4) / 5 - 6"
     -- AST:           BinaryOp: /
     -- AST:             Source: "(3 + 4)"
     -- AST:               BinaryOp: +
     -- AST:                 Source: "3"
     -- AST:                   Number: 3
     -- AST:                 Source: "4"
     -- AST:                   Number: 4
     -- AST:             Source: "5 - 6"
     -- AST:               BinaryOp: -
     -- AST:                 Source: "5"
     -- AST:                   Number: 5
     -- AST:                 Source: "6"
     -- AST:                   Number: 6
    

    Integrating It To The Question

    Without further comment:

    Live On Coliru

    // #define BOOST_SPIRIT_DEBUG
    #include <boost/fusion/include/adapted.hpp>
    #include <boost/fusion/include/io.hpp>
    #include <boost/phoenix.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    namespace qi  = boost::spirit::qi;
    namespace phx = boost::phoenix;
    
    // simple expression AST
    //
    // An expression is either a leaf (number or variable name) or a binary
    // operation; every Expr additionally records the text it was parsed from.
    namespace Ast {
        // Leaf node types.
        using Number     = double;
        using Variable   = std::string;
        using SimpleExpr = boost::variant<Number, Variable>;
    
        struct BinaryOp;
        struct Expr;
        // BinaryOp is still incomplete at this point and itself contains Expr
        // members, hence the recursive_wrapper around it.
        using ExprV = boost::variant<SimpleExpr, boost::recursive_wrapper<BinaryOp>>;
    
        // Expression node: the variant of alternatives plus a source annotation.
        struct Expr : ExprV {
            using ExprV::ExprV;
            using ExprV::operator=;
    
            // Non-owning view of the source text for this node. Being a string_view,
            // it is only valid while the buffer it points into is still alive.
            std::string_view source;
    
            // Access to the underlying variant (without the annotation).
            ExprV&       base() { return *this; }
            ExprV const& base() const { return *this; }
        };
    
        // One binary operation: `left op right`.
        struct BinaryOp {
            char op;
            Expr left, right;
        };
    
        // A single `name = expression` definition, and the collection of them keyed by name.
        using Def  = std::pair<Variable, Expr>;
        using Defs = std::map<Variable, Expr>;
    
        // Make Fusion's stream-insertion operator visible inside this namespace,
        // so that Fusion-adapted types declared here can be printed with <<.
        using boost::fusion::operator<<;
    } // namespace Ast
    
    // Adapt BinaryOp as a Fusion sequence in the order (left, op, right). Note that
    // this differs from the member declaration order (op, left, right).
    BOOST_FUSION_ADAPT_STRUCT(Ast::BinaryOp, left, op, right)
    
    // definitions grammar
    //
    // Parses a comma-separated list of `name = expression` definitions into
    // Ast::Defs. Whitespace between tokens is skipped and the complete input must
    // be consumed. Every parsed Expr gets its `source` set to the input range it
    // was matched from.
    //
    // NOTE(review): Ast::Defs is a std::map, so what happens when the same name is
    // defined twice depends on how Qi inserts into the container - confirm if
    // duplicate names matter.
    template <typename It> struct DefParser : qi::grammar<It, Ast::Defs()> {
        DefParser() : DefParser::base_type(start) {
            using namespace qi;

            // Lazy function storing a text slice on an Expr, and the semantic action
            // that feeds it the range of the attribute `_1` of a raw[...] directive.
            phx::function source_ = [](Ast::Expr& ast, std::string_view sv) { ast.source = sv; };
            auto set_raw_         = source_(_val, phx::construct<std::string_view>(phx::begin(_1), phx::end(_1)));

            // The skipper is installed here, so callers use plain qi::parse.
            start   = skip(space)[defs_ >> eoi];
            defs_   = def_ % ',';
            def_    = var_ >> '=' >> expr_;
            // The right operand is a full expression again, so a chain of
            // operators nests to the right and there is no operator precedence.
            binary_ = simple_ >> char_("+*/-") >> expr_;
            // Every alternative assigns its result to `_val` explicitly; the action on
            // the enclosing raw[...] then records the matched input range.
            expr_   = raw[(binary_[_val = _1] | simple_[_val = _1])][set_raw_];
            simple_ = raw[                         //
                double_[_val = _1]                 //
                | var_[_val = _1]                  //
                | ('(' >> expr_ >> ')')[_val = _1] //
            ][set_raw_];
            var_    = +alpha;

            // Each rule is registered exactly once.
            BOOST_SPIRIT_DEBUG_NODES((expr_)(binary_)(simple_)(var_)(def_)(defs_))
        }

      private:
        qi::rule<It, Ast::Defs()> start;
        using Skipper = qi::space_type;
        qi::rule<It, Ast::Def(), Skipper>      def_;
        qi::rule<It, Ast::Defs(), Skipper>     defs_;
        qi::rule<It, Ast::Expr(), Skipper>     expr_;
        qi::rule<It, Ast::Expr(), Skipper>     simple_;
        qi::rule<It, Ast::BinaryOp(), Skipper> binary_;
        // lexemes (declared without a skipper, so no whitespace inside a name)
        qi::rule<It, Ast::Variable()>   var_;
    };
    
    int main() {
        using It = std::string_view::const_iterator;
        DefParser<It> const p;
    
        std::string_view input = "a=10,b=30,c=A *B +10,  PI = TAU / (((2)))";
    
        auto f = begin(input), l = end(input);
        if (Ast::Defs defs; qi::parse(f, l, p, defs)) {
            for (auto const& [v, e] : defs) {
                std::cout << "   -> " << v << " = " << e << "\n";
                auto pos = e.source.begin() - input.begin();
                std::cout << "Input: " << input << "\n";
                auto n = e.source.size() - 1;
                std::cout << "       " << std::setw(pos) << "" << "^" << std::string(n, '-') << "\n";
            }
        } else {
            std::cout << "FAILED\n";
        }
    }
    

    Prints

       -> PI = (TAU / 2)
    Input: a=10,b=30,c=A *B +10,  PI = TAU / (((2)))
                                       ^------------
       -> a = 10
    Input: a=10,b=30,c=A *B +10,  PI = TAU / (((2)))
             ^-
       -> b = 30
    Input: a=10,b=30,c=A *B +10,  PI = TAU / (((2)))
                  ^-
       -> c = (A * (B + 10))
    Input: a=10,b=30,c=A *B +10,  PI = TAU / (((2)))
                       ^-------