c++ data-structures boost boost-spirit-qi

Parsing several structures with boost spirit


I have to parse a netlist which looks like this:

*comment line 1
V1 N001 N002 10
R1 N001 N002 24.9

*comment line 2
R2 N002 N003 20
V2 N002 N003 5

This is what my structure looks like. (The comment lines need to be in a separate structure in case this part needs to be expanded later.)

  // One comment line of the netlist (an input line starting with '*').
  // Kept as its own struct so that it can be extended later.
  struct CommentLine {
  // Text of the comment line, if any.
  boost::optional<std::string> comment_line;
};

// One element line of the netlist, e.g. "V1 N001 N002 10".
// Every field is optional; main() prints "Not specified" for unset ones.
struct ElementStatement {
  boost::optional<std::string> label;  // element name, e.g. "V1" or "R1"
  boost::optional<std::string> node1;  // first node, e.g. "N001"
  boost::optional<std::string> node2;  // second node, e.g. "N002"
  boost::optional<double> value;       // numeric value, e.g. 10 or 24.9
};

// Result of parsing a whole netlist: comment lines and element statements
// are collected into two separate vectors.
struct SpiceNetlist {
  std::vector<CommentLine> comment_lines;  // the '*' comment lines
  std::vector<ElementStatement> elements;  // the element statements
};

This is what I came up with, using code suggested in a previous question:

// Adapt the structs as Boost.Fusion sequences so that Spirit Qi can assign
// parsed attributes to their members in the listed order. The names given
// here must be the actual member names of each struct (ElementStatement
// declares label, node1, node2 and value).
BOOST_FUSION_ADAPT_STRUCT(SpiceNetlist, comment_lines, elements)
BOOST_FUSION_ADAPT_STRUCT(CommentLine, comment_line)
BOOST_FUSION_ADAPT_STRUCT(ElementStatement, label, node1, node2, value)

namespace qi = boost::spirit::qi;

// Spirit Qi grammar intended to parse a whole netlist into a SpiceNetlist.
// The start rule takes no skipper; it wraps its body in qi::skip(qi::blank).
template <typename It>
class SpiceGrammar : public qi::grammar<It, SpiceNetlist()> {
 public:
SpiceGrammar() : SpiceGrammar::base_type(spice_netlist_) {
     // NOTE(review): this assigns to `spice_netlist` and refers to
     // `statements`, while the members declared below are named
     // `spice_netlist_` and `statements_` -- confirm the intended names.
     spice_netlist = qi::skip(qi::blank)[comment_lines >> (statements || comment_lines)];

    // One or more comment lines, separated by end-of-line.
    comment_lines = comment_line % qi::eol;
    // As written: '*', a run of graph characters, one whitespace character,
    // another run of graph characters, one whitespace character, and then
    // exactly one graph character.
    comment_line = qi::lit('*') >> *(qi::graph) >> qi::space >> *(qi::graph) >>
               qi::space >> qi::graph;

    // One or more statements, separated by end-of-line.
    statements_    = statement_ % qi::eol;
    // Every part of a statement is optional.
    statement_     = -label_ >> -node1_ >> -node2_ >> -value_;
    // As written: a label is exactly two graph characters and a node is
    // exactly four.
    label_         = qi::graph >> qi::graph;
    node1_         = qi::graph >> qi::graph >> qi::graph >> qi::graph;
    node2_         = qi::graph >> qi::graph >> qi::graph >> qi::graph;
    value_         = qi::double_;

    // NOTE(review): several names listed here lack the trailing underscore
    // used by the members declared below.
    BOOST_SPIRIT_DEBUG_NODES((spice_netlist_)(comment_lines)(comment_line)(statements)    (statement)(label)(node1)(node2)(value))
  }

private:
// Start rule; declared without a skipper.
qi::rule<It, SpiceNetlist()> spice_netlist_;

using Skipper = qi::blank_type;

// Not referenced by the constructor above.
Skipper skipper_;

// NOTE(review): the attribute is written `std::vector<CommentLine>` without
// the `()` that the other rule declarations use.
qi::rule<It, std::vector<CommentLine>, Skipper> comment_lines;
qi::rule<It, std::string()> comment_line;
// NOTE(review): `ElementStatements` is not declared in the code shown.
qi::rule<It, ElementStatements(), Skipper> statements_;
qi::rule<It, ElementStatement(), Skipper>  statement_;

// Rules declared without a skipper.
qi::rule<It, std::string()> label_, node1_, node2_;
qi::rule<It, double()>      value_;
};

  
// Parses `input` with SpiceGrammar, prints one diagnostic block (the quoted
// input, whether the parse succeeded, and the quoted unparsed remainder) to
// std::cout, and returns the netlist filled in by the parser. The netlist is
// returned regardless of whether the parse succeeded.
SpiceNetlist parse_netlist_from_string(std::string_view input) {
    using It = std::string_view::const_iterator;
    static SpiceGrammar<It> const g;

    SpiceNetlist netlist;

    It       f = input.begin();
    It const l = input.end();
    bool const success = qi::parse(f, l, g, netlist);

    // qi::parse takes `f` by reference and leaves it at the first unparsed
    // character. The remainder is taken with substr() because building a
    // string_view from an iterator pair requires C++20, whereas substr()
    // also works in C++17.
    auto const consumed = static_cast<std::size_t>(f - input.begin());

    std::cout << "Parsing: " << std::quoted(input) << " -> "
              << "Parse " << (success ? "SUCCESS" : "FAILED") << "\n"
              << "Remaining: " << std::quoted(input.substr(consumed)) << std::endl;

    return netlist;
}



   int main(){
    for (const auto &[comment] : netlist.comment_lines) {
          std::cout << "Comment: " << comment << std::endl;
        }
    for (auto const& [label, node1, node2, value] : netlist.elements) {
        std::cout                                                                              
            << "Element Label: " << label.value_or("Not specified") << "\n"                    
            << "        Node1: " << node1.value_or("Not specified") << "\n"                    
            << "        Node2: " << node2.value_or("Not specified") << "\n"                    
            << "        Value: " << (value ? std::to_string(*value) : "Not specified") << "\n" 
            << std::endl;
    }
    }

The parsing is successful only up to the second '*comment li', and the attributes are also being filled incorrectly. Is the problem with my grammar for spice_netlist or comment_line, or am I defining the rule with the Skipper incorrectly? I have also tried spice_netlist = qi::skip(qi::blank)[comment_lines >> *(statements | comment_lines)]; but that generates build errors.


Solution

  • This is how my structure looks like: (The comment lines need to be in a separate structure in case this part needs to be expanded later)

    The structure doesn't match the input example: it loses the grouping of comment lines with element statements.

    Secondly, it really looks like you don't know what skippers do, or what you want:

    comment_line  = qi::lit('*') >> *(qi::graph) >> qi::space >> *(qi::graph) >> qi::space >> qi::graph;
    

    I'd expect a simple syntax like:

    comment_line  = '*' >> *(qi::char_ - qi::eol);
    

    Suggested Fix

    I'd suggest making a comment just another statement, as it appears in the example. Don't make the string optional, because you can never have a comment line without a string. A line with a single * will be an empty string:

    // AST produced by the parser: a netlist is a flat list of statements,
    // each of which is either a comment or an element.
    namespace Ast {
        using boost::optional;

        // A comment line; always a string, never optional.
        using Comment = std::string;

        // An element line such as "V1 N001 N002 10".
        struct Element {
            optional<std::string> label;
            optional<std::string> node1;
            optional<std::string> node2;
            optional<double>      value;
        };

        // A statement is either a comment or an element.
        using Statement  = boost::variant<Comment, Element>;
        using Statements = std::vector<Statement>;

        // The whole parsed netlist.
        struct SpiceNetlist {
            Statements statements;
        };
    } // namespace Ast
    
    // Adapt the AST structs as Boost.Fusion sequences; the members are listed
    // in the order in which the grammar's rules produce them.
    BOOST_FUSION_ADAPT_STRUCT(Ast::SpiceNetlist, statements)
    BOOST_FUSION_ADAPT_STRUCT(Ast::Element, label, node1, node2, value)
    

    Now you can adjust the grammar likewise:

        spice_netlist_ = qi::skip(qi::blank)[statements_];
    
        statements_ = statement_ % qi::eol;
        statement_  = comment_ | element_;
        comment_    = qi::char_('*') >> *(qi::char_ - qi::eol);
        element_    = -label_ >> -node1_ >> -node2_ >> -value_;
        label_      = +qi::graph;
        node1_      = +qi::graph;
        node2_      = +qi::graph;
        value_      = qi::double_;
    

    Now the output is already close:

    Live On Coliru

    Parsing: "* comment line 1
    V1 N001 N002 10
    R1 N001 N002 24.9
    
    *comment line 2
    R2 N002 N003 20
    V2 N002 N003 5
    
    * some
    * more comment lines without particular layout
    " -> Parse SUCCESS
    Remaining: ""
    * comment line 1
    Element Label: V1 Node1: N001 Node2: N002 Value: 10
    Element Label: R1 Node1: N001 Node2: N002 Value: 24.9
    Element Label:-- Node1:-- Node2:-- Value:--
    *comment line 2
    Element Label: R2 Node1: N002 Node2: N003 Value: 20
    Element Label: V2 Node1: N002 Node2: N003 Value: 5
    Element Label:-- Node1:-- Node2:-- Value:--
    * some
    * more comment lines without particular layout
    Element Label:-- Node1:-- Node2:-- Value:--
    

    Note that, for no particular reason, I chose to include the * inside the comment string; it makes the output more readable.

    The Empty Trailing Element

    This is caused by element_ having only optional parts. q = -a >> -b >> -c >> -d will always match zero-length input, so *q would keep matching forever, and q % eol matches empty lines too. Fix that by making at least one part non-optional:

    element_    = label_ >> -node1_ >> -node2_ >> -value_;
    

    What you probably expect is that empty lines have no effect:

    statements_ = -statement_ % qi::eol;
    

    Demo

    Live On Coliru

    #include <boost/optional/optional_io.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    
    // AST produced by the parser: a netlist is a flat list of statements,
    // each of which is either a comment or an element.
    namespace Ast {
        using boost::optional;

        // A comment line; always a string, never optional.
        using Comment = std::string;

        // An element line such as "V1 N001 N002 10".
        struct Element {
            optional<std::string> label;
            optional<std::string> node1;
            optional<std::string> node2;
            optional<double>      value;
        };

        // A statement is either a comment or an element.
        using Statement  = boost::variant<Comment, Element>;
        using Statements = std::vector<Statement>;

        // The whole parsed netlist.
        struct SpiceNetlist {
            Statements statements;
        };

        // Writes an element on a single line. The optional fields are streamed
        // directly, so their rendering is the one provided by Boost's
        // optional_io header.
        std::ostream& operator<<(std::ostream& os, const Element& element) {
            os << "Element Label:" << element.label;
            os << " Node1:" << element.node1;
            os << " Node2:" << element.node2;
            os << " Value:" << element.value;
            return os;
        }
    } // namespace Ast
    
    // Adapt the AST structs as Boost.Fusion sequences; the members are listed
    // in the order in which the grammar's rules produce them.
    BOOST_FUSION_ADAPT_STRUCT(Ast::SpiceNetlist, statements)
    BOOST_FUSION_ADAPT_STRUCT(Ast::Element, label, node1, node2, value)
    
    namespace qi = boost::spirit::qi;
    
    // Spirit Qi grammar that parses a netlist into an Ast::SpiceNetlist.
    // The start rule itself takes no skipper; it installs qi::blank as the
    // skipper for the statement-level rules.
    template <typename It> //
    struct SpiceGrammar : qi::grammar<It, Ast::SpiceNetlist()> {
        SpiceGrammar() : SpiceGrammar::base_type(spice_netlist_) {
            spice_netlist_ = qi::skip(qi::blank)[statements_];
    
            // Lines are separated by eol; the statement on each line is
            // optional, so that empty lines are accepted.
            statements_ = -statement_ % qi::eol;
            // A comment is tried first, then an element.
            statement_  = comment_ | element_;
            // The leading '*' is kept in the attribute, followed by everything
            // up to the end of the line.
            comment_    = qi::char_('*') >> *(qi::char_ - qi::eol);
            // The label is mandatory so that an element cannot match empty
            // input; the remaining parts are optional.
            element_    = label_ >> -node1_ >> -node2_ >> -value_;
            // Labels and nodes are runs of one or more graph characters.
            label_      = +qi::graph;
            node1_      = +qi::graph;
            node2_      = +qi::graph;
            value_      = qi::double_;
    
            BOOST_SPIRIT_DEBUG_NODES( //
                (spice_netlist_)(element_)(comment_)(statements_)(statement_)(label_)(node1_)(node2_)(value_))
        }
    
      private:
        // Start rule; declared without a skipper.
        qi::rule<It, Ast::SpiceNetlist()> spice_netlist_;
        using Skipper = qi::blank_type;
    
        // Rules that run under the blank skipper.
        qi::rule<It, Ast::Statements(), Skipper> statements_;
        qi::rule<It, Ast::Element(), Skipper>    element_;
        qi::rule<It, Ast::Statement(), Skipper>  statement_;
    
        // lexemes: declared without a skipper
        qi::rule<It, Ast::Comment()> comment_;
        qi::rule<It, std::string()>  label_, node1_, node2_;
        qi::rule<It, double()>       value_;
    };
    
    // Parses `input` with SpiceGrammar, prints one diagnostic block (the quoted
    // input, whether the parse succeeded, and the quoted unparsed remainder) to
    // std::cout, and returns the netlist filled in by the parser. The netlist
    // is returned regardless of whether the parse succeeded.
    Ast::SpiceNetlist parse_netlist_from_string(std::string_view input) {
        using It = std::string_view::const_iterator;
        static SpiceGrammar<It> const g;

        Ast::SpiceNetlist netlist;

        It       f = input.begin();
        It const l = input.end();
        bool const success = qi::parse(f, l, g, netlist);

        // qi::parse takes `f` by reference and leaves it at the first unparsed
        // character. The remainder is taken with substr() because building a
        // string_view from an iterator pair requires C++20, whereas substr()
        // also works in C++17.
        auto const consumed = static_cast<std::size_t>(f - input.begin());

        std::cout << "Parsing: " << std::quoted(input) << " -> "
                  << "Parse " << (success ? "SUCCESS" : "FAILED") << "\n"
                  << "Remaining: " << std::quoted(input.substr(consumed)) << std::endl;

        return netlist;
    }
    
    // Parses a sample netlist (including blank lines and free-form comment
    // lines) and prints each resulting statement on its own line.
    int main() {
        const std::string_view sample = R"(* comment line 1
    V1 N001 N002 10
    R1 N001 N002 24.9
    
    *comment line 2
    R2 N002 N003 20
    V2 N002 N003 5
    
    * some
    * more comment lines without particular layout
    )";

        const auto netlist = parse_netlist_from_string(sample);
        for (const auto& statement : netlist.statements) {
            std::cout << statement << "\n";
        }
    }
    

    Printing

    Parsing: "* comment line 1
    V1 N001 N002 10
    R1 N001 N002 24.9
    
    *comment line 2
    R2 N002 N003 20
    V2 N002 N003 5
    
    * some
    * more comment lines without particular layout
    " -> Parse SUCCESS
    Remaining: ""
    * comment line 1
    Element Label: V1 Node1: N001 Node2: N002 Value: 10
    Element Label: R1 Node1: N001 Node2: N002 Value: 24.9
    *comment line 2
    Element Label: R2 Node1: N002 Node2: N003 Value: 20
    Element Label: V2 Node1: N002 Node2: N003 Value: 5
    * some
    * more comment lines without particular layout