rubyparslet

Ruby: Parslet grammar for a SystemVerilog-style interface parser


I am using Ruby::Parslet.

I am parsing a document similar to an SV interface, eg:

interface my_intf;
  protocol validonly;

  transmit  [Bool]   valid;
  transmit  [Bool]   pipeid;
  transmit  [5:0]    incr;
  transmit  [Bool]   sample;

endinterface

Here is my parser:

# Asker's Parslet grammar for the interface description shown above.
# NOTE(review): Ruby class names must be constants (start with an uppercase
# letter), so `myParse` is rejected by the interpreter as written.
class myParse < Parslet::Parser
  # Punctuation tokens: each literal is wrapped in optional spaces/tabs on
  # both sides.
  rule(:lparen)     { space? >> str('(') >> space? }
  rule(:rparen)     { space? >> str(')') >> space? }
  rule(:lbox)       { space? >> str('[') >> space? }
  rule(:rbox)       { space? >> str(']') >> space? }
  rule(:lcurly)     { space? >> str('{') >> space? }
  rule(:rcurly)     { space? >> str('}') >> space? }
  rule(:comma)      { space? >> str(',') >> space? }
  rule(:semicolon)  { space? >> str(';') >> space? }
  # Matches only when no input character is left.
  rule(:eof)        { any.absent? }
  # One tab or one space ("\s" inside a double-quoted Ruby string is a space).
  rule(:space)      { match["\t\s"] }
  # A run of spaces/tabs, and its optional variant.
  rule(:whitespace) { space.repeat }
  rule(:space?)     { whitespace.maybe }
  # Optional spaces/tabs followed by at least one newline.
  rule(:blank_line) { space? >> newline.repeat(1) }
  rule(:newline)    { str("\n") }

  # Things
  # Generic tokens; neither is referenced by the interface rules below.
  rule(:integer)    { space? >> match('[0-9]').repeat(1).as(:int) >> space? }
  rule(:identifier) { match['a-z'].repeat(1) }


  # `interface <name>;` header line; the name is captured as :intf_name and the
  # line must end with "\n".
  rule(:intf_start)     { space? >> str('interface') >> space? >> (match['a-zA-Z_'].repeat(1,1) >> match['[:alnum:]_'].repeat(0)).as(:intf_name) >> space? >> str(';') >> space? >> str("\n") }
  # `protocol [validonly];` line; the optional keyword is captured as :protocol.
  rule(:protocol)       { space? >> str('protocol') >> whitespace >> (str('validonly').maybe).as(:protocol) >> space? >> str(';') >> space? >> str("\n") }
  # `[Bool]` type, captured as :bool.
  rule(:bool)           { lbox >> space? >> str('Bool').as(:bool) >> space? >> rbox }
  # `[msb:lsb]` bit range, digits captured as :msb and :lsb.
  rule(:transmit_width) { lbox >> space? >> match('[0-9]').repeat.as(:msb) >> space? >> str(':') >> space? >> match('[0-9]').repeat.as(:lsb) >> space? >> rbox }
  # `transmit <type> <name>;` line; the signal name is captured as :transmit_name.
  rule(:transmit)       { space? >> str('transmit') >> whitespace >> (bool | transmit_width) >> whitespace >> (match['a-zA-Z_'].repeat(1,1) >> match['[:alnum:]_'].repeat(0)).as(:transmit_name) >> space? >> str(';') >> space? >> str("\n") }
  # NOTE(review): this is a single choice between one protocol line and an
  # optional blank-line run; `transmit` is never referenced and nothing here
  # is repeated.
  rule(:interface_body) { (protocol | blank_line.maybe) }
  # Header followed by exactly one interface_body; no rule in this class
  # matches the closing `endinterface` keyword.
  rule(:interface)      { intf_start >> interface_body }

  # Root: a sequence of interfaces.
  rule(:expression)     { ( interface ).repeat }

  root :expression
end

I am having an issue making the rule for interface_body.

The interface body can contain zero or more transmit lines, at most one protocol line, and any number of blank lines, comments, etc.

Can someone help me out, please? The rules in the code snippet work for a single transmit line and a single protocol line (each one matches properly on its own), but parsing a whole interface does not work.

Thanks in advance.


Solution

  • Ok... this parses the file you mentioned. I don't understand the desired format so I can't say it will work for all your files, but hopefully this will get you started.

    require 'parslet'
    
    # Parslet grammar for a SystemVerilog-like interface description, e.g.
    #
    #   interface my_intf;
    #     protocol validonly;
    #
    #     transmit  [Bool]   valid;
    #     transmit  [5:0]    incr;
    #
    #   endinterface
    #
    # Named captures produced by the rules below:
    # :intf_name, :protocol, :bool, :msb, :lsb, :transmit_name.
    class MyParse < Parslet::Parser
      # Punctuation tokens. Each one skips leading spaces/tabs only, so the
      # rule that follows a token decides how much whitespace it needs.
      rule(:lparen)     { space? >> str('(') }
      rule(:rparen)     { space? >> str(')') }
      rule(:lbox)       { space? >> str('[') }
      rule(:rbox)       { space? >> str(']') }
      rule(:lcurly)     { space? >> str('{') }
      rule(:rcurly)     { space? >> str('}') }
      rule(:comma)      { space? >> str(',') }
      rule(:semicolon)  { space? >> str(';') }

      # Succeeds only when no input character is left.
      rule(:eof)        { any.absent? }
      # One tab or one space ("\s" in a double-quoted Ruby string is a space).
      rule(:space)      { match["\t\s"] }
      # Mandatory whitespace: at least one space/tab.
      rule(:whitespace) { space.repeat(1) }
      # Optional whitespace: zero or more spaces/tabs.
      rule(:space?)     { space.repeat(0) }
      # Optional spaces/tabs followed by one or more newlines. Always consumes
      # at least one character, so it is safe inside `repeat`.
      rule(:blank_line) { space? >> newline.repeat(1) }
      rule(:newline)    { str("\n") }

      # Generic tokens; not referenced by the interface rules below.
      rule(:integer)    { space? >> match('[0-9]').repeat(1).as(:int) >> space? }
      rule(:identifier) { match['a-z'].repeat(1) }

      # Wraps +expression+ into a full statement line: optional indentation,
      # the expression, an optional gap, `;`, optional trailing spaces and the
      # terminating newline.
      def line(expression)
        space? >> expression >> space? >> str(';') >> space? >> newline
      end

      # Root: optional leading blank lines, any number of interfaces, and
      # optional trailing spaces/tabs.
      rule(:expression?) { blank_line.repeat(0) >> interface.repeat(0) >> space? }

      # Header line, optional blank lines, body lines, closing keyword.
      rule(:interface) {
        intf_start >> blank_line.repeat(0) >> interface_body.repeat(0) >> intf_end
      }

      # One body statement plus any blank lines after it. The negative
      # lookahead stops the repetition in `interface` at `endinterface`.
      rule(:interface_body) {
        intf_end.absent? >> interface_bodyline >> blank_line.repeat(0)
      }

      # `interface <name>;` -- whitespace after the keyword is mandatory so
      # that the keyword cannot run into the name.
      rule(:intf_start) {
        line(str('interface') >> whitespace >> name.as(:intf_name))
      }

      rule(:interface_bodyline) { line(protocol | transmit) }

      # `protocol [validonly]`; the optional keyword is captured as :protocol.
      rule(:protocol) {
        str('protocol') >> whitespace >> str('validonly').maybe.as(:protocol)
      }

      # `transmit <type> <name>`; the signal name is captured as :transmit_name.
      rule(:transmit) {
        str('transmit') >> whitespace >>
          (bool | transmit_width) >> whitespace >>
          name.as(:transmit_name)
      }

      # Identifier: a letter or underscore, then letters, digits or underscores.
      rule(:name) {
        match('[a-zA-Z_]') >> (match['[:alnum:]'] | str('_')).repeat(0)
      }

      # `[Bool]`, tolerating spaces inside the brackets like transmit_width.
      rule(:bool) { lbox >> space? >> str('Bool').as(:bool) >> rbox }

      # `[msb:lsb]`; both bounds need at least one digit.
      rule(:transmit_width) {
        lbox >> space? >>
          match('[0-9]').repeat(1).as(:msb) >> space? >>
          str(':') >> space? >>
          match('[0-9]').repeat(1).as(:lsb) >>
          rbox
      }

      # Closing keyword, optionally indented, followed by the rest of its line
      # and any blank lines (or by the end of input). Consuming the line ending
      # here is what allows a trailing newline and several interfaces per file.
      rule(:intf_end) {
        space? >> str('endinterface') >> space? >> (blank_line.repeat(1) | eof)
      }

      root :expression?
    end
    
      require 'rspec'
      require 'parslet/rig/rspec'
    
      # Bottom-up specs for the grammar: individual rules first, then a whole
      # interface. Newlines are written as "\n" escapes so the expected input
      # does not depend on how this file happens to be indented.
      RSpec.describe MyParse do
        let(:parser) { MyParse.new }

        context "simple_rule" do
          it "should consume protocol line" do
            expect(parser.interface_bodyline).to parse("  protocol validonly;\n")
          end

          it 'name' do
            expect(parser.name).to parse('valid')
          end

          it "bool" do
            expect(parser.bool).to parse('[Bool]')
          end

          it "transmit line" do
            expect(parser.transmit).to parse('transmit [Bool] valid')
          end

          it "transmit as bodyline'" do
            expect(parser.interface_bodyline).to parse("  transmit  [Bool]   valid;\n")
          end
        end

        context "whole_file" do
          it "interface with protocol and transmit lines" do
            source = "interface my_intf;\n" \
                     "  protocol validonly;\n" \
                     "\n" \
                     "  transmit  [Bool]   valid;\n" \
                     "  transmit  [5:0]    incr;\n" \
                     "\n" \
                     "endinterface"
            expect(parser).to parse(source)
          end
        end
      end

      # Run the examples defined above and print one line per example.
      RSpec::Core::Runner.run(['--format', 'documentation'])
    
    
    # Parse test.txt with the grammar; on failure print Parslet's error tree.
    begin
      doc = File.read("test.txt")
      MyParse.new.parse(doc)
    rescue Parslet::ParseFailed => error
      # Current Parslet releases expose the failure tree as
      # #parse_failure_cause; plain #cause is then Ruby's own Exception#cause.
      # Fall back to #cause for older releases that predate the rename.
      failure = error.respond_to?(:parse_failure_cause) ? error.parse_failure_cause : error.cause
      puts failure.ascii_tree
    end
    

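    The main changes: `whitespace` now requires at least one space or tab, while `space?` matches zero or more; the punctuation rules only skip leading spaces; a `line` helper wraps every statement in optional spaces, a `;` and a newline; the identifier pattern is shared through a `name` rule; and `interface` now repeats `interface_body` (a protocol or transmit line plus any blank lines) until `intf_end` (`endinterface`) is reached.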

    so....

    Write your parser from the top down. Write tests from the bottom up. When your tests get to the top you are done! :)

    Good luck.