rubyparslet

How do I use Parslet with strings, not Parslet Slices?


I've started using Parslet to parse some custom data. In the examples, the resulting parsed data is something like:

{ :custom_string => "data"@6 }

And I've created the Transform something like

rule(:custom_string => simple(:x)) { x.to_s }

But it doesn't match, presumably because I'm passing "data"@6 instead of just "data" which isn't just a simple string. All the examples for the Transform have hashes with strings, not with Parslet::Slices which is what the parser outputs. Maybe I'm missing a step but I can't see anything in the docs.

EDIT : More sample code (reduced version but should still be explanatory)

# Sample input: a single slash-separated message line with no trailing newline.
original_text = 'MSGSTART/DATA1/DATA2/0503/MAR'

require "parslet"
# Including Parslet at the top level mixes its methods into Object.
# NOTE(review): the classes below inherit from Parslet::Parser and
# Parslet::Transform, so this include may be unnecessary — confirm.
include Parslet

module ParseExample
  # Grammar for one message line of the form
  #   MSGSTART/<data1>/<data2>/<serial_number>/<month>
  # optionally followed by line terminators. Each of the four fields is
  # captured under its own key in the parse result.
  class Parser < Parslet::Parser
    rule(:fs)      { str("/") } # field separator
    rule(:newline) { str("\n") | str("\r\n") }

    rule(:msgstart) { str("MSGSTART") }

    # One or more word characters; shared by every captured field.
    rule(:word) { match("\\w").repeat(1) }

    rule(:data1)         { word.as(:data1) }
    rule(:data2)         { word.as(:data2) }
    rule(:serial_number) { word.as(:serial_number) }
    rule(:month)         { word.as(:month) }

    # The trailing newline is optional: a mandatory one would make the parse
    # fail on input that ends right after the month field.
    rule(:first_line) do
      msgstart >> fs >> data1 >> fs >> data2 >> fs >> serial_number >> fs >> month >> newline.maybe
    end

    rule(:document) { first_line >> newline.maybe }

    root(:document)
  end
end

module ParseExample
  # Declares one transform rule per captured field. Each rule's pattern is a
  # single-key hash binding the value as a simple, and its block returns the
  # bound value converted with to_s.
  class Transformer < Parslet::Transform
    %i[data1 data2 serial_number month].each do |field|
      rule(field => simple(:x)) { x.to_s }
    end
  end
end

# Run by calling...
# (the local is named `parser` rather than `p` so it does not shadow Kernel#p)
parser = ParseExample::Parser.new
parser_result = parser.parse(original_text)

# => {:data1=>"DATA1"@9, :data2=>"DATA2"@15, :serial_number=>"0503"@21, :month=>"MAR"@26}

# Apply the transform to the same variable the parse result was stored in.
t = ParseExample::Transformer.new
transformed = t.apply(parser_result)

# Actual result => {:data1=>"DATA1"@9, :data2=>"DATA2"@15, :serial_number=>"0503"@21, :month=>"MAR"@26}

# Expected result => {:data1=>"DATA1", :data2=>"DATA2", :serial_number=>"0503", :month=>"MAR"}

Solution

  • You can't replace individual key/value pairs. You have to replace the whole hash at once.

    I fell for this the first time I wrote transformers too. The key is that transform rules match a whole node and replace it in its entirety. Once a node has been matched, it's not visited again.

    If you did consume a hash and only match a single key/value pair, replacing it with a value... you just lost all the other key/value pairs in the same hash.

    However... There is a way!

    If you do want to pre-process all the nodes in a hash before matching the whole hash, then the hash's values need to be hashes themselves. Then you could match those and convert them to strings. You can usually do this by simply adding another 'as' in your parser.

    For example:

    # Sample input: a single slash-separated message line with no trailing newline.
    original_text = 'MSGSTART/DATA1/DATA2/0503/MAR'
    
    require "parslet"
    # Including Parslet at the top level mixes its methods into Object.
    # NOTE(review): the classes below inherit from Parslet::Parser and
    # Parslet::Transform, so this include may be unnecessary — confirm.
    include Parslet
    
    module ParseExample
      # Grammar for one "MSGSTART/<data1>/<data2>/<serial_number>/<month>" line.
      # Every field is captured twice, once as :string and once under its own
      # name, so each value in the resulting hash is itself a one-key hash.
      class Parser < Parslet::Parser
        rule(:fs)       { str("/") }
        rule(:newline)  { str("\n") | str("\r\n") }
        rule(:msgstart) { str("MSGSTART") }

        # Notice the as! It wraps the matched text in a {:string => ...} node.
        rule(:string) { match("\\w").repeat(1).as(:string) }

        # Each named field is a :string capture wrapped in one more `as`.
        %i[data1 data2 serial_number month].each do |field|
          rule(field) { string.as(field) }
        end

        # msgstart, then every field preceded by a separator, then an
        # optional line terminator.
        rule(:first_line) do
          fields = [data1, data2, serial_number, month]
          fields.reduce(msgstart) { |line, field| line >> fs >> field } >> newline.maybe
        end

        rule(:document) { first_line >> newline.maybe }

        root(:document)
      end
    end
    
    # Run by calling...
    # (the local is named `parser` rather than `p` so it does not shadow Kernel#p)
    parser = ParseExample::Parser.new
    parser_result = parser.parse(original_text)
    
    puts parser_result.inspect
    # => {:data1=>{:string=>"DATA1"@9},
    #     :data2=>{:string=>"DATA2"@15},
    #     :serial_number=>{:string=>"0503"@21},
    #     :month=>{:string=>"MAR"@26}}
    
    # See how the values in the hash are now all hashes themselves.
    
    module ParseExample
      # Replaces every {:string => value} node with value.to_s.
      class Transformer < Parslet::Transform
        rule(string: simple(:x)) do
          x.to_s
        end
      end
    end
    
    # We just need to match the "{:string => x}" hashes now...and replace them with strings
    
    # Build the transformer and run it over the tree produced by the parser.
    t = ParseExample::Transformer.new
    transformed = t.apply(parser_result)
    
    # Print the transformed structure for comparison with the expected output below.
    puts transformed.inspect
    # => {:data1=>"DATA1", :data2=>"DATA2", :serial_number=>"0503", :month=>"MAR"}
    
    # Tada!!!
    

    If you had wanted to handle the whole line, to make an object from it, say:

    # Plain value object holding the four fields of one parsed message line.
    class Entry
      # Read access to the stored fields, so the object is usable after
      # construction without reaching into its instance variables.
      attr_reader :data1, :data2, :serial_number, :month

      # All four keywords are required; the values are stored as given.
      #
      # @param data1 [Object] first data field
      # @param data2 [Object] second data field
      # @param serial_number [Object] serial number field
      # @param month [Object] month field
      def initialize(data1:, data2:, serial_number:, month:)
        @data1 = data1
        @data2 = data2
        @serial_number = serial_number
        @month = month
      end
    end
    
    module ParseExample
      # Two rules: one turns a {:string => value} node into value.to_s, the
      # other matches the complete four-field hash and builds an Entry from
      # the bound values.
      class Transformer < Parslet::Transform
        rule(string: simple(:x)) { x.to_s }

        # match the whole hash
        rule(
          data1: simple(:d1),
          data2: simple(:d2),
          serial_number: simple(:s),
          month: simple(:m)
        ) do
          Entry.new(data1: d1, data2: d2, serial_number: s, month: m)
        end
      end
    end
    
    # Build the transformer and run it over the tree produced by the parser.
    t = ParseExample::Transformer.new
    transformed = t.apply(parser_result)
    
    # Print the result; with the whole-hash rule in place it is an Entry.
    puts transformed.inspect
    # => #<Entry:0x007fd5a3d26bf0 @data1="DATA1", @data2="DATA2", @serial_number="0503", @month="MAR">