ruby string

What is the easiest way to remove the first character from a string?


Example:

[12,23,987,43

What is the fastest, most efficient way to remove the "[", using maybe a chop() but for the first character?


Solution

  • I kind of favor using something like:

    # Sample input with the unwanted leading "[".
    asdf = "[12,23,987,43"
    # Assigning an empty string to index 0 removes the first character,
    # modifying the string in place (see the printed result below).
    asdf[0] = '' 
    
    p asdf
    # >> "12,23,987,43"
    

    I'm always looking for the fastest and most readable way of doing things:

    require 'benchmark'
    
    # Number of iterations timed for each report.
    N = 1_000_000
    
    # Print the interpreter version so the timings can be attributed to it.
    puts RUBY_VERSION
    
    # Sample input; in this script it is referenced only by the 'length' report.
    STR = "[12,23,987,43"
    
    # Time N runs of each way of dropping the leading "[" from the sample string.
    # The argument to bm is the width reserved for the label column.
    Benchmark.bm(7) do |b|
      # Index assignment: overwrite the first character with an empty string.
      b.report('[0]') { N.times { "[12,23,987,43"[0] = '' } }
      # Anchored regex; note the "+" means it strips one or more leading brackets.
      b.report('sub') { N.times { "[12,23,987,43".sub(/^\[+/, "") } }
    
      # Same idea as sub, but gsub keeps scanning for further matches.
      b.report('gsub') { N.times { "[12,23,987,43".gsub(/^\[/, "") } }
      # Range slice from index 1 to the end; returns a new string.
      b.report('[1..-1]') { N.times { "[12,23,987,43"[1..-1] } }
      # slice!(0) removes the first character from the receiver.
      b.report('slice') { N.times { "[12,23,987,43".slice!(0) } }
      # Range slice again, but with STR.length as the upper bound instead of -1.
      b.report('length') { N.times { "[12,23,987,43"[1..STR.length] } }
    
    end
    

    Running on my Mac Pro:

    1.9.3
                  user     system      total        real
    [0]       0.840000   0.000000   0.840000 (  0.847496)
    sub       1.960000   0.010000   1.970000 (  1.962767)
    gsub      4.350000   0.020000   4.370000 (  4.372801)
    [1..-1]   0.710000   0.000000   0.710000 (  0.713366)
    slice     1.020000   0.000000   1.020000 (  1.020336)
    length    1.160000   0.000000   1.160000 (  1.157882)
    

    Updating to incorporate one more suggested answer:

    require 'benchmark'
    
    N = 1_000_000
    
    # Convenience methods added to String for the benchmark: drop or take
    # characters from the front of the receiver.
    class String
      # Removes the first +how_many+ characters in place.
      #
      # A slice that starts past the end of the string is nil, which made the
      # original raise TypeError for an empty receiver or an oversized count;
      # in that case the string is now simply emptied.
      #
      # @param how_many [Integer] number of leading characters to drop
      # @return [String] self, after modification
      def eat!(how_many = 1)
        replace(self[how_many..-1] || '')
      end

      # Returns the first +how_many+ characters without modifying the receiver.
      #
      # @param how_many [Integer] number of leading characters to return
      # @return [String] the leading substring
      def first(how_many = 1)
        self[0...how_many]
      end

      # Removes the first +how_many+ characters in place and returns them.
      # Like eat!, it empties the string instead of raising when +how_many+
      # reaches past the end.
      #
      # @param how_many [Integer] number of leading characters to remove
      # @return [String] the removed characters
      def shift(how_many = 1)
        shifted = first(how_many)
        replace(self[how_many..-1] || '')
        shifted
      end
      alias_method :shift!, :shift
    end
    
    # Array counterpart of String#eat! used by the benchmark script.
    class Array
      # Removes the first +how_many+ elements in place.
      #
      # A slice that starts past the end of the array is nil, which made the
      # original raise TypeError for an empty receiver or an oversized count;
      # in that case the array is now simply emptied.
      #
      # @param how_many [Integer] number of leading elements to drop
      # @return [Array] self, after modification
      def eat!(how_many = 1)
        replace(self[how_many..-1] || [])
      end
    end
    
    # Print the interpreter version so the timings can be attributed to it.
    puts RUBY_VERSION
    
    # Sample input; referenced below only by the 'length' report.
    STR = "[12,23,987,43"
    
    # Time N runs of each way of dropping the leading "[" from the sample string.
    # The argument to bm is the width reserved for the label column.
    Benchmark.bm(7) do |b|
      # Index assignment: overwrite the first character with an empty string.
      b.report('[0]') { N.times { "[12,23,987,43"[0] = '' } }
      # Anchored regex; note the "+" means it strips one or more leading brackets.
      b.report('sub') { N.times { "[12,23,987,43".sub(/^\[+/, "") } }
    
      # Same idea as sub, but gsub keeps scanning for further matches.
      b.report('gsub') { N.times { "[12,23,987,43".gsub(/^\[/, "") } }
      # Range slice from index 1 to the end; returns a new string.
      b.report('[1..-1]') { N.times { "[12,23,987,43"[1..-1] } }
      # slice!(0) removes the first character from the receiver.
      b.report('slice') { N.times { "[12,23,987,43".slice!(0) } }
      # Range slice again, but with STR.length as the upper bound instead of -1.
      b.report('length') { N.times { "[12,23,987,43"[1..STR.length] } }
      # String#eat! is the helper added to String earlier in this script.
      b.report('eat!') { N.times { "[12,23,987,43".eat! } }
      # Reverse, chop the (now last) character, and reverse back.
      b.report('reverse') { N.times { "[12,23,987,43".reverse.chop.reverse } }
    end
    

    Which results in:

    2.1.2
                  user     system      total        real
    [0]       0.300000   0.000000   0.300000 (  0.295054)
    sub       0.630000   0.000000   0.630000 (  0.631870)
    gsub      2.090000   0.000000   2.090000 (  2.094368)
    [1..-1]   0.230000   0.010000   0.240000 (  0.232846)
    slice     0.320000   0.000000   0.320000 (  0.320714)
    length    0.340000   0.000000   0.340000 (  0.341918)
    eat!      0.460000   0.000000   0.460000 (  0.452724)
    reverse   0.400000   0.000000   0.400000 (  0.399465)
    

    And another using /^./ to find the first character:

    require 'benchmark'
    
    N = 1_000_000
    
    # Convenience methods added to String for the benchmark: drop or take
    # characters from the front of the receiver.
    class String
      # Removes the first +how_many+ characters in place.
      #
      # A slice that starts past the end of the string is nil, which made the
      # original raise TypeError for an empty receiver or an oversized count;
      # in that case the string is now simply emptied.
      #
      # @param how_many [Integer] number of leading characters to drop
      # @return [String] self, after modification
      def eat!(how_many = 1)
        replace(self[how_many..-1] || '')
      end

      # Returns the first +how_many+ characters without modifying the receiver.
      #
      # @param how_many [Integer] number of leading characters to return
      # @return [String] the leading substring
      def first(how_many = 1)
        self[0...how_many]
      end

      # Removes the first +how_many+ characters in place and returns them.
      # Like eat!, it empties the string instead of raising when +how_many+
      # reaches past the end.
      #
      # @param how_many [Integer] number of leading characters to remove
      # @return [String] the removed characters
      def shift(how_many = 1)
        shifted = first(how_many)
        replace(self[how_many..-1] || '')
        shifted
      end
      alias_method :shift!, :shift
    end
    
    # Array counterpart of String#eat! used by the benchmark script.
    class Array
      # Removes the first +how_many+ elements in place.
      #
      # A slice that starts past the end of the array is nil, which made the
      # original raise TypeError for an empty receiver or an oversized count;
      # in that case the array is now simply emptied.
      #
      # @param how_many [Integer] number of leading elements to drop
      # @return [Array] self, after modification
      def eat!(how_many = 1)
        replace(self[how_many..-1] || [])
      end
    end
    
    # Print the interpreter version so the timings can be attributed to it.
    puts RUBY_VERSION
    
    # Sample input; referenced below only by the 'length' report.
    STR = "[12,23,987,43"
    
    # Time N runs of each way of dropping the leading "[" from the sample string.
    # The argument to bm is the width reserved for the label column.
    Benchmark.bm(7) do |b|
      # Index assignment: overwrite the first character with an empty string.
      b.report('[0]') { N.times { "[12,23,987,43"[0] = '' } }
      # Regex index assignment: replace the first match of /^./ (any first character).
      b.report('[/^./]') { N.times { "[12,23,987,43"[/^./] = '' } }
      # Regex index assignment matching a literal leading "[" only.
      b.report('[/^\[/]') { N.times { "[12,23,987,43"[/^\[/] = '' } }
      # sub with "+": strips one or more leading brackets.
      b.report('sub+') { N.times { "[12,23,987,43".sub(/^\[+/, "") } }
      # sub without "+": strips a single leading bracket.
      b.report('sub') { N.times { "[12,23,987,43".sub(/^\[/, "") } }
      # Same pattern as sub, but gsub keeps scanning for further matches.
      b.report('gsub') { N.times { "[12,23,987,43".gsub(/^\[/, "") } }
      # Range slice from index 1 to the end; returns a new string.
      b.report('[1..-1]') { N.times { "[12,23,987,43"[1..-1] } }
      # slice!(0) removes the first character from the receiver.
      b.report('slice') { N.times { "[12,23,987,43".slice!(0) } }
      # Range slice again, but with STR.length as the upper bound instead of -1.
      b.report('length') { N.times { "[12,23,987,43"[1..STR.length] } }
      # String#eat! is the helper added to String earlier in this script.
      b.report('eat!') { N.times { "[12,23,987,43".eat! } }
      # Reverse, chop the (now last) character, and reverse back.
      b.report('reverse') { N.times { "[12,23,987,43".reverse.chop.reverse } }
    end
    

    Which results in:

    # >> 2.1.5
    # >>               user     system      total        real
    # >> [0]       0.270000   0.000000   0.270000 (  0.270165)
    # >> [/^./]    0.430000   0.000000   0.430000 (  0.432417)
    # >> [/^\[/]   0.460000   0.000000   0.460000 (  0.458221)
    # >> sub+      0.590000   0.000000   0.590000 (  0.590284)
    # >> sub       0.590000   0.000000   0.590000 (  0.596366)
    # >> gsub      1.880000   0.010000   1.890000 (  1.885892)
    # >> [1..-1]   0.230000   0.000000   0.230000 (  0.223045)
    # >> slice     0.300000   0.000000   0.300000 (  0.299175)
    # >> length    0.320000   0.000000   0.320000 (  0.325841)
    # >> eat!      0.410000   0.000000   0.410000 (  0.409306)
    # >> reverse   0.390000   0.000000   0.390000 (  0.393044)
    

    Here's another update on faster hardware and a newer version of Ruby:

    2.3.1
                  user     system      total        real
    [0]       0.200000   0.000000   0.200000 (  0.204307)
    [/^./]    0.390000   0.000000   0.390000 (  0.387527)
    [/^\[/]   0.360000   0.000000   0.360000 (  0.360400)
    sub+      0.490000   0.000000   0.490000 (  0.492083)
    sub       0.480000   0.000000   0.480000 (  0.487862)
    gsub      1.990000   0.000000   1.990000 (  1.988716)
    [1..-1]   0.180000   0.000000   0.180000 (  0.181673)
    slice     0.260000   0.000000   0.260000 (  0.266371)
    length    0.270000   0.000000   0.270000 (  0.267651)
    eat!      0.400000   0.010000   0.410000 (  0.398093)
    reverse   0.340000   0.000000   0.340000 (  0.344077)
    

    Why is gsub so slow?

    After each search/replace, gsub has to check for possible additional matches before it can tell whether it's finished; sub performs a single replacement and stops. Think of gsub as costing a minimum of two sub calls.

    Also, it's important to remember that gsub and sub can both be handicapped by a poorly written regex, which matches much more slowly than a sub-string search. If possible, anchor the regex to get the most speed from it. There are answers here on Stack Overflow demonstrating that, so search around if you want more information.