I'm using Parse::RecDescent to parse lines in a Cisco IOS ACL. The ACL is used on the edge router of a large network, so it contains almost 8k lines which are set by the government. I'm looping through each of those lines and putting the values into a hash. Although it is 8k lines I'm still spending over 14 seconds parsing the lines? Does this sound reasonable? It seems VERY slow to me. Is there some overhead to using a hash verse another data structure?
Sample Input: (about 8k of these or similar)
deny ip 2.3.4.5 0.0.0.7 any log-input
deny ip 5.6.7.8 0.0.0.255 any log-input
deny ip host 9.10.11.12 any log-input
deny ip 13.14.15.16 0.0.31.255 any log-input
permit tcp host 17.18.19.20 host 21.22.23.24 eq bgp
permit icmp 25.26.0.0 0.0.255.255 27.28.0.0 0.0.255.255
Here is my entire parser:
package AccessList::Parser;
use strict;
use warnings;
use Carp;
use Scalar::Util 'blessed';
use Parse::RecDescent;
our $VERSION = '0.05';
sub new {
my ($class) = @_;
my $self = { PARSER => undef, };
bless $self, $class;
$self->_init();
return $self;
}
sub _init {
my ($self) = @_;
$self->{PARSER} = Parse::RecDescent->new( $self->_grammar() );
}
sub parse {
my ( $self, $string ) = @_;
defined ($string) or confess "blank line received";
my $tree = $self->{PARSER}->startrule($string);
defined($tree) or confess "unrecognized line\n";
return visit($tree);
}
#
# Finished tests
#
sub visit {
my ($node) = @_;
my $Rule_To_Key_Map = {
"acl_action" => 1,
"acl_protocol" => 1,
"acl_src_ip" => 1,
"acl_src_port" => 1,
"acl_dst_ip" => 1,
"acl_dst_port" => 1,
"acl_remark" => 1
};
my $parent_key;
my $result;
# set s of explored vertices
my %seen;
#stack is all neighbors of s
my @stack;
push @stack, [ $node, $parent_key ];
my $key;
while (@stack) {
my $rec = pop @stack;
$node = $rec->[0];
$parent_key = $rec->[1]; #undef for root
next if ( $seen{$node}++ );
my $rule_id = ref($node);
if ( exists( $Rule_To_Key_Map->{$rule_id} ) ) {
$parent_key = $rule_id;
}
foreach my $key ( keys %$node ) {
next if ( $key eq "EOL" );
my $next = $node->{$key};
if ( blessed($next) ) {
if ( exists( $next->{__VALUE__} ) ) {
#print ref($node), " ", ref($next), " ", $next->{__VALUE__},"\n";
my $rule = ref($node);
my $token = $next->{__VALUE__};
$result->{$parent_key} = $token;
#print $rule, " ", $result->{$rule}, "\n";
}
push @stack, [ $next, $parent_key ];
#push @stack, $next;
}
}
}
return $result;
}
sub _grammar {
my ($self) = @_;
my $grammar = q{
<autotree>
startrule :
access_list EOL
| acl_remark EOL
| <error>
#
# access-lists
#
access_list : acl_action
acl_remark :
"remark" REMARKS
acl_action :
ACTIONS acl_protocol
#
# protocol options
#
acl_protocol :
PROTOCOL acl_src_ip
#
# access-list source IP addresses
#
acl_src_ip :
address acl_dst_ip
| address acl_src_port
#
# access-list source ports
#
acl_src_port :
port acl_dst_ip
#
# access-list destination IP address
#
acl_dst_ip :
address acl_dst_port
| address acl_options
| address CONNECTION_TYPE
| address LAYER3_OPTIONS
| IPRANGE
#
# access-list destination ports
#
acl_dst_port :
port acl_options
| acl_icmp_type acl_options
#
# icmp_types
#
acl_icmp_type :
ICMP_TYPE
#
# access-list options
#
acl_options :
acl_logging LAYER3_OPTIONS
| acl_logging
| EOL
| <error>
acl_logging :
"log-input"
| "log"
#
# IP address types
#
# "object" should be fine here because "object" can not
# be used to specify ports
address :
"host" IPADDRESS
| "host" NAME
| IPNETWORK
| WILDCARD_NETWORK
| ANY
#
# port types
#
port :
port_eq
| port_range
| port_gt
| port_lt
| port_neq
port_eq :
"eq" PORT_ID
port_range :
"range" PORT_RANGE
port_gt :
"gt" PORT_GT
port_lt :
"lt" PORT_LT
port_neq :
"neq" <error: neq is unsupported>
#
# Token Definitions
#
STRING :
/\S+/
DIGIT :
/\d+/
NAME :
/((^|\s[a-zA-Z])(\.|[0-9a-zA-Z_-]+)+)/
RULE_REF :
/\S+/
ANY:
"any"
IPADDRESS :
/((\d{1,3})((\.)(\d{1,3})){3})/
MASK :
/(((255\.){3}(255|254|252|248|240|224|192|128|0+))|((255\.){2}(255|254|252|248|240|224|192|128|0+)\.0)|((255\.)(255|254|252|248|240|224|192|128|0+)(\.0+){2})|((255|254|252|248|240|224|192|128|0+)(\.0+){3}))/
INVERSE_MASK :
/(0+|1|3|7|15|31|63|127|255)((\.)(255|127|63|31|15|7|3|1|0)){3}/
WILDCARD_NETWORK :
/((\d{1,3})((\.)(\d{1,3})){3}) (0+|1|3|7|15|31|63|127|255)((\.)(255|127|63|31|15|7|3|1|0)){3}/
IPNETWORK :
/((\d{1,3})((\.)(\d{1,3})){3}) (((255\.){3}(255|254|252|248|240|224|192|128|0+))|((255\.){2}(255|254|252|248|240|224|192|128|0+)\.0)|((255\.)(255|254|252|248|240|224|192|128|0+)(\.0+){2})|((255|254|252|248|240|224|192|128|0+)(\.0+){3}))/
IPRANGE :
/((\d{1,3})((\.)(\d{1,3})){3}) ((\d{1,3})((\.)(\d{1,3})){3})/
PROTOCOL :
/\d+/ | "ahp" | "eigrp" | "esp" | "gre" | "icmp" | "icmp6" | "igmp"
| "igrp" | "ip" | "ipinip" | "ipsec" | "nos" | "ospf" | "pcp"
| "pim" | "pptp" | "snp" | "tcp" | "udp" | "41"
GROUP_PROTOCOL :
"tcp-udp" | "tcp" | "udp"
ICMP_TYPE :
/\d+/ | "alternate-address" | "conversion-error" | "echo-reply" | "echo"
| "information-reply" | "information-request" | "mask-reply" | "mask-request"
| "mobile-redirect" | "parameter-problem" | "redirect" | "router-advertisement"
| "router-solicitation" | "source-quench" | "time-exceeded" | "timestamp-reply"
| "timestamp-request" | "traceroute" | "unreachable"
CONNECTION_TYPE:
"established"
LAYER3_OPTIONS:
"fragments" | "packet-too-big"
PORT_ID :
/\S+/
PORT_GT :
/\S+/
{
bless {__VALUE__=>"$item[1] 65535"}, $item[0]
}
PORT_LT :
/\S+/
{
bless {__VALUE__=>"1 $item[1]"}, $item[0]
}
PORT_RANGE :
/\S+ \S+/
ACTIONS :
"permit"
| "deny"
REMARKS :
/.*$/
LOG_LEVEL :
/\d+/ | "emergencies" | "alerts" | "critical" | "errors"
| "warnings" | "notifications" | "informational" | "debugging"
| "disable"
EOL :
/$/
};
return $grammar;
}
1;
Performance problems:
address
in acl_dst_ip
, IPRANGE
in acl_dst_ip
) access_list
)Functionality problems:
remarkfoo
as remark
. Similar mistakes elsewhere.0.0.127.4 0.0.127.255
as "from 0.0.127.4 to 0.0.127.255" and as "from 0.0.0.0 to 0.0.127.255". (The first one found win, so its treated as "from 0.0.0.0 to 0.0.127.255".) The distinction shouldn't even be made in the parser.I started fixing up your code. (COMPLETELY UNTESTED)
# make_parser.pl
use strict;
use warnings;
use Parse::RecDescent qw( );
my $grammar = <<'__EOI__';
{
use strict;
use warnings;
use Socket qw( inet_aton );
my %protocol_names = map { $_ => 1 } qw(
ahp eigrp esp gre icmp icmp6 igmp
igrp ip ipinip ipsec nos ospf pcp
pim pptp snp tcp udp
);
my %protocol_group_names = map { $_ => 1 } qw(
tcp-udp tcp udp
);
my %icmp_type_names = map { $_ => 1 } qw(
alternate-address conversion-error echo-reply echo
information-reply information-request mask-reply mask-request
mobile-redirect parameter-problem redirect router-advertisement
router-solicitation source-quench time-exceeded timestamp-reply
timestamp-request traceroute unreachable
);
sub parse_ipv4_addr {
my ($addr) = @_;
return inet_aton($addr);
}
}
parse : <skip: qr/[ \t]*/> line(s) /\Z/ { $item[2] }
line : line_body /\n|\Z/ { $item[1] }
line_body : PERMIT <commit> permit_deny_args { [ $item[1], $item[3] ] }
| DENY <commit> permit_deny_args { [ $item[1], $item[3] ] }
| REMARK <commit> /[^\n]*/ { 0 }
| /[ \t]+/ { 0 }
permit_deny_args : protocol permit_deny_src permit_deny_dst { [ @item[1,2,3] ] }
permit_deny_src : addrs ports { [ @item[1, 2] ] }
permit_deny_dst : ...
addrs : HOST <commit> ( IPv4_ADDR | DOMAIN ) { [ host => $item[3] ] }
| IPv4_ADDR <commit> IPv4_ADDR { [ range => $item[1], $item[3] ] }
| ANY <commit> { [ any => ] }
ports : EQ <commit> IDENT { [ permit => $item[2], $item[2] ] }
| NEQ <commit> IDENT { [ deny => $item[2], $item[2] ] }
| GT <commit> IDENT { [ deny => 1, $item[2] ] }
| LT <commit> IDENT { [ deny => $item[2], 65535 ] }
| RANGE <commit> IDENT IDENT { [ permit => $item[2], $item[3] ] }
| { [ permit => 1, 65535 ] }
# Rules that match simply return what they match (i.e. no type info is returned).
PROTOCOL_NAME : IDENT { $protocol_names{$item[1]} ? $item[1] : undef }
DOMAIN : ...
IPv4_ADDR : /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ { parse_ipv4_addr($item[1]) }
# Keywords
REMARK : IDENT { $item[1] eq 'remark' ? $item[1] : undef }
PERMIT : IDENT { $item[1] eq 'permit' ? $item[1] : undef }
DENY : IDENT { $item[1] eq 'deny' ? $item[1] : undef }
ANY : IDENT { $item[1] eq 'any' ? $item[1] : undef }
EQ : IDENT { $item[1] eq 'eq' ? $item[1] : undef }
NEQ : IDENT { $item[1] eq 'neq' ? $item[1] : undef }
LT : IDENT { $item[1] eq 'lt' ? $item[1] : undef }
GT : IDENT { $item[1] eq 'gt' ? $item[1] : undef }
IDENT : /[a-zA-Z][a-zA-Z0-9_]*/
__EOI__
Parse::RecDescent->Precompile($grammar, 'Parser')
or die("Bad grammar\n");
Run the above file, then you can use the parse as follows:
# test.pl
use strict;
use warnings;
use Data::Dumper qw( Dumper );
use Parser qw( );
my $text = '...';
my $parser = Parser->new();
print(Dumper($parser->parse($text)));