perlparsingparse-recdescent

Why does my Parse::RecDescent give me all these warnings and errors?


Having a lot of pain with the following Perl file parsing code [last reply on PM @http://www.perlmonks.org/index.pl?node_id=754947] below:

#!/usr/bin/perl -w

use strict;
use warnings;
#use diagnostics;

use Parse::RecDescent;
use Data::Dumper;

# Enable warnings within the Parse::RecDescent module.

$::RD_ERRORS = 1; # Make sure the parser dies when it encounters an error
$::RD_WARN   = 1; # Enable warnings. This will warn on unused rules &c.
$::RD_HINT   = 1; # Give out hints to help fix problems.
#$::RD_TRACE  = 1; # Trace of parser

#$::AUTOSTUB = 1;

my $grammar = <<'_EOGRAMMAR_';

{
    use strict;
    use warnings;
}
#{ our $errortext = ''; our $errorprefix = '';}
RECORDSTART : /^(RECORD)[\r\n]+/
{
    #print "\n[*] RECORDSTART -> " . $item[1];
    $1;
    #$item[1];
} 

RECORDEND : /^(\.)[\r\n]*/
#/\./
{
    #print "\n[*] RECORDEND -> " . $item[1] . "\n";
    $1;
    #$item[1];
} 

fieldName : /[^ \t\n]+/
{
    #print "\n[*] fieldName -> $item[1]\n";
    $item[1];
}

metaName : /[^ \t\n]+\n?/
{
    $item[1];
}

metaFieldValue: /([^\n]*)\n/
{
    $1;
}

fieldValue : /([^\n]*)\n/
{
    #print "[*] fieldValue -> $item[1] ($1)\n";
    $1;
}

field : /^F/ fieldName fieldValue
{
    #print "[*] Got field named \'" . $item{ fieldName } . '\' with value \'' . $item{ fieldValue } . "\'\n";
    #print "[*] Got metafield named \'" . $item[2] . '\' with value \'' . $item[3] . "\'\n";
    #print Data::Dumper->Dump([$text], ["fieldStuff"]);
    $return = { fieldName => $item[2], fieldValue => $item[3]};
}

metaField : /^\#/ metaName metaFieldValue
{
    #print "[*] Got metafield named \'" . $item{ metaName } . '\' with value \'' . $item{ metaFieldValue } . "\'\n";
    #print "[*] Got metafield named \'" . $item[2] . '\' with value \'' . $item[3] . "\'\n";
    $return = { metaName => $item[2], metaFieldValue => $item[3]};
}

recordBody : field(s)
{
    print "\n[*] field(s)\n";
    #print main::Dumper \@item;
    #print Data::Dumper->Dump([@item], ["field(s)"]);
    $return = 'field(s)';
    #if((length($text) > 3) && (0 == @item))
    if(length($text) > 2)
    {
        $return = undef;
    }
}
|
metaField(s)
{
    print "\n[*] metaField(s)\n";
    #print main::Dumper \@item;
    #print Data::Dumper->Dump([@item], ["metaField(s)"]);
    $return = 'metaField(s)';
    #if((length($text) > 3) && (0 == @item))
    if(length($text) > 2)
    {
        $return = undef;
    }
}
|
<error>
#<error: I am confused in recordBody at $thisoffset!>

#startOfRecord: RECORDSTART recordBody(s /$/) RECORDEND
startOfRecord: RECORDSTART recordBody RECORDEND
#startOfRecord: RECORDSTART ( metaField(s) field(s) ) RECORDEND
#startOfRecord: RECORDSTART ( field(s) metaField(s) ) RECORDEND
{
    #print main::Dumper \@item;
    $return = 'something';
    #$return = $item[1];
    1;
}
|
#<error>
<error: I could not even parse a line line starting at $thisoffset!>
_EOGRAMMAR_

#$skeletonPattern = "#input_type[ \t]*";
#my $metaFieldPattern = qr/[ \t]*#([^ \t]+)[ \t]+(.*)/o; # "#input_type SCDR+", "#filename processed_01_20080616001403.cdr", etc
#my $normalFieldPattern = qr/([ \t]*)([0-9]*)F[ \t]+([^ \t]+)[ \t]+([^ \t\r\n]+)(.*)/; # "1F S_Diagnostic1 62" OR " F S_Diagnostic1 62" OR " F S_Diagnostic1 62" are synonymous, etc

my $testData0 = <<'_EOGTESTA_';
RECORD
F ptc_record_length 00B6
F ptc_charging_start_time 20090604093721
F ptc_charging_end_time 20080604093721
F ptc_called_msrn_ton FF
F ptc_term_mcz_duration 060000
.
_EOGTESTA_

my $testData1 = <<'_EOGTESTA_';
RECORD
#input_id 91210758171x001_0013
#input_type PTC
#output_type MTC
#source_id 01
#filename TTFILE01-0001-20080101000000
#jingalama valuewith#inIt andaSpace
.
_EOGTESTA_

my $testData2 = <<'_EOGTESTA_';
RECORD
F ptc_record_length 00B6
F ptc_charging_start_time 20090604093721
F ptc_charging_end_time 20080604093721
F ptc_called_msrn_ton FF
F ptc_term_mcz_duration 060000
#input_id 91210758171x001_0013
#input_type PTC
#output_type MTC
#source_id 01
#filename TTFILE01-0001-20080101000000
#jingalama valuewith#inIt andaSpace
.
_EOGTESTA_

my $testData3 = <<'_EOGTESTA_';
RECORD
#input_id 91210758171x001_0013
#input_type PTC
#output_type MTC
#source_id 01
#filename TTFILE01-0001-20080101000000
#jingalama valuewith#inIt andaSpace
F ptc_record_length 00B6
F ptc_charging_start_time 20090604093721
F ptc_charging_end_time 20080604093721
F ptc_called_msrn_ton FF
F ptc_term_mcz_duration 060000
.
_EOGTESTA_

my $testData4 = <<'_EOGTESTA_';
RECORD
#input_id 91210758171x001_0013
#output_id 
#input_type PTC
#output_type PTC
#addkey 
#source_id 01
#filename TTFILE01-0001-20080101000000
F ptc_record_length 00B6
F ptc_record_type
F ptc_charging_start_time 20090604093721
F ptc_charging_end_time 20080604093721
F ptc_called_msrn_ton FF
F ptc_term_mcz_duration 060000
F ptc_term_mcz_change_direction 
.
_EOGTESTA_

my $parser = Parse::RecDescent->new($grammar) or die "Bad grammar!\n";;

print $testData0, "\n\n";
$parser->startOfRecord($testData0) ? print "Parsing done sucessfully!\n" : print "Bad input!\n";

print $testData1, "\n\n";
$parser->startOfRecord($testData1) ? print "Parsing done sucessfully!\n" : print "Bad input!\n";

print $testData2, "\n\n";
$parser->startOfRecord($testData2) ? print "Parsing done sucessfully!\n" : print "Bad input!\n";

print $testData3, "\n\n";
$parser->startOfRecord($testData3) ? print "Parsing done sucessfully!\n" : print "Bad input!\n";

print $testData4, "\n\n";
$parser->startOfRecord($testData4) ? print "Parsing done sucessfully!\n" : print "Bad input!\n";

#$parser->startOfRecord($testData) ? print "Parsing done sucessfully!" : die "Bad input!\n";

Output:

RECORD
F ptc_record_length 00B6
F ptc_charging_start_time 20090604093721
F ptc_charging_end_time 20080604093721
F ptc_called_msrn_ton FF
F ptc_term_mcz_duration 060000
.

[*] field(s)
Parsing done sucessfully!
RECORD
#input_id 91210758171x001_0013
#input_type PTC
#output_type MTC
#source_id 01
#filename TTFILE01-0001-20080101000000
#jingalama valuewith#inIt andaSpace
.

[*] metaField(s)
Parsing done sucessfully!
RECORD
F ptc_record_length 00B6
F ptc_charging_start_time 20090604093721
F ptc_charging_end_time 20080604093721
F ptc_called_msrn_ton FF
F ptc_term_mcz_duration 060000
#input_id 91210758171x001_0013
#input_type PTC
#output_type MTC
#source_id 01
#filename TTFILE01-0001-20080101000000
#jingalama valuewith#inIt andaSpace
.

[*] field(s)
Bad input!
RECORD
#input_id 91210758171x001_0013
#input_type PTC
#output_type MTC
#source_id 01
#filename TTFILE01-0001-20080101000000
#jingalama valuewith#inIt andaSpace
F ptc_record_length 00B6
F ptc_charging_start_time 20090604093721
F ptc_charging_end_time 20080604093721
F ptc_called_msrn_ton FF
F ptc_term_mcz_duration 060000
.

[*] metaField(s)
Bad input!
RECORD
#input_id 91210758171x001_0013
#output_id 
#input_type PTC
#output_type PTC
#addkey 
#source_id 01
#filename TTFILE01-0001-20080101000000
F ptc_record_length 00B6
F ptc_record_type
F ptc_charging_start_time 20090604093721
F ptc_charging_end_time 20080604093721
F ptc_called_msrn_ton FF
F ptc_term_mcz_duration 060000
F ptc_term_mcz_change_direction 
.

[*] metaField(s)
Bad input!

Here's STDERR:

print() on closed filehandle ERROR at C:/laPerl/site/lib/Parse/RecDescent.pm line 2905.
Variable "$errortext" is not available at C:/laPerl/site/lib/Parse/RecDescent.pm line 2906.
Variable "$errorprefix" is not available at C:/laPerl/site/lib/Parse/RecDescent.pm line 2906.
Use of uninitialized value $errorprefix in formline at C:/laPerl/site/lib/Parse/RecDescent.pm line 2850.
Use of uninitialized value $errortext in formline at C:/laPerl/site/lib/Parse/RecDescent.pm line 2850.
Use of uninitialized value $errortext in formline at C:/laPerl/site/lib/Parse/RecDescent.pm line 2852.
write() on closed filehandle ERROR at C:/laPerl/site/lib/Parse/RecDescent.pm line 2906.
...

Any suggestions? I am really confused here?

Can anyone figure out what is going wrong (except the choice of ActivePerl 5.10 and WinXP SP2)?


Solution

  • I think the choice of ActivePerl on XP was just fine; the only problem is the grammar.

    Your grammar rule for recordBody says there can only be multiple fields inside, or multiple metafields, and not anything in between.

    If you need any mix of fields/metaFields, I'd suggest to create some artificial rule anyField

    anyField : field | metaField
    
    recordBody : anyField(s)