perlwin32ole

Perl extracting multiple values out of thousands of Excel Files and write each dataset into txt file


I am using Perl 5.24.3 on Windows 7 with Strawberry Perl. I am currently writing a script that searches thousands of Excel files for certain values in the same folder. These values are extracted/read from each Excel file and saved in their corresponding variables. After that, I want to write the variable values into a .txt file.

My problem is that the perl script crashes at Excel file 400ish. I cannot run it when using use strict but it will run if I leave it out. Unfortunately not till the end. How can I optimize my script so it won't crash? How do I need to change my code so that I can run the script WITH use strict?

use diagnostics returns the following:

Use of uninitialized value in string eq at auswertungLieferscheine.pl line 112(#1), line 117, 122, 127, ...

Here is the code:

#use strict;
#use warnings "all";
use Cwd 'abs_path';
use Win32::OLE;
#use diagnostics;

lager();

sub lager
{
    my @xlsm;
    my $strGebLS = "Z:\\User\\Projekte\\Fertigung\\Lieferscheintool\\lieferschein_lager\\bearbeitete Lieferscheine\\gebuchte Lieferscheine\\";
    chdir $strGebLS;

    opendir(DIR, $strGebLS) or die $!;

    while(my $lieferschein = readdir(DIR))
    {   
        next if ($lieferschein !~ m/2018\.xlsm$/);
        push(@xlsm, $lieferschein);
    }
    closedir(DIR);

    my $excel = Win32::OLE->new('Excel.Application', 'Quit') or die $!;
    $excel->{'DisplayAlerts'} = 0;
    $excel->{'Visible'} = 0;

    foreach my $lsDatei(@xlsm)
    {   
    my $absPfad = abs_path("$lsDatei") or die "Fehler: Die Datei $lsDatei wurde nicht gefunden\n";
        my $arbeitsmappe = $excel->Workbooks->Open($absPfad, {
            'ReadOnly' => 1,
            'IgnoreReadOnlyRecommended' => 1
          });
        my $sheet = $arbeitsmappe->Worksheets("LieferscheinBauleitung");

        # THESE VALUES ARE ALWAYS IN THE SAME CELL OR PREDEFINED IF CELLS EMPTY!
        my $lagerort = "Lager";
        my $lsNr = $sheet->Range("C5")->{Value};
        my $status = "gebucht";
        my $bv = $sheet->Range("C7")->{Value};
        my $hv = $sheet->Range("G7")->{Value};
        my $grund = "Kein Anforderungsgrund";
        my $besteller = "Kein Besteller";
        my $abholer = "Kein Abholer";
        my $bezeichnung ="Keine Bezeichnung";
        my $menge = "0";
        my $einzelpreis = "0.0";
        my $summe = "0.0";

        #THESE VALUES ARE IN DIFFERENT CELLS 
#BUT ALWAYS IN 1 out of 2 POSSIBLE COLUMNS! 
#THAT IS WHY I AM USING A FOR LOOP HERE... 
#THE IF LINES RETURN UNDECLARED WARNINGS FOR VARIABLES AND I DONT KNOW WHY! WHY?! #$sheet and $zeile are both defined, aren't they?!
        for (my $zeile=4; $zeile<=25;$zeile++)
        {           
            if($sheet->Range("G" . $zeile)->{Value} eq "Grund")
            {
                $grund = $sheet->Range("G" . $zeile + 2)->{Value};
            }

            if($sheet->Range("A" . $zeile)->{Value} eq "Anforderungsgrund")
            {
                $grund = $sheet->Range("C" . $zeile)->{Value};
            }

            if($sheet->Range("A" . $zeile)->{Value} eq "Besteller")
            {
                $besteller = $sheet->Range("C" . $zeile)->{Value};  
            }

            if($sheet->Range("A" . $zeile)->{Value} eq "Abholer")
            {
                $abholer = $sheet->Range("C" . $zeile)->{Value};
            }   

            if($sheet->Range("B" . $zeile)->{Value} eq "Bezeichnung")
            {
                $bezeichnung = $sheet->Range("B" . ($zeile + 2))->{Value};
            }
            if($sheet->Range("A" . $zeile)->{Value} eq "Menge")
            {
                $menge = $sheet->Range("A" . ($zeile + 2))->{Value};
            }
            if($sheet->Range("H" . $zeile)->{Value} eq "Einzelpreis")
            {
                $einzelpreis = $sheet->Range("H" . ($zeile + 2))->{Value};
            }
            if($sheet->Range("I" . $zeile)->{Value} eq "Summe")
            {
                $summe = $sheet->Range("I" . ($zeile + 2))->{Value};
            }       
        }   
        $arbeitsmappe->Close();
        $excel->Quit();

        if($besteller eq "") 
        {
            $besteller = "Kein Besteller";
        }
        if($abholer eq "")
        {
            $abholer = "Kein Abholer";
        }
        if($grund eq "")
        {
            $grund = "Kein Anforderungsgrund";
        }

        # WRITE VALUES IN VARIABLES INTO TXT-FILE

        if(substr($lsDatei, -9, 4) eq "2018")
        {       
            local $auswertung = "auswertungLieferscheine_2018.txt";     
            local $auswertungPfad = "Z:\\User\\Projekte\\Fertigung\\Lieferscheintool\\lieferschein_auswertung\\$auswertung";
            local $data = $lagerort . "\t" . $lsNr . "\t" . $status . "\t" . $bv . "\t" . $hv . "\t" . $grund . "\t" . $besteller . "\t" . $abholer . "\t" . $bezeichnung . "\t" . $menge . "\t" . $einzelpreis . "\t" . $summe . "\n";
            open(my $fh, '>>', $auswertungPfad) or die $!;

            # PRINT DATASET INTO TXT FILE               
            print $fh $data;
            close $fh;
        }
    }   
}

The warnings occur in the for-loop where it says if($sheet....eq "something"). Alle if lines are concerned. Any solutions?


Solution

  • Here's a possible bug:

    $grund = $sheet->Range("G" . $zeile + 2)->{Value};
    

    makes no sense. The . and + operators have the same precedence (and are left associative), so "G" . $zeile + 2 parses as ("G" . $zeile) + 2. "G..." converted to a number is 0 (and produces a warning), so this effectively calls $sheet->Range(2).

    You probably want

    $grund = $sheet->Range("G" . ($zeile + 2))->{Value};
    

    instead.