excelperlperl5.8perl5.10

How to generate output files of excel format for each column of A i.e output files for App and Data each using perl code


This is the input file which is of excel format:

A B C D
APP 1 210101 8.1
APP 2 210102 8.2
APP 3 210103 8.3
Data 4 210104 8.4
Data 5 210105 8.5
Data 6 210106 8.6

How to generate output files of excel in below way using Perl?

output file 1 (contains only data from rows of app):

A B C
1 210101 8.1
2 210102 8.2
3 210103 8.3

Output file 2 (contains only data from rows of data):

A B C
4 210104 8.4
5 210105 8.5
6 210106 8.6

Solution

  • Here is an example of how you can split the input file into multiple output files based on the value in column 0 of the input file:

    package Main;
    use v5.22.0;         # experimental signatures requires perl >= 5.22
    use feature qw(say);
    use strict;
    use warnings;
    use experimental qw(signatures);
    use Spreadsheet::ParseXLSX;
    use Excel::CloneXLSX::Format qw(translate_xlsx_format);
    use Excel::Writer::XLSX;
    
    {
        my $self = Main->new(
            input_file    => 'input.xlsx',
            output_prefix => 'out',
        );
        my $worksheet = $self->scan_input_file();
        $self->open_output_files();
        my ( $row_min, $row_max ) = $worksheet->row_range();
        my $col0 = 0; # column number that contains the save type
        my @cols_to_save = (1..3);
        for my $row ( $row_min .. $row_max ) {
            my $cell = $worksheet->get_cell( $row, $col0 );
            my $save_type = $cell->unformatted();
            my $row = $self->get_row($worksheet, $row, \@cols_to_save);
            $self->save_row( $save_type, $row);
        }
        $self->close_output_files();
        say "Done.";
    }
    
    
    sub close_output_files( $self  ) {
        for my $file (keys %{$self->{files}}) {
            my $workbook = $self->{files}{$file}{workbook};
            $workbook->close();
        }
    }
    
    sub save_row( $self, $save_type, $cells ) {
        my $file = $self->{save_types}{$save_type};
        my $workbook = $self->{files}{$file}{workbook};
        my $worksheet = $self->{files}{$file}{worksheet};
        my $row = $self->{files}{$file}{row};
        my $col = 0;
        for my $cell (@$cells) {
            my $fmt = $cell->get_format();
            my $fmt_props  = translate_xlsx_format( $fmt );
            my $new_format = $workbook->add_format(%$fmt_props);
            my $value = $cell->unformatted() || '';
            $worksheet->write($row, $col, $value, $new_format);
            $col++;
        }
        $self->{files}{$file}{row}++;
    }
    
    sub get_row( $self, $worksheet, $row, $cols_to_save ) {
        my @row;
        for my $col (@$cols_to_save) {
            my $cell = $worksheet->get_cell( $row, $col );
            push @row, $cell;
        }
        return \@row;
    }
    
    sub new( $class, %args ) { bless \%args, $class }
    
    sub scan_input_file( $self ) {
        my $parser = Spreadsheet::ParseXLSX->new;
        my $workbook = $parser->parse($self->{input_file});
        my $worksheet = $workbook->worksheet(0);
        my ( $row_min, $row_max ) = $worksheet->row_range();
    
        my %save_types;
        for my $row ( $row_min .. $row_max ) {
            my $col0 = 0;
            my $cell0 = $worksheet->get_cell( $row, $col0 );
            my $save_type = $cell0->unformatted();
            $save_types{$save_type} = 1;
        }
        $self->{save_types} = \%save_types;
        return $worksheet;
    }
    
    sub open_output_files( $self  ) {
        my $save_types = $self->{save_types};
        my $prefix = $self->{output_prefix};
        my %file_info;
        for my $type (keys %$save_types) {
            my $fn = $prefix . "_" . $type . '.xlsx';
            $save_types->{$type} = $fn;
            my $workbook = Excel::Writer::XLSX->new( $fn );
            my $worksheet = $workbook->add_worksheet();
            $file_info{$fn} = {
                workbook  => $workbook,
                worksheet => $worksheet,
                row       => 0,  # current row number
            };
        }
        $self->{files} = \%file_info;
    }