perl parallel-processing

MCE parallel processing: copy and modify an AoA


How could I change this code to make it work or is there a better approach to solve the problem?

#!/usr/bin/env perl
use warnings;
use 5.14.0;
use Data::Dumper;
use Scalar::Util qw(looks_like_number);
use MCE;

# Return a random string of 1 to 20 characters drawn from ASCII letters,
# digits and the space character.
sub get_string {
    my @alphabet = ( 'a' .. 'z', 'A' .. 'Z', '0' .. '9', ' ' );

    # Draw the length first, then one character per position.
    my $length = int( rand(20) ) + 1;
    my $string = '';
    $string .= $alphabet[ rand @alphabet ] for 1 .. $length;
    return $string;
}

# Source data: 10 rows of 6 random strings each.
my $table = [ map { [ map { get_string } 0 .. 5 ] } 0 .. 9 ];

# Result containers: modified copy of the table, per-cell numeric flags,
# and the length of the longest modified cell.
my ( $modified_table, $data_type, $max_length ) = ( [], [], 0 );

# Attempted parallel version of the table transform. NOTE: "[?]" below is a
# placeholder for the destination row index that is still to be determined;
# the code is not valid Perl as written.
my $mce = MCE->new(
    chunk_size  => 5,
    input_data  => $table,
    user_func   => sub {
        my ( $mce, $chunk_ref, $chunk_id ) = @_;
        # $row and $col index into the current chunk, not into the full table.
        # NOTE(review): user_func presumably runs in a worker process, so these
        # writes may never reach the parent's variables - confirm against the
        # MCE documentation.
        for my $row ( 0 .. $#$chunk_ref ) {
            for my $col ( 0 .. $#{$chunk_ref->[$row]} ) {
                # Copy the cell with every whitespace character replaced by "_".
                $modified_table->[?][$col] = $chunk_ref->[$row][$col] =~ s/\s/_/gr;
                # Flag the cell as 1 if it looks like a number, otherwise 0.
                $data_type->[?][$col] = looks_like_number $modified_table->[?][$col] ? 1 : 0;
                # Track the longest modified cell seen so far.
                if ( length( $modified_table->[?][$col] ) > $max_length ) {
                    $max_length = length $modified_table->[?][$col];
                }
            }
        }
    },
)->run();


# Dump both result tables and the maximum cell length.
say Dumper $modified_table;
say Dumper $data_type;
say $max_length;

Without parallel processing it would look like this:


# Sequential reference version: walk every cell of $table, store the modified
# copy and its numeric flag at the same position, and track the longest cell.
for my $r ( 0 .. $#{$table} ) {
    for my $c ( 0 .. $#{ $table->[$r] } ) {
        # Whitespace characters become underscores in the copy.
        my $cell = $table->[$r][$c] =~ s/\s/_/gr;
        my $len  = length $cell;

        $modified_table->[$r][$c] = $cell;
        $data_type->[$r][$c]      = looks_like_number($cell) ? 1 : 0;
        $max_length = $len if $len > $max_length;
    }
}

Solution

  • MCE::Candy currently lacks the ability to call a callback function in chunk order. I will add that capability in the next MCE release. The following example uses MCE::Relay to apply the updates in order and passes the results to the parent via MCE->do.

    #!/usr/bin/env perl
    use warnings;
    use 5.14.0;
    use Data::Dumper;
    use Scalar::Util qw(looks_like_number);
    use MCE;
    
    # Fixed seed so that every run generates the same table.
    srand(963);
    
    # Return a random string of 1 to 20 characters drawn from ASCII letters,
    # digits and the space character.
    sub get_string {
        my @alphabet = ( 'a' .. 'z', 'A' .. 'Z', '0' .. '9', ' ' );
        # Draw the length first, then one character per position.
        my $length = int( rand(20) ) + 1;
        my $string = '';
        $string .= $alphabet[ rand @alphabet ] for 1 .. $length;
        return $string;
    }
    
    # Source data: 10 rows of 6 random strings each.
    my $table = [ map { [ map { get_string } 0 .. 5 ] } 0 .. 9 ];
    
    # Result containers owned by the parent process: modified copy of the
    # table, per-cell numeric flags, and the longest modified cell length.
    my ( $modified_table, $data_type, $max_length ) = ( [], [], 0 );
    
    # Callback invoked through MCE->do with one chunk's results. Appends the
    # chunk's rows to the file-level result tables and raises $max_length when
    # the chunk's maximum is larger.
    #   arg 1 - array ref of modified rows for the chunk
    #   arg 2 - array ref of data-type rows for the chunk
    #   arg 3 - longest cell length found in the chunk
    sub upd_vars {
        my ( $chunk_rows, $chunk_types, $chunk_max ) = @_;
        push @{$modified_table}, @{$chunk_rows};
        push @{$data_type},      @{$chunk_types};
        $max_length = $chunk_max if $chunk_max > $max_length;
    }
    
    # Process the table in chunks: each user_func call fills chunk-local result
    # arrays, then passes them to upd_vars via MCE->do from inside a relay
    # block so that the chunks are appended in order.
    my $mce = MCE->new(
        max_workers => 2,
        chunk_size  => 3,
        input_data  => $table,
        init_relay  => 0, # loads MCE::Relay if defined
        user_func   => sub {
            my ( $mce, $chunk_ref, $chunk_id ) = @_;
            ## Chunk-local results; the names differ from the parent's variables on purpose
            my ( $chunk_rows, $chunk_types, $chunk_max ) = ( [], [], 0 );
            for my $r ( 0 .. $#{$chunk_ref} ) {
                for my $c ( 0 .. $#{ $chunk_ref->[$r] } ) {
                    # Whitespace characters become underscores in the copy.
                    my $cell = $chunk_ref->[$r][$c] =~ s/\s/_/gr;
                    my $len  = length $cell;
                    $chunk_rows->[$r][$c]  = $cell;
                    $chunk_types->[$r][$c] = looks_like_number($cell) ? 1 : 0;
                    $chunk_max = $len if $len > $chunk_max;
                }
            }
            MCE::relay {
                # Append/update parent vars, orderly
                $mce->do( 'upd_vars', $chunk_rows, $chunk_types, $chunk_max );
            };
        },
    )->run();
    
    # Dump both result tables and the maximum cell length.
    say Dumper $modified_table;
    say Dumper $data_type;
    say $max_length;
    

    Edit:

    MCE v1.897 adds gather out_iter_callback capability to MCE::Candy.

    #!/usr/bin/env perl
    use warnings;
    use 5.14.0;
    use Data::Dumper;
    use Scalar::Util qw(looks_like_number);
    use MCE;
    use MCE::Candy;
    
    # Fixed seed so that every run generates the same table.
    srand(963);
    
    # Return a random string of 1 to 20 characters drawn from ASCII letters,
    # digits and the space character.
    sub get_string {
        my @alphabet = ( 'a' .. 'z', 'A' .. 'Z', '0' .. '9', ' ' );
        # Draw the length first, then one character per position.
        my $length = int( rand(20) ) + 1;
        my $string = '';
        $string .= $alphabet[ rand @alphabet ] for 1 .. $length;
        return $string;
    }
    
    # Source data: 10 rows of 6 random strings each.
    my $table = [ map { [ map { get_string } 0 .. 5 ] } 0 .. 9 ];
    
    # Result containers owned by the parent process: modified copy of the
    # table, per-cell numeric flags, and the longest modified cell length.
    my ( $modified_table, $data_type, $max_length ) = ( [], [], 0 );
    
    # Callback registered with MCE::Candy::out_iter_callback; receives one
    # chunk's results. Appends the chunk's rows to the file-level result tables
    # and raises $max_length when the chunk's maximum is larger.
    #   arg 1 - array ref of modified rows for the chunk
    #   arg 2 - array ref of data-type rows for the chunk
    #   arg 3 - longest cell length found in the chunk
    sub upd_vars {
        my ( $chunk_rows, $chunk_types, $chunk_max ) = @_;
        push @{$modified_table}, @{$chunk_rows};
        push @{$data_type},      @{$chunk_types};
        $max_length = $chunk_max if $chunk_max > $max_length;
    }
    
    # Process the table in chunks: each user_func call fills chunk-local result
    # arrays and gathers them, tagged with the chunk id, so that the
    # out_iter_callback iterator can hand them to upd_vars in order.
    my $mce = MCE->new(
        max_workers => 2,
        chunk_size  => 3,
        input_data  => $table,
        gather      => MCE::Candy::out_iter_callback( \&upd_vars ),
        user_func   => sub {
            my ( $mce, $chunk_ref, $chunk_id ) = @_;
            ## Chunk-local results; the names differ from the parent's variables on purpose
            my ( $chunk_rows, $chunk_types, $chunk_max ) = ( [], [], 0 );
            for my $r ( 0 .. $#{$chunk_ref} ) {
                for my $c ( 0 .. $#{ $chunk_ref->[$r] } ) {
                    # Whitespace characters become underscores in the copy.
                    my $cell = $chunk_ref->[$r][$c] =~ s/\s/_/gr;
                    my $len  = length $cell;
                    $chunk_rows->[$r][$c]  = $cell;
                    $chunk_types->[$r][$c] = looks_like_number($cell) ? 1 : 0;
                    $chunk_max = $len if $len > $chunk_max;
                }
            }
            ## Orderly gather capability requires calling gather with chunk_id value.
            $mce->gather( $chunk_id, $chunk_rows, $chunk_types, $chunk_max );
        },
    )->run();
    
    # Dump both result tables and the maximum cell length.
    say Dumper $modified_table;
    say Dumper $data_type;
    say $max_length;