rfor-loopggplot2glmnet

How to get distinct ggplot2 plot from list of glmnet model in R?


I'm trying to make plots of glmnet (lasso) model in R from list of model objects.

My data looks like (original data is large)

head(SLE28sy_w20_dat3)
# A tibble: 6 × 31
  EE86222ln1 EE86223ln1 EE86224ln1 EE86225ln1 blood_vessel_w20 adrenal_gland_w20 bone_element_w20 brain_w20
       <dbl>      <dbl>      <dbl>      <dbl>            <dbl>             <dbl>            <dbl>     <dbl>
1    0.00809    0.00357    0.00146    0.00228                0                 1                0         1
2    0.00437    0.00212    0.00156    0.00197                0                 1                0         1
3    0.00437    0.00303    0.00172    0.00237                1                 0                0         1
4    0.00833    0.00303    0.00126    0.00217                0                 1                0         1
5    0.00833    0.00316    0.00165    0.00217                1                 1                0         1
6    0.00833    0.00236    0.00134    0.00189                1                 0                0         1

I created a list of lasso models by below code,

  j <- SLE28sy_w20_dat3[, c(1:4)]
  l <- data.matrix(SLE28sy_w20_dat3[, -c(1:4)])
  
  for (i in 1:ncol(j)) {
    las_glmnet <- glmnet(l, data.matrix(j[, i]))
    assign(paste0("glmnet_lasso_", names(j)[i]), las_glmnet, envir = .GlobalEnv)
  }

Now, I'm trying to make plots of models from every samples, how can I do this...

What I did so far,

  las_sam<-list(glmnet_lasso_EE86222ln1, glmnet_lasso_EE86223ln1,
       glmnet_lasso_EE86224ln1, glmnet_lasso_EE86225ln1)
  
  for (i in seq_along(las_sam)){
    
    betas = as.matrix(las_sam[[i]]$beta)
    lambdas = las_sam[[i]]$lambda
    names(lambdas) = colnames(betas)

 plot_las<- as.data.frame(betas) %>% 
      tibble::rownames_to_column("variable") %>% 
      pivot_longer(-variable) %>% 
      mutate(lambda=lambdas[name]) %>% 
      ggplot(aes(x=lambda,y=value,col=variable)) + 
      geom_line() + 
      geom_label_repel(data=~subset(.x,lambda==min(lambda)), size = 2.5,
                       aes(label=variable),nudge_x=-0.8) +
      theme(legend.position="none")   +
      scale_x_log10()
 assign(paste0("Plot_lasso_", names(las_sam)[i]), plot_las, envir = .GlobalEnv)
    
  }  

By this I'm only getting the plot of last sample's model which is glmnet_lasso_EE86225ln1. That is, the loop is functional, but it is unable to store the image objects with distinct names. Perhaps the last line of the code is where I'm going wrong. My end goal is to make a separate image in PNG or TIFF format for every sample's model.


Solution

  • What about this "tidy-style" solution?


    SLE28sy_w20_dat3 <- 
    structure(list(EE86222ln1 = c(0.00809, 0.00437, 0.00437, 0.00833, 
    0.00833, 0.00833), EE86223ln1 = c(0.00357, 0.00212, 0.00303, 
    0.00303, 0.00316, 0.00236), EE86224ln1 = c(0.00146, 0.00156, 
    0.00172, 0.00126, 0.00165, 0.00134), EE86225ln1 = c(0.00228, 
    0.00197, 0.00237, 0.00217, 0.00217, 0.00189), blood_vessel_w20 = c(0L, 
    0L, 1L, 0L, 1L, 1L), adrenal_gland_w20 = c(1L, 1L, 0L, 1L, 1L, 
    0L), bone_element_w20 = c(0L, 0L, 0L, 0L, 0L, 0L), brain_w20 = c(1L, 
    1L, 1L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA, 
    6L))
    
        library(glmnet)
        library(tidyr)
        library(dplyr)
    
        data_long <- 
          SLE28sy_w20_dat3 |>
          pivot_longer(cols = starts_with('EE'), 
                       names_to = 'EE_variant',
                       values_to = 'Y',
                       )
        
        ## > data_long
        ## # A tibble: 24 x 6
        ##    blood_vessel_w20 adrenal_gland_w20 bone_element_w20 brain_w20 EE_variant
        ##               <int>             <int>            <int>     <int> <chr>     
        ##  1                0                 1                0         1 EE86222ln1
        ##  2                0                 1                0         1 EE86223ln1
        ##  3                0                 1                0         1 EE86224ln1
        ## ...
    
        data_nested <- 
          data_long |>
          group_by(EE_variant) |>
          nest(data = -EE_variant)
        
        + # A tibble: 4 x 2
        # Groups:   EE_variant [4]
          EE_variant data            
          <chr>      <list>          
        1 EE86222ln1 <tibble [6 x 5]>
        2 EE86223ln1 <tibble [6 x 5]>
        3 EE86224ln1 <tibble [6 x 5]>
        4 EE86225ln1 <tibble [6 x 5]>
    
        the_models <- 
          data_nested |>
          rowwise() |>
          ## save the model in list-column
          summarise(the_model = list(glmnet(x = data |> select(-Y) |> data.matrix(),
                                            y = data |> select(Y) |> data.matrix()
                                            )
                                     )
                    )
        ## > the_models
        ## # A tibble: 4 x 2
        ## # Groups:   EE_variant [4]
        ##   EE_variant the_model
        ##   <chr>      <list>   
        ## 1 EE86222ln1 <elnet>  
        ## 2 EE86223ln1 <elnet>  
        ## 3 EE86224ln1 <elnet>  
        ## 4 EE86225ln1 <elnet> 
    
    par(mfrow = c(2, 2))
    
    the_models |> mutate(plot(the_model[[1]], sub = EE_variant))
    

    four models plottet in one go


    a ggplot variant:

    
        the_plot_data <- 
          the_models |>
          rowwise() |>
          transmute(plot_data = cbind(lambda = the_model$lambda,
                                      t(as.matrix(the_model$beta))
                                      ) |>
                      as.data.frame() |>
                      pivot_longer(-lambda, names_to = 'variable') |>
                      list()
                    ) |>
          reframe(plot_data)
    
    
        ## + # A tibble: 1,000 x 4
        ##    EE_variant   lambda variable              value
        ##    <chr>         <dbl> <chr>                 <dbl>
        ##  1 EE86222ln1 0.000438 blood_vessel_w20  0        
        ##  2 EE86222ln1 0.000438 adrenal_gland_w20 0        
        ##  3 EE86222ln1 0.000438 bone_element_w20  0        
        ##  4 EE86222ln1 0.000438 brain_w20         0        
        ##  5 EE86222ln1 0.000399 blood_vessel_w20  0        
        ##  6 EE86222ln1 0.000399 adrenal_gland_w20 0.0000826
    
    the_plot_data |>
      ggplot(aes(lambda, value)) +
      geom_line(aes(col = variable)) +
      facet_wrap(~ EE_variant)
    

    facetted glmnet results

    
        the_plot_data <- 
          the_plot_data |> split(the_plot_data$EE_variant)
        
        names(the_plot_data) |>
          Map(f = \(variant){ 
            the_plot_data[[variant]] |>
              ggplot(aes(lambda, value)) +
              geom_line(aes(col = variable))
            ggsave(filename = sprintf('plot-of-%s.png', variant))
          }
          )