I'm trying to make plots of glmnet (lasso
) model in R from list of model objects.
My data looks like (original data is large)
head(SLE28sy_w20_dat3)
# A tibble: 6 × 31
EE86222ln1 EE86223ln1 EE86224ln1 EE86225ln1 blood_vessel_w20 adrenal_gland_w20 bone_element_w20 brain_w20
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.00809 0.00357 0.00146 0.00228 0 1 0 1
2 0.00437 0.00212 0.00156 0.00197 0 1 0 1
3 0.00437 0.00303 0.00172 0.00237 1 0 0 1
4 0.00833 0.00303 0.00126 0.00217 0 1 0 1
5 0.00833 0.00316 0.00165 0.00217 1 1 0 1
6 0.00833 0.00236 0.00134 0.00189 1 0 0 1
I created a list of lasso
models by below code,
j <- SLE28sy_w20_dat3[, c(1:4)]
l <- data.matrix(SLE28sy_w20_dat3[, -c(1:4)])
for (i in 1:ncol(j)) {
las_glmnet <- glmnet(l, data.matrix(j[, i]))
assign(paste0("glmnet_lasso_", names(j)[i]), las_glmnet, envir = .GlobalEnv)
}
Now, I'm trying to make plots of models from every samples, how can I do this...
What I did so far,
las_sam<-list(glmnet_lasso_EE86222ln1, glmnet_lasso_EE86223ln1,
glmnet_lasso_EE86224ln1, glmnet_lasso_EE86225ln1)
for (i in seq_along(las_sam)){
betas = as.matrix(las_sam[[i]]$beta)
lambdas = las_sam[[i]]$lambda
names(lambdas) = colnames(betas)
plot_las<- as.data.frame(betas) %>%
tibble::rownames_to_column("variable") %>%
pivot_longer(-variable) %>%
mutate(lambda=lambdas[name]) %>%
ggplot(aes(x=lambda,y=value,col=variable)) +
geom_line() +
geom_label_repel(data=~subset(.x,lambda==min(lambda)), size = 2.5,
aes(label=variable),nudge_x=-0.8) +
theme(legend.position="none") +
scale_x_log10()
assign(paste0("Plot_lasso_", names(las_sam)[i]), plot_las, envir = .GlobalEnv)
}
By this I'm only getting the plot of last sample's model which is glmnet_lasso_EE86225ln1
. That is, the loop is functional, but it is unable to store the image objects with distinct names. Perhaps the last line of the code is where I'm going wrong.
My end goal is to make a separate image in PNG or TIFF format for every sample's model.
What about this "tidy-style" solution?
facet
ing by group would streamline the process even betterSLE28sy_w20_dat3 <-
structure(list(EE86222ln1 = c(0.00809, 0.00437, 0.00437, 0.00833,
0.00833, 0.00833), EE86223ln1 = c(0.00357, 0.00212, 0.00303,
0.00303, 0.00316, 0.00236), EE86224ln1 = c(0.00146, 0.00156,
0.00172, 0.00126, 0.00165, 0.00134), EE86225ln1 = c(0.00228,
0.00197, 0.00237, 0.00217, 0.00217, 0.00189), blood_vessel_w20 = c(0L,
0L, 1L, 0L, 1L, 1L), adrenal_gland_w20 = c(1L, 1L, 0L, 1L, 1L,
0L), bone_element_w20 = c(0L, 0L, 0L, 0L, 0L, 0L), brain_w20 = c(1L,
1L, 1L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA,
6L))
library(glmnet)
library(tidyr)
library(dplyr)
data_long <-
SLE28sy_w20_dat3 |>
pivot_longer(cols = starts_with('EE'),
names_to = 'EE_variant',
values_to = 'Y',
)
## > data_long
## # A tibble: 24 x 6
## blood_vessel_w20 adrenal_gland_w20 bone_element_w20 brain_w20 EE_variant
## <int> <int> <int> <int> <chr>
## 1 0 1 0 1 EE86222ln1
## 2 0 1 0 1 EE86223ln1
## 3 0 1 0 1 EE86224ln1
## ...
data_nested <-
data_long |>
group_by(EE_variant) |>
nest(data = -EE_variant)
+ # A tibble: 4 x 2
# Groups: EE_variant [4]
EE_variant data
<chr> <list>
1 EE86222ln1 <tibble [6 x 5]>
2 EE86223ln1 <tibble [6 x 5]>
3 EE86224ln1 <tibble [6 x 5]>
4 EE86225ln1 <tibble [6 x 5]>
the_models <-
data_nested |>
rowwise() |>
## save the model in list-column
summarise(the_model = list(glmnet(x = data |> select(-Y) |> data.matrix(),
y = data |> select(Y) |> data.matrix()
)
)
)
## > the_models
## # A tibble: 4 x 2
## # Groups: EE_variant [4]
## EE_variant the_model
## <chr> <list>
## 1 EE86222ln1 <elnet>
## 2 EE86223ln1 <elnet>
## 3 EE86224ln1 <elnet>
## 4 EE86225ln1 <elnet>
par(mfrow = c(2, 2))
the_models |> mutate(plot(the_model[[1]], sub = EE_variant))
a ggplot variant:
the_plot_data <-
the_models |>
rowwise() |>
transmute(plot_data = cbind(lambda = the_model$lambda,
t(as.matrix(the_model$beta))
) |>
as.data.frame() |>
pivot_longer(-lambda, names_to = 'variable') |>
list()
) |>
reframe(plot_data)
## + # A tibble: 1,000 x 4
## EE_variant lambda variable value
## <chr> <dbl> <chr> <dbl>
## 1 EE86222ln1 0.000438 blood_vessel_w20 0
## 2 EE86222ln1 0.000438 adrenal_gland_w20 0
## 3 EE86222ln1 0.000438 bone_element_w20 0
## 4 EE86222ln1 0.000438 brain_w20 0
## 5 EE86222ln1 0.000399 blood_vessel_w20 0
## 6 EE86222ln1 0.000399 adrenal_gland_w20 0.0000826
the_plot_data |>
ggplot(aes(lambda, value)) +
geom_line(aes(col = variable)) +
facet_wrap(~ EE_variant)
Map
like so:
the_plot_data <-
the_plot_data |> split(the_plot_data$EE_variant)
names(the_plot_data) |>
Map(f = \(variant){
the_plot_data[[variant]] |>
ggplot(aes(lambda, value)) +
geom_line(aes(col = variable))
ggsave(filename = sprintf('plot-of-%s.png', variant))
}
)