Let's say I have a tibble
like so:
> mod_tbl
# A tibble: 46 × 3
.parsnip_engine .parsnip_mode .parsnip_fns
<chr> <chr> <chr>
1 lm regression linear_reg
2 brulee regression linear_reg
3 gee regression linear_reg
4 glm regression linear_reg
5 glmer regression linear_reg
6 glmnet regression linear_reg
7 gls regression linear_reg
8 lme regression linear_reg
9 lmer regression linear_reg
10 stan regression linear_reg
dput
output
structure(list(.parsnip_engine = c("lm", "brulee", "gee", "glm",
"glmer", "glmnet", "gls", "lme", "lmer", "stan"), .parsnip_mode = c("regression",
"regression", "regression", "regression", "regression", "regression",
"regression", "regression", "regression", "regression"), .parsnip_fns = c("linear_reg",
"linear_reg", "linear_reg", "linear_reg", "linear_reg", "linear_reg",
"linear_reg", "linear_reg", "linear_reg", "linear_reg")), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
Now I want to add the parameters as a column for each line, so I do the following:
model_tbl_with_params <- mod_tbl %>%
dplyr::mutate(
model_params = purrr::pmap(
dplyr::cur_data(),
~ list(formalArgs(..3))
)
)
This gives:
> model_tbl_with_params
# A tibble: 46 × 4
.parsnip_engine .parsnip_mode .parsnip_fns model_params
<chr> <chr> <chr> <list>
1 lm regression linear_reg <list [1]>
2 brulee regression linear_reg <list [1]>
3 gee regression linear_reg <list [1]>
4 glm regression linear_reg <list [1]>
5 glmer regression linear_reg <list [1]>
6 glmnet regression linear_reg <list [1]>
7 gls regression linear_reg <list [1]>
8 lme regression linear_reg <list [1]>
9 lmer regression linear_reg <list [1]>
10 stan regression linear_reg <list [1]>
I then add a .model_id
column as a factor and create a dplyr group_split object
mod_factor_tbl <- model_tbl_with_params %>%
dplyr::mutate(.model_id = dplyr::row_number() %>%
forcats::as_factor()) %>%
dplyr::select(.model_id, dplyr::everything())
models_list <- mod_factor_tbl %>%
dplyr::group_split(.model_id)
I then get all the parameters in the fashion I want, in this instance its to set all to tune::tune()
tuned_params_list <- models_list %>%
purrr::imap(
.f = function(obj, id){
# Pull the model params
mod_params <- obj %>% dplyr::pull(5) %>% purrr::pluck(1)
mod_params_list <- unlist(mod_params) %>% as.list()
#param_names <- unlist(mod_params)
names(mod_params_list) <- unlist(mod_params)
# Set mode and engine
p_mode <- obj %>% dplyr::pull(2) %>% purrr::pluck(1)
p_engine <- obj %>% dplyr::pull(3) %>% purrr::pluck(1)
me_list <- list(
mode = paste0("mode = ", p_mode),
engine = paste0("engine = ", p_engine)
)
# Get all other params
me_vec <- c("mode","engine")
pv <- unlist(mod_params)
params_to_modify <- pv[!pv %in% me_vec] %>% as.list()
names(params_to_modify) <- unlist(params_to_modify)
# Set each item equal to .x = tune::tune()
tuned_params_list <- purrr::map(
params_to_modify,
~ paste0(.x, " = tune::tune()")
)
# use modifyList()
res <- utils::modifyList(mod_params_list, tuned_params_list)
res <- utils::modifyList(res, me_list)
# Return
return(res)
}
)
How do I take that list object and map the arguments to a model spec?
I make the model spec as a column with a function like this:
internal_make_spec_tbl <- function(.data){
# Checks ----
df <- dplyr::as_tibble(.data)
nms <- unique(names(df))
if (!".parsnip_engine" %in% nms | !".parsnip_mode" %in% nms | !".parsnip_fns" %in% nms){
rlang::abort(
message = "The model tibble must come from the class/reg to parsnip function.",
use_cli_format = TRUE
)
}
# Make tibble ----
mod_spec_tbl <- df %>%
dplyr::mutate(
model_spec = purrr::pmap(
dplyr::cur_data(),
~ match.fun(..3)(mode = ..2, engine = ..1)
)
) %>%
# add .model_id column
dplyr::mutate(.model_id = dplyr::row_number()) %>%
dplyr::select(.model_id, dplyr::everything())
# Return ----
return(mod_spec_tbl)
}
I want to map the values into the arguments of the model spec that is created by the above function.
Here is what the output looks like:
[[46]]
[[46]]$mode
[1] "mode = kernlab"
[[46]]$engine
[1] "engine = regression"
[[46]]$cost
[1] "cost = tune::tune()"
[[46]]$rbf_sigma
[1] "rbf_sigma = tune::tune()"
[[46]]$margin
[1] "margin = tune::tune()"
As both list
s have the same length and wants to match and replace the column 'model_params' in models_list
with the corresponding list
element from tuned_params_list
, loop over the lists with map2
, extract the model_params
vector (which is stored as a nested list in models_list
column), and use that vector to extract the named matching list element from the tuned_params_list element and assign it back to model_params
column, return the data
library(purrr)
models_list2 <- map2(tuned_params_list, models_list, ~ {
.y$model_params <- list(.x[.y$model_params[[1]][[1]]])
.y})
It can be done in base R
with Map
models_list2 <- Map(\(.x, .y) {
.y$model_params <- list(.x[.y$model_params[[1]][[1]]])
.y}, tuned_params_list, models_list)