rdplyrpurrrparsnip

Update parsnip model parameters dynamically


Let's say I have a tibble like so:

> mod_tbl
# A tibble: 46 × 3
   .parsnip_engine .parsnip_mode .parsnip_fns
   <chr>           <chr>         <chr>       
 1 lm              regression    linear_reg  
 2 brulee          regression    linear_reg  
 3 gee             regression    linear_reg  
 4 glm             regression    linear_reg  
 5 glmer           regression    linear_reg  
 6 glmnet          regression    linear_reg  
 7 gls             regression    linear_reg  
 8 lme             regression    linear_reg  
 9 lmer            regression    linear_reg  
10 stan            regression    linear_reg  

dput output

structure(list(.parsnip_engine = c("lm", "brulee", "gee", "glm", 
"glmer", "glmnet", "gls", "lme", "lmer", "stan"), .parsnip_mode = c("regression", 
"regression", "regression", "regression", "regression", "regression", 
"regression", "regression", "regression", "regression"), .parsnip_fns = c("linear_reg", 
"linear_reg", "linear_reg", "linear_reg", "linear_reg", "linear_reg", 
"linear_reg", "linear_reg", "linear_reg", "linear_reg")), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"))

Now I want to add the parameters as a column for each line, so I do the following:

model_tbl_with_params <- mod_tbl %>% 
  dplyr::mutate(
    model_params = purrr::pmap(
      dplyr::cur_data(), 
      ~ list(formalArgs(..3))
    ) 
  )

This gives:

> model_tbl_with_params
# A tibble: 46 × 4
   .parsnip_engine .parsnip_mode .parsnip_fns model_params
   <chr>           <chr>         <chr>        <list>      
 1 lm              regression    linear_reg   <list [1]>  
 2 brulee          regression    linear_reg   <list [1]>  
 3 gee             regression    linear_reg   <list [1]>  
 4 glm             regression    linear_reg   <list [1]>  
 5 glmer           regression    linear_reg   <list [1]>  
 6 glmnet          regression    linear_reg   <list [1]>  
 7 gls             regression    linear_reg   <list [1]>  
 8 lme             regression    linear_reg   <list [1]>  
 9 lmer            regression    linear_reg   <list [1]>  
10 stan            regression    linear_reg   <list [1]>  

I then add a .model_id column as a factor and create a dplyr group_split object

mod_factor_tbl <- model_tbl_with_params %>%
  dplyr::mutate(.model_id = dplyr::row_number() %>% 
                  forcats::as_factor()) %>%
  dplyr::select(.model_id, dplyr::everything())

models_list <- mod_factor_tbl %>%
  dplyr::group_split(.model_id)

I then get all the parameters in the fashion I want, in this instance its to set all to tune::tune()

tuned_params_list <- models_list %>%
  purrr::imap(
    .f = function(obj, id){
      
      # Pull the model params
      mod_params <- obj %>% dplyr::pull(5) %>% purrr::pluck(1)
      mod_params_list <- unlist(mod_params) %>% as.list()
      #param_names <- unlist(mod_params)
      names(mod_params_list) <- unlist(mod_params)
      
      # Set mode and engine
      p_mode <- obj %>% dplyr::pull(2) %>% purrr::pluck(1)
      p_engine <- obj %>% dplyr::pull(3) %>% purrr::pluck(1)
      me_list <- list(
        mode = paste0("mode = ", p_mode),
        engine = paste0("engine = ", p_engine)
      )
      
      # Get all other params
      me_vec <- c("mode","engine")
      pv <- unlist(mod_params)
      params_to_modify <- pv[!pv %in% me_vec] %>% as.list()
      names(params_to_modify) <- unlist(params_to_modify)
      
      # Set each item equal to .x = tune::tune()
      tuned_params_list <- purrr::map(
        params_to_modify,
        ~ paste0(.x, " = tune::tune()")
      )
      
      # use modifyList()
      res <- utils::modifyList(mod_params_list, tuned_params_list)
      res <- utils::modifyList(res, me_list)
      
      # Return      
      return(res)
      
    }
  )

How do I take that list object and map the arguments to a model spec?

I make the model spec as a column with a function like this:

internal_make_spec_tbl <- function(.data){

  # Checks ----
  df <- dplyr::as_tibble(.data)

  nms <- unique(names(df))

  if (!".parsnip_engine" %in% nms | !".parsnip_mode" %in% nms | !".parsnip_fns" %in% nms){
    rlang::abort(
      message = "The model tibble must come from the class/reg to parsnip function.",
      use_cli_format = TRUE
    )
  }

  # Make tibble ----
  mod_spec_tbl <- df %>%
    dplyr::mutate(
      model_spec = purrr::pmap(
        dplyr::cur_data(),
        ~ match.fun(..3)(mode = ..2, engine = ..1)
      )
    ) %>%
    # add .model_id column
    dplyr::mutate(.model_id = dplyr::row_number()) %>%
    dplyr::select(.model_id, dplyr::everything())

  # Return ----
  return(mod_spec_tbl)

}

I want to map the values into the arguments of the model spec that is created by the above function.

Here is what the output looks like:

[[46]]
[[46]]$mode
[1] "mode = kernlab"

[[46]]$engine
[1] "engine = regression"

[[46]]$cost
[1] "cost = tune::tune()"

[[46]]$rbf_sigma
[1] "rbf_sigma = tune::tune()"

[[46]]$margin
[1] "margin = tune::tune()"

Solution

  • As both lists have the same length and wants to match and replace the column 'model_params' in models_list with the corresponding list element from tuned_params_list, loop over the lists with map2, extract the model_params vector (which is stored as a nested list in models_list column), and use that vector to extract the named matching list element from the tuned_params_list element and assign it back to model_params column, return the data

    library(purrr)
    models_list2 <- map2(tuned_params_list, models_list, ~ {
         .y$model_params <- list(.x[.y$model_params[[1]][[1]]])
     .y})
    

    It can be done in base R with Map

    models_list2 <- Map(\(.x, .y) {
       .y$model_params <- list(.x[.y$model_params[[1]][[1]]])
       .y}, tuned_params_list, models_list)