r tidymodels

R tidymodels: how to pass argument `initial` using workflow_map()?


I am trying to pass the results from tune_grid() to the initial argument of tune_bayes(). This works when calling tune_bayes() directly (see the example in ?tune_bayes). However, I don't see how to pass the initial argument when I use workflow_map(). I get the error message:

Error in check_initial(): ! initial should be a positive integer or the results of [tune_grid()]

This error message is quite understandable, since the output of workflow_map() is a workflow set, i.e. a tibble that will contain in the column result each output from tune_grid().

So how can I pass the workflow set returned by tune_grid <- workflow_map("tune_grid") to workflow_map("tune_bayes", initial = tune_grid)?

Reproducible example:

library(tidymodels)

# Load and prepare data: keep only the numeric (integer/double) columns.
# vapply() with is.numeric always yields a logical vector, whereas
# sapply(ames, class) returns a list as soon as one column has several classes.
ames_data <- ames[, vapply(ames, is.numeric, logical(1))]

# Define a recipe: normalise every predictor of Sale_Price
recipe <- recipe(Sale_Price ~ ., data = ames_data) %>%
  step_normalize(all_predictors())

# Define models; the parameters marked tune() are the ones being tuned
lasso_model <- linear_reg(penalty = tune(), mixture = 1) %>%
  set_engine("glmnet")

rf_model <- rand_forest(min_n = tune(), trees = 500) %>%
  set_engine("ranger") %>%
  set_mode("regression")

# Create workflows (not used below: workflow_set() is given the recipe and
# the model specifications directly)
lasso_wf <- workflow() %>%
  add_model(lasso_model) %>%
  add_recipe(recipe)

rf_wf <- workflow() %>%
  add_model(rf_model) %>%
  add_recipe(recipe)

# Fix the RNG state so the cross-validation folds are the same on every run
set.seed(123)
cross_val <- vfold_cv(ames_data, v = 5)

# Grid search: tune every workflow of the set on the same resamples
tune_grid <- workflow_set(
  preproc = list(recipe),
  models = list(lasso = lasso_model, rf = rf_model)
) %>%
  workflow_map("tune_grid", resamples = cross_val, grid = 25)

tune_grid
#> # A workflow set/tibble: 2 × 4
#>   wflow_id     info             option    result   
#>   <chr>        <list>           <list>    <list>   
#> 1 recipe_lasso <tibble [1 × 4]> <opts[2]> <tune[+]>
#> 2 recipe_rf    <tibble [1 × 4]> <opts[2]> <tune[+]>

## Now Bayesian optimisation.
# This is the call that fails: `initial` is given the whole workflow set
# returned by workflow_map("tune_grid") rather than a single tune_grid()
# result, which is what the error message printed below complains about.
tune_bayes <- workflow_map(
  workflow_set(
    preproc = list(recipe),
    models = list(lasso = lasso_model, rf = rf_model)
  ),
  "tune_bayes",
  resamples = cross_val,
  initial = tune_grid
)

# Inspect the (failed) result stored for the first workflow
tune_bayes$result[[1]]
#> [1] "Error in check_initial(initial, pset = param_info, wflow = object, resamples = resamples,  : \n  `initial` should be a positive integer or the results of [tune_grid()]\n"
#> attr(,"class")
#> [1] "try-error"
#> attr(,"condition")
#> <error/rlang_error>
#> Error in `check_initial()`:
#> ! `initial` should be a positive integer or the results of [tune_grid()]

Created on 2025-03-19 with reprex v2.1.1


Solution

  • You'll need to use option_add() to put each previous result into the option column of the workflow set.

    Here is some code, with the main changes at the end:

    library(tidymodels)
    
    # Load and prepare data: keep only the numeric (integer/double) columns.
    # vapply() with is.numeric always yields a logical vector, whereas
    # sapply(ames, class) returns a list as soon as one column has several classes.
    ames_data <- ames[, vapply(ames, is.numeric, logical(1))]
    
    # Define a recipe; it is called `ames_recipe` so that the object does not
    # mask the recipe() function
    ames_recipe <- recipe(Sale_Price ~ ., data = ames_data) %>%
      step_normalize(all_predictors())
    
    # Define models; the parameters marked tune() are the ones being tuned
    lasso_model <- linear_reg(penalty = tune(), mixture = 1) %>%
      set_engine("glmnet")
    
    rf_model <- rand_forest(min_n = tune(), trees = 500) %>%
      set_engine("ranger") %>%
      set_mode("regression")
    
    # Create workflows (not used below: workflow_set() is given the recipe and
    # the model specifications directly)
    lasso_wf <- workflow() %>%
      add_model(lasso_model) %>%
      add_recipe(ames_recipe)
    
    rf_wf <- workflow() %>%
      add_model(rf_model) %>%
      add_recipe(ames_recipe)
    
    # Fix the RNG state so the cross-validation folds are the same on every run
    set.seed(123)
    cross_val <- vfold_cv(ames_data, v = 5)
    
    # Step 1: grid search over every workflow. The preprocessor is named
    # explicitly so the workflow IDs are "recipe_lasso" and "recipe_rf".
    tune_grid_res <-
      workflow_set(
        preproc = list(recipe = ames_recipe),
        models = list(lasso = lasso_model, rf = rf_model)
      ) %>%
      workflow_map("tune_grid", resamples = cross_val, grid = 25)
    
    # Step 2: build an identical, untuned workflow set for the Bayesian search
    bayes_wf_set <-
      workflow_set(
        preproc = list(recipe = ames_recipe),
        models = list(lasso = lasso_model, rf = rf_model)
      )
    
    # Step 3 (the main change): store each workflow's own grid-search result as
    # its `initial` option. Looping over the IDs avoids one hand-written
    # option_add() call per workflow and keeps working when models are added.
    for (wf_id in bayes_wf_set$wflow_id) {
      bayes_wf_set <- option_add(
        bayes_wf_set,
        id = wf_id,
        initial = extract_workflow_set_result(tune_grid_res, wf_id)
      )
    }
    
    # Step 4: tune_bayes() now receives a per-workflow `initial` from the
    # option column, so it is not passed through workflow_map() itself
    tune_bayes_res <-
      workflow_map(bayes_wf_set, "tune_bayes", resamples = cross_val)