I've prepared a custom recipe step that works when parameter tuning is run sequentially, but fails when attempting to run in parallel.
I've tried the doParallel PSOCK, doFuture cluster, and doFuture multisession backends and get a similar error with each:
show_notes(.Last.tune.result)
#> unique notes:
#> ────────────────────────────────────────────────────────────────────────────────
#> Error in `recipes::prep()`:
#> ! You cannot `prep()` a tuneable recipe. Argument(s) with `tune()`: 'n_features'. Do you want to use a tuning function such as `tune_grid()`?
I suspect this may be related to the parallel backends on a Windows machine not receiving global variables/objects (described here and here). I'm not certain this is what is happening here, and if it is, I don't know what exactly is missing from the backend (e.g., variables, other model objects, functions defined in the custom step function, the custom step function itself).
I'm looking for help on understanding what's going on here and how to control these things when trying to tune in parallel when using a custom recipe step.
If it's the functions that are missing, is there a way to pass them directly to the backends, or do I need to put them in a package and load that package through the backend options?
Here's a reproducible example.
Create the step function:
# HOUSEKEEPING ####
rm(list = ls(all = TRUE)) # clean house
# CRAN libraries
library(tidyverse) # install.packages("tidyverse")
library(tidymodels) # install.packages("tidymodels")
library(finetune) # install.packages("finetune")
#> Warning: package 'finetune' was built under R version 4.3.2
library(doParallel) # install.packages("doParallel")
#> Warning: package 'doParallel' was built under R version 4.3.2
#> Loading required package: foreach
#>
#> Attaching package: 'foreach'
#> The following objects are masked from 'package:purrr':
#>
#> accumulate, when
#> Loading required package: iterators
#> Loading required package: parallel
library(reprex) # install.packages("reprex")
#> Warning: package 'reprex' was built under R version 4.3.2
# Bioconductor libraries
library(limma) # BiocManager::install("limma")
# set conflict preference
tidymodels_prefer()
# DEFINE step_limma ####
# Add a limma-based feature-filter step to a recipe.
#
# Arguments:
#   recipe            The recipe the step is appended to.
#   ...               Selector expressions, captured as quosures in `terms`.
#   role, trained, skip, id
#                     Step bookkeeping fields, stored on the step unchanged.
#   include_covariate, covariate_id, remove_corr, corr_threshold, n_features
#                     Filter settings, stored on the step unchanged.
#   removals          Slot for the column names to drop; NULL until filled.
#
# Returns the result of add_step(), i.e. `recipe` with the new step attached.
step_limma <- function(
    recipe,
    ...,
    role = NA,
    trained = FALSE,
    include_covariate = FALSE,
    covariate_id = NULL,
    remove_corr = FALSE,
    corr_threshold = NULL,
    n_features = NULL,
    removals = NULL,
    skip = FALSE,
    id = rand_id("limma")) {
  # Build the step object first, then attach it to the recipe.
  limma_step <- step_limma_new(
    terms = enquos(...),
    role = role,
    trained = trained,
    include_covariate = include_covariate,
    covariate_id = covariate_id,
    remove_corr = remove_corr,
    corr_threshold = corr_threshold,
    n_features = n_features,
    removals = removals,
    skip = skip,
    id = id
  )
  add_step(recipe, limma_step)
}
# Low-level constructor for the limma step.
#
# Every argument is required and is passed straight through to step(), which
# is called with subclass "limma". No validation happens here.
step_limma_new <- function(
    terms, role, trained, include_covariate, covariate_id,
    remove_corr, corr_threshold, n_features, removals, skip, id) {
  limma_step <- step(
    subclass = "limma",
    # selection and bookkeeping
    terms = terms, role = role, trained = trained,
    # filter settings
    include_covariate = include_covariate, covariate_id = covariate_id,
    remove_corr = remove_corr, corr_threshold = corr_threshold,
    n_features = n_features,
    # training result and remaining bookkeeping
    removals = removals, skip = skip, id = id
  )
  limma_step
}
# Train the limma feature filter on the training set.
#
# Fits a limma linear model of every selected column on the outcome
# (optionally adjusting for one covariate), ranks the columns by p-value and
# keeps the `n_features` top-ranked ones. When `remove_corr` is TRUE, columns
# among those top-ranked ones that caret::findCorrelation() flags at
# `corr_threshold` are dropped as well.
#
# Arguments:
#   x         The untrained step_limma object.
#   training  Training data containing the selected columns and the outcome.
#   info      Recipe variable info with `variable` and `role` columns.
#   ...       Unused.
#
# Returns a trained step whose `removals` field names every column that
# bake() should drop (the covariate, if any, plus all filtered-out columns).
# Aborts when the settings are inconsistent (see the checks below).
prep.step_limma <- function(x, training, info = NULL, ...) {
  col_names <- recipes::recipes_eval_select(x$terms, training, info)
  recipes::check_type(training[, col_names], types = c("double", "integer"))

  # %in% (rather than ==) so variables with an NA role are not picked up.
  outcome_name <- info$variable[info$role %in% "outcome"]
  if (length(outcome_name) != 1L) {
    rlang::abort("step_limma requires exactly one outcome variable")
  }
  tar <- training[[outcome_name]]

  n_features <- x$n_features
  if (!is.numeric(n_features) || length(n_features) != 1L ||
    is.na(n_features) || n_features < 1) {
    rlang::abort("`n_features` must be a single positive number")
  }

  covariate_id <- x$covariate_id
  # The covariate is never a candidate feature.
  feature_names <- setdiff(unname(col_names), covariate_id)
  # limma expects features in rows and samples in columns.
  df_limma <- t(as.matrix(training[, feature_names]))

  if (isTRUE(x$include_covariate)) {
    if (length(covariate_id) == 0) {
      rlang::abort("include_covariate == TRUE but covariate_id was not specified\nspecify covariate_id")
    }
    if (length(covariate_id) > 1) {
      rlang::abort("more than one covariate_id specified\nstep_limma only supports 1 covariate at present")
    }
    # Extract by name so the string is always treated as a column name.
    covar <- training[[covariate_id]]
    design <- model.matrix(~ tar + covar)
  } else {
    design <- model.matrix(~tar)
  }

  fit <- limma::lmFit(df_limma, design)
  ebayes <- limma::eBayes(fit)
  # coef = 2 is the outcome term (column 1 of the design is the intercept).
  tab <- limma::topTable(
    ebayes,
    coef = 2,
    adjust.method = "fdr",
    sort.by = "p",
    number = Inf
  )
  ranked <- rownames(tab)

  # Never ask for more features than exist; seq_len() also avoids the
  # descending sequence that `(n + 1):nrow(tab)` produces when n >= nrow(tab).
  n_keep <- min(floor(n_features), length(ranked))
  kept <- ranked[seq_len(n_keep)]

  if (isTRUE(x$remove_corr)) {
    if (is.null(x$corr_threshold)) {
      rlang::abort("remove_corr == TRUE but corr_threshold was not specified")
    }
    # A correlation filter needs at least two columns to compare.
    if (length(kept) > 1) {
      corr_mat <- stats::cor(
        as.matrix(training[, kept]),
        method = "spearman",
        use = "pairwise.complete.obs"
      )
      flagged <- caret::findCorrelation(corr_mat, cutoff = x$corr_threshold)
      # findCorrelation() returns column indices into corr_mat; translate them
      # to names so an empty result simply removes nothing extra.
      kept <- setdiff(kept, colnames(corr_mat)[flagged])
    }
  }

  # Everything that was ranked but not kept is removed, plus the covariate.
  removals <- c(covariate_id, setdiff(ranked, kept))

  step_limma_new(
    terms = x$terms,
    trained = TRUE,
    role = x$role,
    include_covariate = x$include_covariate,
    covariate_id = x$covariate_id,
    remove_corr = x$remove_corr,
    corr_threshold = x$corr_threshold,
    n_features = x$n_features,
    removals = removals,
    skip = x$skip,
    id = x$id
  )
}
# Apply the trained filter to new data.
#
# Passes `new_data` and the step object to recipes_remove_cols() and returns
# the result visibly (ending on an assignment would return it invisibly).
bake.step_limma <- function(object, new_data, ...) {
  recipes::recipes_remove_cols(new_data, object)
}
# Print method for the limma step.
#
# Hands print_step() the removed columns (`x$removals`) as the trained value
# and the selectors (`x$terms`) as the untrained value. The step stores only
# the removed columns, so those are the only trained result it can show.
# Returns `x` invisibly.
print.step_limma <- function(x, width = max(20, options()$width - 35), ...) {
  title <- "Limma feature filter on "
  recipes::print_step(x$removals, x$terms, x$trained, title, width)
  invisible(x)
}
# Tidy method for the limma step.
#
# Returns a tibble with columns `features` and `id`:
#   - trained step:   `features` holds the names in `x$removals`, i.e. the
#                     columns the step drops (the only result it stores);
#   - untrained step: `features` holds the selector expressions in `x$terms`
#                     rendered as text.
tidy.step_limma <- function(x, ...) {
  if (recipes::is_trained(x)) {
    features <- unname(as.character(x$removals))
  } else {
    features <- vapply(
      x$terms,
      rlang::quo_text,
      character(1),
      width = options()$width,
      nlines = 1
    )
    features <- unname(features)
  }
  # Building both columns together keeps a zero-row result well-formed.
  tibble::tibble(features = features, id = x$id)
}
# dials parameter object for the `n_features` argument of step_limma.
#
# Arguments:
#   range  Two-element integer range, both ends inclusive.
#   trans  Optional transformation, passed through to dials.
#
# The dials constructor is namespace-qualified so the function also works in
# sessions (e.g. parallel workers) where dials is installed but not attached.
num_features <- function(range = c(10L, 1000L), trans = NULL) {
  dials::new_quant_param(
    type = "integer",
    range = range,
    inclusive = c(TRUE, TRUE),
    trans = trans,
    label = c(n_features = "# features"),
    finalize = NULL
  )
}
# dials parameter object for the `include_covariate` argument of step_limma.
#
# Arguments:
#   values  Candidate logical values.
#
# The dials constructor is namespace-qualified so the function also works in
# sessions (e.g. parallel workers) where dials is installed but not attached.
# The call has no trailing comma, so no empty extra argument is passed.
include_covariate <- function(values = c(TRUE, FALSE)) {
  dials::new_qual_param(
    type = "logical",
    values = values,
    label = c(include_covariate = "covariate")
  )
}
# dials parameter object for the `corr_threshold` argument of step_limma.
#
# Arguments:
#   range  Two-element numeric range, both ends inclusive.
#   trans  Optional transformation, passed through to dials.
#
# The dials constructor is namespace-qualified so the function also works in
# sessions (e.g. parallel workers) where dials is installed but not attached.
corr_threshold <- function(range = c(0.7, 0.9), trans = NULL) {
  dials::new_quant_param(
    type = "double",
    range = range,
    inclusive = c(TRUE, TRUE),
    trans = trans,
    label = c(corr_threshold = "correlation threshold"),
    finalize = NULL
  )
}
# dials parameter object for the `remove_corr` argument of step_limma.
#
# Arguments:
#   values  Candidate logical values.
#
# The dials constructor is namespace-qualified so the function also works in
# sessions (e.g. parallel workers) where dials is installed but not attached.
# The call has no trailing comma, so no empty extra argument is passed.
remove_corr <- function(values = c(TRUE, FALSE)) {
  dials::new_qual_param(
    type = "logical",
    values = values,
    label = c(remove_corr = "remove correlated features")
  )
}
# tunable() method: lists the step arguments that can be tuned.
#
# Returns a tibble with one row per tunable argument. `call_info` holds, for
# each argument, a list naming the function that builds its parameter object.
tunable.step_limma <- function(x, ...) {
  # step argument -> name of its parameter-building function
  param_funs <- c(
    n_features = "num_features",
    include_covariate = "include_covariate",
    remove_corr = "remove_corr",
    corr_threshold = "corr_threshold"
  )
  call_info <- lapply(
    unname(param_funs),
    function(fun_name) list(fun = fun_name)
  )
  tibble::tibble(
    name = names(param_funs),
    call_info = call_info,
    source = "recipe",
    component = "step_limma",
    component_id = x$id
  )
}
# required_pkgs() method: packages that must be available to use the step.
#
# "dials" is listed because the parameter functions that tunable.step_limma()
# refers to are built with dials constructors.
required_pkgs.step_limma <- function(x, ...) {
  c("limma", "caret", "yardstick", "dials")
}
Test it sequentially and in parallel:
# DEFINE SEED ####
# One seed, re-applied before each random step below.
seed <- 42
# DEFINE SET ####
# Simulated data: 1000 rows, 100 numeric columns and a random two-level factor
# `target` placed as the first column.
# NOTE(review): runif() draws only 1000 values for a 1000 x 100 matrix, so
# matrix() recycles them and all 100 columns are identical. Confirm whether
# runif(1000 * 100, ...) was intended.
set.seed(seed)
set <- matrix(runif(1000, min = 1, max = 10), nrow = 1000, ncol = 100) %>%
data.frame() %>%
tibble() %>%
mutate(
target = sample(c(0, 1), 1000, replace = TRUE) %>% factor(),
.before = 1
)
# DEFINE TRAINING AND VALIDATION SETS ####
# Split stratified on `target`.
set.seed(seed)
set_split <- initial_split(set, strata = target)
set_train <- training(set_split)
set_test <- testing(set_split)
# SET UP RESAMPLING ####
# 5-fold cross-validation on the training set, stratified on `target`.
set.seed(seed)
cv_folds <- set_train %>% vfold_cv(v = 5, strata = target)
# BUILD MODELS ####
# Linear SVM (kernlab engine) with `cost` and `margin` marked for tuning.
mod_svmlinear <- svm_linear(cost = tune(), margin = tune()) %>%
set_engine("kernlab") %>%
set_mode("classification")
# BUILD RECIPE ####
# The custom step is applied to all predictors; `n_features` is marked for
# tuning.
recipe_set <- recipe(target ~ ., data = set_train) %>%
step_limma(
all_predictors(),
n_features = tune()
)
# CREATE WORKFLOWS ####
workflow_spec <- workflow() %>%
add_recipe(recipe_set) %>%
add_model(mod_svmlinear)
# CHECK IF step_limma WORKS IF TUNING IS DONE SEQUENTIALLY ####
# Baseline run: allow_par = FALSE in control_race(), 5 grid candidates,
# ROC AUC as the only metric.
set.seed(seed)
res_tune_sequential <- workflow_spec %>%
tune_race_win_loss(
resamples = cv_folds,
metrics = metric_set(roc_auc),
grid = 5,
control = control_race(
save_pred = TRUE,
event_level = "second",
save_workflow = TRUE,
allow_par = FALSE
)
)
# Summarised metrics for the candidates.
res_tune_sequential %>% collect_metrics()
#> # A tibble: 5 × 9
#> cost margin n_features .metric .estimator mean n std_err .config
#> <dbl> <dbl> <int> <chr> <chr> <dbl> <int> <dbl> <chr>
#> 1 11.1 0.153 419 roc_auc binary 0.509 5 0.0113 Preprocess…
#> 2 0.0120 0.00566 948 roc_auc binary 0.481 5 0.00746 Preprocess…
#> 3 1.63 0.0461 374 roc_auc binary 0.504 5 0.0120 Preprocess…
#> 4 0.00497 0.0859 133 roc_auc binary 0.507 5 0.0116 Preprocess…
#> 5 0.0836 0.194 687 roc_auc binary 0.493 5 0.0116 Preprocess…
# CHECK IF step_limma WORKS IF TUNING IS DONE IN PARALLEL ####
# Same tuning call as the sequential run, but with a 5-worker PSOCK cluster
# registered through doParallel and allow_par = TRUE. Nothing is exported to
# the workers here; this is the configuration that produces the error shown
# below.
cl <- parallel::makePSOCKcluster(5)
doParallel::registerDoParallel(cl)
set.seed(seed)
res_tune <- workflow_spec %>%
tune_race_win_loss(
resamples = cv_folds,
metrics = metric_set(roc_auc),
grid = 5,
control = control_race(
save_pred = TRUE,
event_level = "second",
save_workflow = TRUE,
allow_par = TRUE
)
)
#> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
#> information.
#> Error in `dplyr::filter()`:
#> ℹ In argument: `.metric == metric`.
#> Caused by error:
#> ! object '.metric' not found
#> Backtrace:
#> ▆
#> 1. ├─workflow_spec %>% ...
#> 2. ├─finetune::tune_race_win_loss(...)
#> 3. ├─finetune:::tune_race_win_loss.workflow(...)
#> 4. │ └─finetune:::tune_race_win_loss_workflow(...)
#> 5. │ └─finetune:::test_parameters_bt(res, control$alpha)
#> 6. │ └─tune::collect_metrics(x, summarize = FALSE) %>% ...
#> 7. ├─dplyr::filter(., .metric == metric)
#> 8. ├─dplyr:::filter.data.frame(., .metric == metric)
#> 9. │ └─dplyr:::filter_rows(.data, dots, by)
#> 10. │ └─dplyr:::filter_eval(...)
#> 11. │ ├─base::withCallingHandlers(...)
#> 12. │ └─mask$eval_all_filter(dots, env_filter)
#> 13. │ └─dplyr (local) eval()
#> 14. └─base::.handleSimpleError(...)
#> 15. └─dplyr (local) h(simpleError(msg, call))
#> 16. └─rlang::abort(message, class = error_class, parent = parent, call = error_call)
# Shut down the worker processes of the cluster `cl`.
parallel::stopCluster(cl)
Session info:
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.3.1 (2023-06-16 ucrt)
#> os Windows 11 x64 (build 22621)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_Canada.utf8
#> ctype English_Canada.utf8
#> tz America/Edmonton
#> date 2023-11-28
#> pandoc 3.1.6.1 @ C:/PROGRA~1/Pandoc/ (via rmarkdown)
Thanks @topepo for the suggestion to use some parallel controls to pass the functions to the backends. Perhaps not the most elegant solution but it seems to be working.
Here's a reproduction of the entire process
Create the step function:
# HOUSEKEEPING ####
rm(list = ls(all = TRUE)) # clean house
# CRAN libraries
library(tidyverse) # install.packages("tidyverse")
library(tidymodels) # install.packages("tidymodels")
library(finetune) # install.packages("finetune")
#> Warning: package 'finetune' was built under R version 4.3.2
library(doParallel) # install.packages("doParallel")
#> Warning: package 'doParallel' was built under R version 4.3.2
#> Loading required package: foreach
#>
#> Attaching package: 'foreach'
#> The following objects are masked from 'package:purrr':
#>
#> accumulate, when
#> Loading required package: iterators
#> Loading required package: parallel
library(bench) # install.packages("bench")
#> Warning: package 'bench' was built under R version 4.3.2
library(reprex) # install.packages("reprex")
#> Warning: package 'reprex' was built under R version 4.3.2
# Bioconductor libraries
library(limma) # BiocManager::install("limma")
# DEFINE step_limma ####
# Add a limma-based feature-filter step to a recipe.
#
# Arguments:
#   recipe            The recipe the step is appended to.
#   ...               Selector expressions, captured as quosures in `terms`.
#   role, trained, skip, id
#                     Step bookkeeping fields, stored on the step unchanged.
#   include_covariate, covariate_id, remove_corr, corr_threshold, n_features
#                     Filter settings, stored on the step unchanged.
#   removals          Slot for the column names to drop; NULL until filled.
#
# Returns the result of add_step(), i.e. `recipe` with the new step attached.
step_limma <- function(
    recipe,
    ...,
    role = NA,
    trained = FALSE,
    include_covariate = FALSE,
    covariate_id = NULL,
    remove_corr = FALSE,
    corr_threshold = NULL,
    n_features = NULL,
    removals = NULL,
    skip = FALSE,
    id = rand_id("limma")) {
  # Build the step object first, then attach it to the recipe.
  limma_step <- step_limma_new(
    terms = enquos(...),
    role = role,
    trained = trained,
    include_covariate = include_covariate,
    covariate_id = covariate_id,
    remove_corr = remove_corr,
    corr_threshold = corr_threshold,
    n_features = n_features,
    removals = removals,
    skip = skip,
    id = id
  )
  add_step(recipe, limma_step)
}
# Low-level constructor for the limma step.
#
# Every argument is required and is passed straight through to step(), which
# is called with subclass "limma". No validation happens here.
step_limma_new <- function(
    terms, role, trained, include_covariate, covariate_id,
    remove_corr, corr_threshold, n_features, removals, skip, id) {
  limma_step <- step(
    subclass = "limma",
    # selection and bookkeeping
    terms = terms, role = role, trained = trained,
    # filter settings
    include_covariate = include_covariate, covariate_id = covariate_id,
    remove_corr = remove_corr, corr_threshold = corr_threshold,
    n_features = n_features,
    # training result and remaining bookkeeping
    removals = removals, skip = skip, id = id
  )
  limma_step
}
# Train the limma feature filter on the training set.
#
# Fits a limma linear model of every selected column on the outcome
# (optionally adjusting for one covariate), ranks the columns by p-value and
# keeps the `n_features` top-ranked ones. When `remove_corr` is TRUE, columns
# among those top-ranked ones that caret::findCorrelation() flags at
# `corr_threshold` are dropped as well.
#
# Arguments:
#   x         The untrained step_limma object.
#   training  Training data containing the selected columns and the outcome.
#   info      Recipe variable info with `variable` and `role` columns.
#   ...       Unused.
#
# Returns a trained step whose `removals` field names every column that
# bake() should drop (the covariate, if any, plus all filtered-out columns).
# Aborts when the settings are inconsistent (see the checks below).
prep.step_limma <- function(x, training, info = NULL, ...) {
  col_names <- recipes::recipes_eval_select(x$terms, training, info)
  recipes::check_type(training[, col_names], types = c("double", "integer"))

  # %in% (rather than ==) so variables with an NA role are not picked up.
  outcome_name <- info$variable[info$role %in% "outcome"]
  if (length(outcome_name) != 1L) {
    rlang::abort("step_limma requires exactly one outcome variable")
  }
  tar <- training[[outcome_name]]

  n_features <- x$n_features
  if (!is.numeric(n_features) || length(n_features) != 1L ||
    is.na(n_features) || n_features < 1) {
    rlang::abort("`n_features` must be a single positive number")
  }

  covariate_id <- x$covariate_id
  # The covariate is never a candidate feature.
  feature_names <- setdiff(unname(col_names), covariate_id)
  # limma expects features in rows and samples in columns.
  df_limma <- t(as.matrix(training[, feature_names]))

  if (isTRUE(x$include_covariate)) {
    if (length(covariate_id) == 0) {
      rlang::abort("include_covariate == TRUE but covariate_id was not specified\nspecify covariate_id")
    }
    if (length(covariate_id) > 1) {
      rlang::abort("more than one covariate_id specified\nstep_limma only supports 1 covariate at present")
    }
    # Extract by name so the string is always treated as a column name.
    covar <- training[[covariate_id]]
    design <- model.matrix(~ tar + covar)
  } else {
    design <- model.matrix(~tar)
  }

  fit <- limma::lmFit(df_limma, design)
  ebayes <- limma::eBayes(fit)
  # coef = 2 is the outcome term (column 1 of the design is the intercept).
  tab <- limma::topTable(
    ebayes,
    coef = 2,
    adjust.method = "fdr",
    sort.by = "p",
    number = Inf
  )
  ranked <- rownames(tab)

  # Never ask for more features than exist; seq_len() also avoids the
  # descending sequence that `(n + 1):nrow(tab)` produces when n >= nrow(tab).
  n_keep <- min(floor(n_features), length(ranked))
  kept <- ranked[seq_len(n_keep)]

  if (isTRUE(x$remove_corr)) {
    if (is.null(x$corr_threshold)) {
      rlang::abort("remove_corr == TRUE but corr_threshold was not specified")
    }
    # A correlation filter needs at least two columns to compare.
    if (length(kept) > 1) {
      corr_mat <- stats::cor(
        as.matrix(training[, kept]),
        method = "spearman",
        use = "pairwise.complete.obs"
      )
      flagged <- caret::findCorrelation(corr_mat, cutoff = x$corr_threshold)
      # findCorrelation() returns column indices into corr_mat; translate them
      # to names so an empty result simply removes nothing extra.
      kept <- setdiff(kept, colnames(corr_mat)[flagged])
    }
  }

  # Everything that was ranked but not kept is removed, plus the covariate.
  removals <- c(covariate_id, setdiff(ranked, kept))

  step_limma_new(
    terms = x$terms,
    trained = TRUE,
    role = x$role,
    include_covariate = x$include_covariate,
    covariate_id = x$covariate_id,
    remove_corr = x$remove_corr,
    corr_threshold = x$corr_threshold,
    n_features = x$n_features,
    removals = removals,
    skip = x$skip,
    id = x$id
  )
}
# Apply the trained filter to new data.
#
# Passes `new_data` and the step object to recipes_remove_cols() and returns
# the result visibly (ending on an assignment would return it invisibly).
bake.step_limma <- function(object, new_data, ...) {
  recipes::recipes_remove_cols(new_data, object)
}
# Print method for the limma step.
#
# Hands print_step() the removed columns (`x$removals`) as the trained value
# and the selectors (`x$terms`) as the untrained value. The step stores only
# the removed columns, so those are the only trained result it can show.
# Returns `x` invisibly.
print.step_limma <- function(x, width = max(20, options()$width - 35), ...) {
  title <- "Limma feature filter on "
  recipes::print_step(x$removals, x$terms, x$trained, title, width)
  invisible(x)
}
# Tidy method for the limma step.
#
# Returns a tibble with columns `features` and `id`:
#   - trained step:   `features` holds the names in `x$removals`, i.e. the
#                     columns the step drops (the only result it stores);
#   - untrained step: `features` holds the selector expressions in `x$terms`
#                     rendered as text.
tidy.step_limma <- function(x, ...) {
  if (recipes::is_trained(x)) {
    features <- unname(as.character(x$removals))
  } else {
    features <- vapply(
      x$terms,
      rlang::quo_text,
      character(1),
      width = options()$width,
      nlines = 1
    )
    features <- unname(features)
  }
  # Building both columns together keeps a zero-row result well-formed.
  tibble::tibble(features = features, id = x$id)
}
# dials parameter object for the `n_features` argument of step_limma.
#
# Arguments:
#   range  Two-element integer range, both ends inclusive.
#   trans  Optional transformation, passed through to dials.
#
# The dials constructor is namespace-qualified so the function also works in
# sessions (e.g. parallel workers) where dials is installed but not attached.
num_features <- function(range = c(10L, 1000L), trans = NULL) {
  dials::new_quant_param(
    type = "integer",
    range = range,
    inclusive = c(TRUE, TRUE),
    trans = trans,
    label = c(n_features = "# features"),
    finalize = NULL
  )
}
# dials parameter object for the `include_covariate` argument of step_limma.
#
# Arguments:
#   values  Candidate logical values.
#
# The dials constructor is namespace-qualified so the function also works in
# sessions (e.g. parallel workers) where dials is installed but not attached.
# The call has no trailing comma, so no empty extra argument is passed.
include_covariate <- function(values = c(TRUE, FALSE)) {
  dials::new_qual_param(
    type = "logical",
    values = values,
    label = c(include_covariate = "covariate")
  )
}
# dials parameter object for the `corr_threshold` argument of step_limma.
#
# Arguments:
#   range  Two-element numeric range, both ends inclusive.
#   trans  Optional transformation, passed through to dials.
#
# The dials constructor is namespace-qualified so the function also works in
# sessions (e.g. parallel workers) where dials is installed but not attached.
corr_threshold <- function(range = c(0.7, 0.9), trans = NULL) {
  dials::new_quant_param(
    type = "double",
    range = range,
    inclusive = c(TRUE, TRUE),
    trans = trans,
    label = c(corr_threshold = "correlation threshold"),
    finalize = NULL
  )
}
# dials parameter object for the `remove_corr` argument of step_limma.
#
# Arguments:
#   values  Candidate logical values.
#
# The dials constructor is namespace-qualified so the function also works in
# sessions (e.g. parallel workers) where dials is installed but not attached.
# The call has no trailing comma, so no empty extra argument is passed.
remove_corr <- function(values = c(TRUE, FALSE)) {
  dials::new_qual_param(
    type = "logical",
    values = values,
    label = c(remove_corr = "remove correlated features")
  )
}
# tunable() method: lists the step arguments that can be tuned.
#
# Returns a tibble with one row per tunable argument. `call_info` holds, for
# each argument, a list naming the function that builds its parameter object.
tunable.step_limma <- function(x, ...) {
  # step argument -> name of its parameter-building function
  param_funs <- c(
    n_features = "num_features",
    include_covariate = "include_covariate",
    remove_corr = "remove_corr",
    corr_threshold = "corr_threshold"
  )
  call_info <- lapply(
    unname(param_funs),
    function(fun_name) list(fun = fun_name)
  )
  tibble::tibble(
    name = names(param_funs),
    call_info = call_info,
    source = "recipe",
    component = "step_limma",
    component_id = x$id
  )
}
# required_pkgs() method: names the packages the step declares as
# requirements. The arguments are not used.
required_pkgs.step_limma <- function(x, ...) {
  step_pkgs <- c("limma", "caret", "yardstick", "dials")
  step_pkgs
}
Define the data and workflow:
# DEFINE SEED ####
# One seed, re-applied before each random step below.
seed <- 42
# DEFINE SET ####
# Simulated data: 1000 rows, 100 numeric columns and a random two-level factor
# `target` placed as the first column.
# NOTE(review): runif() draws only 1000 values for a 1000 x 100 matrix, so
# matrix() recycles them and all 100 columns are identical. Confirm whether
# runif(1000 * 100, ...) was intended.
set.seed(seed)
set <- matrix(runif(1000, min = 1, max = 10), nrow = 1000, ncol = 100) %>%
data.frame() %>%
tibble() %>%
mutate(
target = sample(c(0, 1), 1000, replace = TRUE) %>% factor(),
.before = 1
)
# DEFINE TRAINING AND VALIDATION SETS ####
# Split stratified on `target`.
set.seed(seed)
set_split <- initial_split(set, strata = target)
set_train <- training(set_split)
set_test <- testing(set_split)
# SET UP RESAMPLING ####
# 5-fold cross-validation on the training set, stratified on `target`.
set.seed(seed)
cv_folds <- set_train %>% vfold_cv(v = 5, strata = target)
# BUILD MODELS ####
# Linear SVM (kernlab engine) with `cost` and `margin` marked for tuning.
mod_svmlinear <- svm_linear(cost = tune(), margin = tune()) %>%
set_engine("kernlab") %>%
set_mode("classification")
# BUILD RECIPE ####
# The custom step is applied to all predictors; `n_features` is marked for
# tuning.
recipe_set <- recipe(target ~ ., data = set_train) %>%
step_limma(
all_predictors(),
n_features = tune()
)
# CREATE WORKFLOWS ####
workflow_spec <- workflow() %>%
add_recipe(recipe_set) %>%
add_model(mod_svmlinear)
Set up workers and give them what they need
# doParallel TUNING ####
# Start the PSOCK workers and copy the custom step's functions into each
# worker's global environment.
cl <- parallel::makeCluster(20, type = "PSOCK")
parallel::clusterExport(
  cl,
  c(
    # All S3 methods of the step. The pattern is anchored at the end and has
    # no leading quantifier, so it is a well-formed regular expression.
    ls(pattern = "\\.step_limma$"),
    "step_limma_new",
    "num_features",
    "remove_corr",
    "include_covariate",
    "corr_threshold"
  )
)
Look at what we've assigned to the workers
# List, per worker, which of the step's functions exist in the worker's global
# environment. The fixed names are intersected with ls() so that a name only
# appears when the object is really there (a bare character vector would be
# echoed back regardless).
parallel::clusterEvalQ(
  cl,
  c(
    ls(pattern = "\\.step_limma$"),
    intersect(
      c(
        "step_limma_new",
        "num_features",
        "remove_corr",
        "include_covariate",
        "corr_threshold"
      ),
      ls()
    )
  )
)
#> [[1]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[2]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[3]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[4]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[5]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[6]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[7]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[8]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[9]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[10]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[11]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[12]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[13]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[14]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[15]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[16]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[17]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[18]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[19]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
#>
#> [[20]]
#> [1] "bake.step_limma" "prep.step_limma"
#> [3] "print.step_limma" "required_pkgs.step_limma"
#> [5] "tidy.step_limma" "tunable.step_limma"
#> [7] "step_limma_new" "num_features"
#> [9] "remove_corr" "include_covariate"
#> [11] "corr_threshold"
Measure performance of parameter tuning sequentially, over resamples, or over everything
# Register the cluster `cl` as the foreach backend, then time three variants
# of the same racing call with bench::mark(): no parallelism, parallel over
# resamples, and parallel over everything.
doParallel::registerDoParallel(cl)
set.seed(seed)
# check = FALSE: the three expressions are not required to return equal
# results. filter_gc = FALSE: iterations with garbage collection are kept.
res_mark <- bench::mark(
sequential = workflow_spec %>%
tune_race_win_loss(
resamples = cv_folds,
metrics = yardstick::metric_set(roc_auc),
grid = 10,
control = control_race(
save_pred = TRUE,
event_level = "second",
save_workflow = TRUE,
allow_par = FALSE
)
),
# backend_options passes pkgs = "dials" for the parallel runs, presumably so
# that each worker loads dials; confirm against the tune documentation.
doParallel_resamples = workflow_spec %>%
tune_race_win_loss(
resamples = cv_folds,
metrics = yardstick::metric_set(roc_auc),
grid = 10,
control = control_race(
save_pred = TRUE,
event_level = "second",
save_workflow = TRUE,
allow_par = TRUE,
parallel_over = "resamples",
backend_options = tune::new_backend_options(
tune_race_win_loss = list(pkgs = "dials"),
class = "tune_race_win_loss"
)
)
),
doParallel_everything = workflow_spec %>%
tune_race_win_loss(
resamples = cv_folds,
metrics = yardstick::metric_set(roc_auc),
grid = 10,
control = control_race(
save_pred = TRUE,
event_level = "second",
save_workflow = TRUE,
allow_par = TRUE,
parallel_over = "everything",
backend_options = tune::new_backend_options(
tune_race_win_loss = list(pkgs = "dials"),
class = "tune_race_win_loss"
)
)
),
iterations = 10,
check = FALSE,
filter_gc = FALSE
)
# Shut down the worker processes once the benchmark has finished.
parallel::stopCluster(cl)
# COMPARE RESULTS ####
# Convert the expression labels to text, sort by label and keep the first
# nine columns.
res_mark %>%
mutate(expression = expression %>% as.character()) %>%
arrange(expression) %>%
dplyr::select(1:9)
#> # A tibble: 3 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <chr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 doParallel_everything 4.68s 7.02s 0.145 23.79MB 0.203
#> 2 doParallel_resamples 10.63s 12.33s 0.0772 19.74MB 0.108
#> 3 sequential 16.51s 18.52s 0.0534 2.44GB 0.619
Created on 2023-11-28 with reprex v2.0.2
Session info:
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.3.1 (2023-06-16 ucrt)
#> os Windows 11 x64 (build 22621)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_Canada.utf8
#> ctype English_Canada.utf8
#> tz America/Edmonton
#> date 2023-11-28
#> pandoc 3.1.6.1 @ C:/PROGRA~1/Pandoc/ (via rmarkdown)