From multiple targets (`a`) created with `map`, I have 2 other targets (`b` and `d`) that iterate over the first target. Now I would like to use the results of these targets in another target. In addition, I would like to cross with another variable (`model`).
I pasted a reprex below, but for some context: in my case `a` describes different subsets of a dataset, `b` and `d` pre-compute some stuff, and `e` applies different models to each subset using the pre-computed data.
I tried different combinations of `map` and `cross` (like `e` below) without success. I tried to add all the target names I want to use afterwards in `fn4`, but it creates unnecessary crosses.
library(drake)
# Reprex for the question: `e` is meant to use the `b` and `d` targets together
# with every `model` value. `trace = TRUE` is set so that the printed plan also
# shows the grouping columns (arg1, arg2, a, b, d, model, .by, e).
drake_plan(
# `a`: map() over arg1 and arg2; the printed plan contains two `a` targets.
a = target(
fn1(arg1, arg2),
transform = map(
arg1 = !!c("arg11", "arg12"),
arg2 = !!c("arg21", "arg22")
)
),
# `b`: mapped over arg1; the printed plan contains b_arg11 and b_arg12.
b = target(
fn2(arg1),
transform = map(arg1)
),
# `d`: mapped over arg1; the printed plan contains d_arg11 and d_arg12.
d = target(
fn3(arg1),
transform = map(arg1)
),
# `e`: cross of b, d and model. In the printed plan `.by` ends up as an
# ordinary column holding `arg1`, arg1/arg2 are NA on every `e` row (hence the
# e_NA_* names), and 12 `e` targets are generated, which is consistent with a
# full b x d x model cross rather than a cross grouped by arg1.
e = target(
fn4(b, d, model, arg1),
transform = cross(
b,
d,
model = !!c("x", "y", "z"),
.by = arg1,
.id = c(arg1, model)
)
),
trace = TRUE
)
#> # A tibble: 18 x 10
#> target command arg1 arg2 a b d model .by e
#> <chr> <expr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 a_arg11… fn1("arg11… "\"arg… "\"ar… a_arg… <NA> <NA> <NA> <NA> <NA>
#> 2 a_arg12… fn1("arg12… "\"arg… "\"ar… a_arg… <NA> <NA> <NA> <NA> <NA>
#> 3 b_arg11 fn2("arg11… "\"arg… "\"ar… a_arg… b_ar… <NA> <NA> <NA> <NA>
#> 4 b_arg12 fn2("arg12… "\"arg… "\"ar… a_arg… b_ar… <NA> <NA> <NA> <NA>
#> 5 d_arg11 fn3("arg11… "\"arg… "\"ar… a_arg… <NA> d_ar… <NA> <NA> <NA>
#> 6 d_arg12 fn3("arg12… "\"arg… "\"ar… a_arg… <NA> d_ar… <NA> <NA> <NA>
#> 7 e_NA_x fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"x… arg1 e_NA…
#> 8 e_NA_y fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"y… arg1 e_NA…
#> 9 e_NA_z fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"z… arg1 e_NA…
#> 10 e_NA_x_2 fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"x… arg1 e_NA…
#> 11 e_NA_y_2 fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"y… arg1 e_NA…
#> 12 e_NA_z_2 fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"z… arg1 e_NA…
#> 13 e_NA_x_3 fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"x… arg1 e_NA…
#> 14 e_NA_y_3 fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"y… arg1 e_NA…
#> 15 e_NA_z_3 fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"z… arg1 e_NA…
#> 16 e_NA_x_4 fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"x… arg1 e_NA…
#> 17 e_NA_y_4 fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"y… arg1 e_NA…
#> 18 e_NA_z_4 fn4(b_arg1… <NA> <NA> <NA> b_ar… d_ar… "\"z… arg1 e_NA…
Created on 2019-07-15 by the reprex package (v0.3.0)
It seems to work, but `arg1` and `arg2` are not carried over and are not usable in `fn4` and the following targets. Should I split this step into 2 steps, and if so, how (`map` then `cross`, or `cross` then `map`)? I tried to cross earlier, after `a`, but I don't want to recompute identical `b` and `d` multiple times; it may take a lot of time and memory.
I separated all these tasks in drake because many targets use the same data, which needs to be saved as a file for the `run` function (a call to external binaries). This prevents re-computing the same thing multiple times and saving the same thing multiple times in different files (that can be huge).
# Second reprex: a plan closer to the real use case, plus a hand-written
# `run_plan` showing the targets that the commented-out `run_1` transform
# should generate.
library(drake)
library(tibble)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Inputs: one data file per country, and the directory used for output paths.
path_data <- c("path/data_1.csv", "path/data_2.csv")
countries <- c("1", "2")
analysis_dir <- "path"
# One row per substudy of country 1, with its `adjust` and `sex` settings.
substudies_1 <- tribble(
~substudy, ~adjust, ~sex,
"sub1", "no", "male/female",
"sub2", "yes", "male/female"
)
# Model identifiers to be crossed with the substudies in the `run` step.
models <- c("x", "y")
plan <- drake_plan(
# `data`: map() over path and country together, with targets named by country.
data = target(
get_data(file_in(path)),
transform = map(path = !!path_data, country = !!countries, .id = country)
),
# `SNP`: mapped over the `data` targets, named by country.
SNP = target(
get_SNP_data_country(SNP_gene, data),
transform = map(data, .id = country)
),
# `map` and `ref`: mapped over the `SNP` targets; each command builds its
# output path from analysis_dir and country.
map = target(
# actually write file and save path
write_snp_map(SNP, file.path(analysis_dir, country, "SNP_map.txt")),
transform = map(SNP, .id = country)
),
ref = target(
# actually write file and save path
write_snp_ref(SNP, file.path(analysis_dir, country, "SNP_ref.txt")),
transform = map(SNP, .id = country)
),
# data_2 is managed in another target because it has a different set of substudies,
# this maybe can be tidied up, a problem for another day...
# `population_1`: the grid of substudies is supplied through `.data`, and
# targets are named by substudy.
population_1 = target(
extract_population(data, sex, adjust),
transform = map(
data = data_1,
country = "1",
.data = !!substudies_1,
.id = c(substudy)
),
# NOTE(review): the trailing comma above leaves an empty argument in target();
# presumably harmless since the reprex rendered, but worth removing.
),
# `pedigree_1` and `covariable_1`: mapped over the `population_1` targets,
# named by substudy.
pedigree_1 = target(
extract_pedigree(data_1, population_1),
transform = map(
population_1,
.id = substudy
)
),
covariable_1 = target(
extract_covariable(data_1, population_1, adjust, sex),
transform = map(
population_1,
.id = substudy
)
),
# Draft of the transform being asked about; it is disabled, and its intended
# result is written out by hand in `run_plan` below.
# NOTE(review): this draft calls `run_fn(...)` with 8 arguments, while the
# hand-written commands call `run(...)` with 7; confirm which one is intended.
# run_1 = target(
# run_fn(map_1, ref_1, pedigree_1, covariable_1, substudy, model, adjust, sex),
# transform = cross(population_1, model = !!models)
# ),
trace = TRUE
)
# the desired plan for the run target
# Four rows (2 substudies x 2 models); the columns after `command` mirror the
# trace columns so the rows can be bound to `plan`.
run_plan <- tibble(
target = c("run_1_x_population_1_sub1", "run_1_y_population_1_sub1", "run_1_x_population_1_sub2", "run_1_y_population_1_sub2"),
command = list(
expr(run(map_1, ref_1, pedigree_1_sub1, covariable_1_sub1, "x", "sub1", "no")),
expr(run(map_1, ref_1, pedigree_1_sub1, covariable_1_sub1, "y", "sub1", "no")),
expr(run(map_1, ref_1, pedigree_1_sub2, covariable_1_sub2, "x", "sub2", "yes")),
expr(run(map_1, ref_1, pedigree_1_sub2, covariable_1_sub2, "y", "sub2", "yes"))
),
path = NA_character_,
country = "1",
population_1 = c(rep("population_1_sub1", 2), rep("population_1_sub2", 2)),
substudy = c(rep("sub1", 2), rep("sub2", 2)),
adjust = c(rep("no", 2), rep("yes", 2)),
sex = c(rep("male/female", 4)),
pedigree_1 = c(rep("pedigree_1_sub1", 2), rep("pedigree_1_sub2", 2)),
covariable_1 = c(rep("covariable_1_sub1", 2), rep("covariable_1_sub2", 2)),
model = c("x", "y", "x", "y"),
SNP = "SNP_1",
map = "map_1",
ref = "ref_1"
)
# Combine the generated plan with the hand-written rows and plot the
# dependency graph, showing targets only.
config <- drake_config(bind_rows(plan, run_plan))
vis_drake_graph(config, targets_only = TRUE)
Created on 2019-07-15 by the reprex package (v0.3.0)
plan: i.imgur.com/MyqoKJi.png
Edit 2:
I now use the `.data` parameter in a `map` transform, with a data frame of previous target names (built using `rlang::syms`). It works fine, except that it doesn't work with the `max_expand` parameter of `drake::drake_plan`. This solution is also not optimal because crafting a grid for `.data` is very verbose.
Would you mind explicitly posting the plan you want without any transforms? `drake_plan_source()` can help.
One note: only `combine()` understands `.by`. Maybe another approach is to use `transform = map(.data = !!your_grid_of_combinations)`: https://ropenscilabs.github.io/drake-manual/plans.html#map.
Does the plan you want look something like this?
# Candidate plan offered in the reply: the `a`, `b` and `d` targets are the
# same as in the question; only the transform of `e` differs.
library(drake)
plan <- drake_plan(
# `a`: map() over arg1 and arg2.
a = target(
fn1(arg1, arg2),
transform = map(
arg1 = !!c("arg11", "arg12"),
arg2 = !!c("arg21", "arg22")
)
),
# `b` and `d`: each mapped over arg1.
b = target(
fn2(arg1),
transform = map(arg1)
),
d = target(
fn3(arg1),
transform = map(arg1)
),
# `e`: `.by` is dropped here; `arg1` is instead listed as one of the
# variables passed to cross(), and `model` is given as a plain c(...) call
# without `!!`. Target names are built from arg1 and model.
e = target(
fn4(b, d, model, arg1),
transform = cross(
b,
d,
model = c("x", "y", "z"),
arg1,
.id = c(arg1, model)
)
)
)
# Build the config and draw the dependency graph to inspect the result.
config <- drake_config(plan)
vis_drake_graph(config)
Created on 2019-07-15 by the reprex package (v0.3.0)