I'm trying to get a simple `%dopar%` example working so I can parallelize some very expensive cross-validation calculations. When I run the following code, my result array `r` is filled with `NA`:
library(doParallel)
library(parallel)
library(foreach)
# Nine overlapping integer runs, each nine values long; the trailing comment
# on every line records that run's mean.
data1.1 <- 1:9   # mean = 5
data1.2 <- 2:10  # mean = 6
data1.3 <- 3:11  # mean = 7
data2.1 <- 4:12  # mean = 8
data2.2 <- 5:13  # mean = 9
data2.3 <- 6:14  # mean = 10
data3.1 <- 7:15  # mean = 11
data3.2 <- 8:16  # mean = 12
data3.3 <- 9:17  # mean = 13

# Two-level named list: data_list[[i]][[j]] is the vector data<i>.<j>.
data_list <- list(
  "1" = list("1" = data1.1, "2" = data1.2, "3" = data1.3),
  "2" = list("1" = data2.1, "2" = data2.2, "3" = data2.3),
  "3" = list("1" = data3.1, "2" = data3.2, "3" = data3.3)
)
# parallel::detectCores() # 16 on Ryzen 7 system
# Leave two cores free for the master session, but never ask for fewer than
# one worker: detectCores() can return NA, and on machines with two cores or
# fewer the subtraction would otherwise give a count below 1.
n_workers <- max(1L, parallel::detectCores() - 2L, na.rm = TRUE)
# `outfile = ""` is the documented option for showing worker output on the
# master's console; the previous `output = ""` is not a recognised option and
# had no effect.
cl <- parallel::makeCluster(n_workers, outfile = "")
# activate cluster for foreach library
doParallel::registerDoParallel(cl)
# Result matrix (3 x 3, initialised to 0): row i is meant to receive the means
# of the three vectors stored in data_list[[i]].
r = array(0, c(3, 3))
for (i in 1:3) {
# One %dopar% task per position j in data_list[[i]]; rbind stacks the per-task
# results, and the stacked values are written into row i of r.
r[i, ] = foreach::foreach(j = 1:length(data_list[[i]]),
.combine = rbind) %dopar% {
# NOTE(review): single-bracket `[j]` yields a length-one list rather than the
# integer vector, so mean() is handed a list and returns NA. This is the
# behaviour the question is asking about, so it is left unchanged here.
mean(data_list[[i]][j])
}
}
# stop cluster to free up resources
parallel::stopCluster(cl)
The result I'm expecting in the `r` array is:
     [,1] [,2] [,3]
[1,]    5    6    7
[2,]    8    9   10
[3,]   11   12   13
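The `r` array is not being filled because `mean` is returning `NA`: `data_list[[i]][j]` uses single-bracket indexing, which returns a one-element list rather than the numeric vector inside it, and `mean` of a list is `NA`. This can be solved by adding another `[` around `j`, i.e. `data_list[[i]][[j]]`, which extracts the vector itself.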
# Nine overlapping integer runs, each nine values long; the trailing comment
# on every line records that run's mean.
data1.1 <- 1:9   # mean = 5
data1.2 <- 2:10  # mean = 6
data1.3 <- 3:11  # mean = 7
data2.1 <- 4:12  # mean = 8
data2.2 <- 5:13  # mean = 9
data2.3 <- 6:14  # mean = 10
data3.1 <- 7:15  # mean = 11
data3.2 <- 8:16  # mean = 12
data3.3 <- 9:17  # mean = 13

# Two-level named list: data_list[[i]][[j]] is the vector data<i>.<j>.
data_list <- list(
  "1" = list("1" = data1.1, "2" = data1.2, "3" = data1.3),
  "2" = list("1" = data2.1, "2" = data2.2, "3" = data2.3),
  "3" = list("1" = data3.1, "2" = data3.2, "3" = data3.3)
)
library(foreach)
# Pre-size the 3 x 3 result; row i receives the means of data_list[[i]].
r <- matrix(0, nrow = 3, ncol = 3)
for (i in seq_len(3)) {
  # `[[j]]` pulls out the integer vector itself, so mean() gets numeric input.
  r[i, ] <- foreach::foreach(
    j = seq_along(data_list[[i]]),
    .combine = rbind
  ) %dopar% {
    mean(data_list[[i]][[j]])
  }
}
#> Warning: executing %dopar% sequentially: no parallel backend registered
r
#> [,1] [,2] [,3]
#> [1,] 5 6 7
#> [2,] 8 9 10
#> [3,] 11 12 13
Created on 2022-12-08 by the reprex package (v2.0.0)
If you create a helper function and run that in `foreach`, it might simplify things. You can let `foreach` be the iterator over the list elements, so you can skip the `for` loop. Then each worker will have a clear set of data (one list element) and a function (`mean_inner_list`) to run.
# Nine overlapping integer runs, each nine values long; the trailing comment
# on every line records that run's mean.
data1.1 <- 1:9   # mean = 5
data1.2 <- 2:10  # mean = 6
data1.3 <- 3:11  # mean = 7
data2.1 <- 4:12  # mean = 8
data2.2 <- 5:13  # mean = 9
data2.3 <- 6:14  # mean = 10
data3.1 <- 7:15  # mean = 11
data3.2 <- 8:16  # mean = 12
data3.3 <- 9:17  # mean = 13

# Two-level named list: data_list[[i]][[j]] is the vector data<i>.<j>.
data_list <- list(
  "1" = list("1" = data1.1, "2" = data1.2, "3" = data1.3),
  "2" = list("1" = data2.1, "2" = data2.2, "3" = data2.3),
  "3" = list("1" = data3.1, "2" = data3.2, "3" = data3.3)
)
#----------
library(foreach)
#' Mean of every element of a list
#'
#' @param x A list (or vector) whose elements are passed to `mean()` one at a
#'   time.
#' @param ... Further arguments forwarded unchanged to `mean()`, for example
#'   `na.rm = TRUE`.
#' @return A list with the same length and names as `x`; element `k` is
#'   `mean(x[[k]], ...)`.
mean_inner_list <- function(x, ...) lapply(x, mean, ...)
# Iterate over the top-level elements of data_list directly: each task applies
# mean_inner_list() to one inner list, and rbind stacks the results by row.
foreach::foreach(x = data_list, .combine = rbind) %dopar% {
  mean_inner_list(x)
}
#> Warning: executing %dopar% sequentially: no parallel backend registered
#> 1 2 3
#> result.1 5 6 7
#> result.2 8 9 10
#> result.3 11 12 13
Created on 2022-12-07 by the reprex package (v2.0.0)