Parallel RNG with foreach + doMC inside function call

I'd like to write a function that automates the foreach / %dopar% syntax within a function. Specifically, I'd like to generate a list NREP evaluations of EXPR. On my four core machine, the following call

library(doMC)
library(foreach)

parReplicate <- function(NREP, EXPR, ncores=NULL, ...){
    if (is.null(ncores)) {
        doMC::registerDoMC()
    } else {
        doMC::registerDoMC(ncores)
    }
    foreach(ii=1:NREP,...) %dopar% {
        eval(substitute(EXPR))
    }
}

parReplicate(10, rnorm(1))

returns the same four values recycled to length 10. On the other hand,

foreach(ii=1:10) %dopar% {
    eval(substitute(rnorm(1)))
}

returns 10 distinct values as expected. Why is this happening and how can I fix it?

Solution

I think getting values 'recycled' according to the number of cores is caused by the default mclapply setting preschedule = TRUE and set.seed = FALSE, which are used by parallel/foreach/doMC/%dopar% (discussed in the last section of the docs).

You can fix it by changing the default mclapply settings:

#install.packages("doMC")
library(doMC)
#> Loading required package: foreach
#> Loading required package: iterators
#> Loading required package: parallel
library(foreach)

mcoptions <- list(preschedule=FALSE, set.seed=TRUE)

parReplicate <- function(NREP, EXPR, ncores=NULL, ...){
  if (is.null(ncores)) {
    doMC::registerDoMC()
  } else {
    doMC::registerDoMC(ncores)
  }
  foreach(ii=1:NREP,...) %dopar% {
    eval(substitute(EXPR))
  }
}
parReplicate(10, rnorm(1), .options.multicore = mcoptions)
#> [[1]]
#> [1] -0.7706123
#> 
#> [[2]]
#> [1] -0.5468218
#> 
#> [[3]]
#> [1] 0.05862097
#> 
#> [[4]]
#> [1] -1.006217
#> 
#> [[5]]
#> [1] 1.137702
#> 
#> [[6]]
#> [1] 1.17726
#> 
#> [[7]]
#> [1] 0.05892002
#> 
#> [[8]]
#> [1] -0.3721021
#> 
#> [[9]]
#> [1] -0.8732641
#> 
#> [[10]]
#> [1] -0.3528947

^{Created on 2024-09-11 with reprex v2.1.0}

Or by specifying 'ncores = 10' to get 10 unique values, e.g.

#install.packages("doMC")
library(doMC)
#> Loading required package: foreach
#> Loading required package: iterators
#> Loading required package: parallel
library(foreach)

parReplicate <- function(NREP, EXPR, ncores=NULL, ...){
  if (is.null(ncores)) {
    doMC::registerDoMC()
  } else {
    doMC::registerDoMC(ncores)
  }
  foreach(ii=1:NREP,...) %dopar% {
    eval(substitute(EXPR))
  }
}

parReplicate(10, rnorm(1), ncores = 10)
#> [[1]]
#> [1] 0.5351092
#> 
#> [[2]]
#> [1] 1.005979
#> 
#> [[3]]
#> [1] 1.323487
#> 
#> [[4]]
#> [1] 0.6233244
#> 
#> [[5]]
#> [1] 0.4297823
#> 
#> [[6]]
#> [1] -0.5288708
#> 
#> [[7]]
#> [1] -0.7294616
#> 
#> [[8]]
#> [1] -0.323428
#> 
#> [[9]]
#> [1] 1.498394
#> 
#> [[10]]
#> [1] -0.02763688

^{Created on 2024-09-11 with reprex v2.1.0}

Or by running it sequentially (with %do%), e.g.

#install.packages("doMC")
library(doMC)
#> Loading required package: foreach
#> Loading required package: iterators
#> Loading required package: parallel
library(foreach)

parReplicate_2 <- function(NREP, EXPR, ncores=NULL, ...){
  if (is.null(ncores)) {
    doMC::registerDoMC()
  } else {
    doMC::registerDoMC(ncores)
  }
  foreach(ii=1:NREP,...) %do% {
    eval(substitute(EXPR))
  }
}

parReplicate_2(10, rnorm(1))
#> [[1]]
#> [1] -1.459348
#> 
#> [[2]]
#> [1] -1.034365
#> 
#> [[3]]
#> [1] -0.8022466
#> 
#> [[4]]
#> [1] -1.150572
#> 
#> [[5]]
#> [1] -0.4519494
#> 
#> [[6]]
#> [1] 0.914055
#> 
#> [[7]]
#> [1] 1.087463
#> 
#> [[8]]
#> [1] 1.748693
#> 
#> [[9]]
#> [1] 0.07885326
#> 
#> [[10]]
#> [1] 1.164207

^{Created on 2024-09-11 with reprex v2.1.0}

I think it would be worth benchmarking these 'fixes' with your actual data/function to see whether you're getting the speed-up you're looking for, or consider switching to a different package such as doRNG as @Roland suggests.