r parallel-processing parallel-foreach

Get multiple values out of parallel foreach loop


I've tried searching but haven't located anything that's gotten me all the way.

I'm running an occupancy prediction model on a stack of three rasters. Due to the large amount of processing that needs to happen, I'm using a parallel foreach loop.

I need to retrieve three variables out of the results from the loop: test, na, and pred. I need those three values to fill in the new raster values while maintaining the same extent. Alternatively, does anyone know a way to fill in the gaps created by NA values during processing?

Below is the code I've been trying to use based on posts I've found. I also tried nesting foreach loops, but I'm not sure I understand how those work, or if that would achieve my ends.

library(parallel)
library(doSNOW)

multiResultClass<- function(test = NULL, tmp = NULL, na = NULL, pred = NULL){
  results<- list(
    test = test,
    tmp = tmp,
    na = na,
    pred = pred
  )
  class(results)<- append(class(results), "multiResultClass")
  return(results)
}

nc<- detectCores()-1
cl<- makeCluster(nc)
registerDoSNOW(cl)

predicts<- foreach (i = 1:nrow(pm), .multicombine = T, .maxcombine = 1000,
         .packages = c("unmarked", "raster"), .verbose = T)%dopar%{
           
            results<- multiResultClass()
           
           test<- cellFromRow(pm, i)
           tmp<- data.frame(pm[test])
           na<- any(is.na(tmp[i, ]))
           
           if(length(which(na) != nrow(tmp))){
             pred<- predict(fmBest, "state", tmp)
           }
           results$test<- test
           results$tmp<- tmp
           results$na<- na
           results$pred<- pred
           return(results)
         }

foreach(i = 1:nrow(pm))%do%{
  
test<- predicts[[i]]$test
na<- predicts[[i]]$na
pred<- predicts[[i]]$pred
}

stopCluster(cl)

I have a working foreach loop that gets me the pred values, but without test and na I haven't found a way to properly fill in the raster template the data needs to go into. That foreach loop is below:

library(parallel)
library(doSNOW)


ns<- detectCores()-1
cl<- makeCluster(ns);cl
registerDoSNOW(cl)

predictions<- 
  foreach (i = 1:nrow(pm), .multicombine = T, .maxcombine = 5000,
           .packages = c("unmarked", "raster"), .verbose = T)%dopar%{
   
                 test<- cellFromRow(pm, i)
                 tmp<- data.frame(pm[test])
                 na<- any(is.na(tmp[i, ]))
                 
                 if(length(which(na) != nrow(tmp))){
                 predict(fmBest, "state", tmp)
}

}

stopCluster(cl)

Solution

  • I finally found a combine function I could make work. Below is the code I used that returns values for test, na, and pred all in a large list.

    library(doSNOW)
    
    nc<- detectCores()-1
    cl<- makeCluster(nc);cl
    registerDoSNOW(cl)
    
    comb<- function(...){
      mapply('rbind', ..., SIMPLIFY = F)
    }
    
    predictions<- foreach(i = 1:nrow(pm), .combine = 'comb', .multicombine = T,
              .maxcombine = 200, .packages = c("unmarked", "raster"), .verbose = T,
              .inorder = F)%dopar%{ 
                
            #get cell number values from raster stack
                test<- cellFromRow(pm, i)
                
                # make into a data.frame for prediction
                tmp<- data.frame(pm[test])
                
                # test which are na
                na<- any(is.na(tmp[i, ]))
                
                # avoid NA values entering the predict function
                if(length(which(na)) != nrow(tmp)){
    
                  #   # Predict the new data
                  pred<- predict(fmBest, "state", tmp)
                  
                }
    
                list(test, na, pred)  
                
               }
    stopCluster(cl)