revalpurrr

Using purrr::map with a user defined function - how to pass arguments


I am creating a package where I have a function (pkgfun) that computes some indicators based on values from subsets of a data frame, using group_by and purrr::map. I would also like to let the user be able to submit an additional function (userfun) to compute this indicator, in case none of the built-in functions matches the needs. pkgfun should therefore accept some additional parameters for userfun, but also make it possible for userfun to use other variables in pkgfun, either other arguments or variables defined inside the function.

Ideally, I would have liked to write the functions as close to the following example as possible. However, this will not correctly pass the arguments.

pkgfun = function(df, pkgArg, userfun, fargs, test, ...) {
    dots = list(...)
    funArg = 3
    fargs = names(formals(userfun))
    ufRes = df %>% 
      group_by(himgid) %>%  nest() %>%
      mutate(ufres = map(data, ~userfun(., fargs))) %>%
      unnest(ufres) %>% ungroup %>% select(ufres) %>% pull 
    print("ufRes")
    ufRes
  }
  
  userfun = function(df, aboveLimit, pkgArg, funArg) {
    print(funArg)
    sum(df$gridvar > aboveLimit) >= pkgArg
  }

 pkgfun(df, userfun = userfun, aboveLimit = 5, pkgArg = 5)

I have managed to create a simplified working example, but I find it depends on a bit of redundancy on the user side, it seems a bit too complicated, and it uses some expressions I would prefer to remove (such as eval(parse(...)) ).

The implementation of pkgfun allows for two ways of passing the right arguments, but I don't like either.

The first one takes an additional argument fargs, which includes all the names of the arguments that should be added to argList, a list of arguments. This is a nuisance when calling the function.

The second one checks the list of required arguments of userfun, searches for them and adds them all to argList. However, this means the user needs to specify dummy arguments in userfun that are actually not used in the function.

I think it should be possible both to get closer to the first example and remove the eval(parse construct, but how...? Any suggestions?

# pkgArg is an argument that is otherwise used in pkgfun, and reused in userfun
pkgfun = function(df, pkgArg, userfun, fargs, test, ...) {
  dots = list(...)
# funArg is a variable defined inside the function, and but also useful in userfun. 
  funArg = 3
# Finding the arguments from userfun if not given as an argument
  if (missing(fargs)) {
    fargs = names(formals(userfun))
    fargs = fargs[!(fargs %in% c("df", "argList", "..."))]
  }
  dnames = names(dots)[which(names(dots) %in% fargs)] 
# Adding ... arguments to the environment, so they are available to dynGet
  if (length(dnames) > 0) for (dn in dnames) eval(parse(text = paste(dn, "=", dots[[dn]] )))
  argList = lapply(fargs, dynGet)
  names(argList) = fargs
# 
  ufRes = df %>%
    group_by(himgid) %>%  nest() %>%
    mutate(ufRes = map(data, ~userfun(., argList = argList))) %>%
    unnest(ufRes) %>% ungroup %>% select(ufRes) %>% pull 
  print("ufRes")
  ufRes
}

# The userfun checks if the group has at least pkgArg entries that are larger than aboveLimit
# It also prints the names of the arguments in argList, and funArg, just to check that it 
# is correctly passed
userfun = function(df, aboveLimit, pkgArg, funArg, argList) {
  print(paste("argList", names(argList)))
   aboveLimit = argList$aboveLimit
   pkgArg = argList$pkgArg
   funArg = argList$funArg
   print(funArg)
    sum(df$gridvar > aboveLimit) >= pkgArg
  }

  df = data.frame(himgid = rep(c(1,2,3), 20), gridvar = 1:20)

# Call function with a vector of argument names, the named variables are then unnecessary in
# function definition
  pkgfun(df, userfun = userfun, fargs = c("aboveLimit", "pkgArg", "pkgArg"), 
            aboveLimit = 5, pkgArg = 5)
# This call is possible when the arguments are named in the function definition
  pkgfun(df, userfun = userfun, aboveLimit = 5, pkgArg = 5)


Solution

  • I'll take a stab at it ...

    require(tidyverse)
    
    df = data.frame(himgid = rep(c(1,2,3), 20), gridvar = 1:20)
    
    
    pkgfun = function(df, pkgArg, userfun, fargs, test, ...) {
    
      dots = list(...)
      funArg = 3
      fargs_names = names(formals(userfun))
      
      passed_in  <- setdiff(intersect(ls(),fargs_names),"df")
      names(passed_in) <- passed_in
      args_to_pass <- map(passed_in,\(x)get(x,pos = -1)) |> setNames(passed_in)
      args_to_pass <- c(args_to_pass,dots)
    
      ufRes = df %>% 
        group_by(himgid) %>%  nest() %>%
        mutate(ufres = map(data, \(subdata){
          local_args <- c(list("df"=subdata),args_to_pass)
          do.call(userfun,args=local_args)})) %>%
        unnest(ufres) %>% ungroup %>% select(ufres) %>% pull 
      print("ufRes")
      ufRes
    }
    
    userfun = function(df, aboveLimit, pkgArg, funArg) {
      print(funArg)
      sum(df$gridvar > aboveLimit) >= pkgArg
    }
    
    pkgfun(df, userfun = userfun, aboveLimit = 5, pkgArg = 5)