r dplyr across

How to use a function that returns multiple values in dplyr::across()?


I want to perform several operations on multiple columns and I can use dplyr::across() to do it that way:

library(tidyverse)

# Toy data: p1, p2 and p3 are multiples of x.
df <- tibble(x = 1:5, p1 = x * 2, p2 = x * 4, p3 = x * 5)
# One lambda per output column, so every "p" column is processed once per lambda.
r1 <- mutate(
    df,
    across(starts_with("p"), list(inf = ~ .x - 1, sup = ~ .x + 1))
)
r1
#> # A tibble: 5 x 10
#>       x    p1    p2    p3 p1_inf p1_sup p2_inf p2_sup p3_inf p3_sup
#>   <int> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#> 1     1     2     4     5      1      3      3      5      4      6
#> 2     2     4     8    10      3      5      7      9      9     11
#> 3     3     6    12    15      5      7     11     13     14     16
#> 4     4     8    16    20      7      9     15     17     19     21
#> 5     5    10    20    25      9     11     19     21     24     26
names(r1)
#>  [1] "x"      "p1"     "p2"     "p3"     "p1_inf" "p1_sup" "p2_inf" "p2_sup"
#>  [9] "p3_inf" "p3_sup"

However, this does not scale well when the underlying computation is expensive, because it is evaluated once per output (here, twice: once for inf and once for sup).

Instead, it would be nice if I could use a function that calculates the things that need to be calculated, and then returns a list of the 2 (or more) results.

For instance, consider this example:

# Stand-in for an expensive computation: the costly part runs once, and both
# flavours of the result come back together as the columns `inf` and `sup`
# of a single data.frame.
f <- function(x) {
    heavy <- x # wow, such heavy, very calculate
    Sys.sleep(1) # simulates the cost of the computation
    lower <- heavy - 10
    upper <- heavy + 10
    data.frame(inf = lower, sup = upper)
}

# Apply f once per "p" column; each call returns a two-column data.frame.
r2 <- mutate(
    df,
    across(starts_with("p"), f, .names = "{.col}_{.fn}")
)
r2
#> # A tibble: 5 x 7
#>       x    p1    p2    p3 p1_1$inf  $sup p2_1$inf  $sup p3_1$inf  $sup
#>   <int> <dbl> <dbl> <dbl>    <dbl> <dbl>    <dbl> <dbl>    <dbl> <dbl>
#> 1     1     2     4     5       -8    12       -6    14       -5    15
#> 2     2     4     8    10       -6    14       -2    18        0    20
#> 3     3     6    12    15       -4    16        2    22        5    25
#> 4     4     8    16    20       -2    18        6    26       10    30
#> 5     5    10    20    25        0    20       10    30       15    35
names(r2)
#> [1] "x"    "p1"   "p2"   "p3"   "p1_1" "p2_1" "p3_1"
map_chr(r2, class)
#>            x           p1           p2           p3         p1_1         p2_1 
#>    "integer"    "numeric"    "numeric"    "numeric" "data.frame" "data.frame" 
#>         p3_1 
#> "data.frame"

Created on 2021-10-25 by the reprex package (v2.0.1)

Using cbind() instead of data.frame() ends in the same result, with slightly different names (p1_1$inf becomes p1_1[,"inf"]) and a different class (data.frame becomes c("matrix", "array")).

Moreover, when using a single function, {.fn} is the position of the function so there are naming issues.

I also tried to unnest() the result but with no success.

Is there a way to get the exact result of my first output using a function in across()?


Solution

  • EDIT in recent versions

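    Since dplyr 1.1.0 (2023-01-29), across() has an .unpack argument: setting .unpack = TRUE expands data-frame results into separate columns, which is all that is needed here, e.g. mutate(across(starts_with("p"), f, .unpack = TRUE)).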

    Original answer:

    Actually, this has been considered in an issue on the Github of dplyr: https://github.com/tidyverse/dplyr/issues/5563#issuecomment-721769342.

    There, @romainfrancois offers an incredibly useful solution in the form of this unpackross() function:

    library(tidyverse)
    # Returns both bounds of x at once, as the columns `inf` and `sup` of a tibble.
    f <- function(x) {
        tibble(inf = x - 10, sup = x + 10)
    }
    # Drop-in replacement for across(): forwards every argument to across(),
    # then unpacks each resulting column into top-level columns, joining the
    # outer and inner names with "_".
    unpackross <- function(...) {
        packed <- across(...)
        tidyr::unpack(packed, names(packed), names_sep = "_")
    }
    
    df <- tibble(x = 1:5, p1 = x * 2, p2 = x * 4, p3 = x * 5)
    # f runs once per "p" column; unpackross() spreads its result into columns.
    r2 <- mutate(
        df,
        unpackross(starts_with("p"), f, .names = "{.col}_{.fn}")
    )
    r2
    #> # A tibble: 5 x 10
    #>       x    p1    p2    p3 p1_1_inf p1_1_sup p2_1_inf p2_1_sup p3_1_inf p3_1_sup
    #>   <int> <dbl> <dbl> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
    #> 1     1     2     4     5       -8       12       -6       14       -5       15
    #> 2     2     4     8    10       -6       14       -2       18        0       20
    #> 3     3     6    12    15       -4       16        2       22        5       25
    #> 4     4     8    16    20       -2       18        6       26       10       30
    #> 5     5    10    20    25        0       20       10       30       15       35
    names(r2)
    #>  [1] "x"        "p1"       "p2"       "p3"       "p1_1_inf" "p1_1_sup"
    #>  [7] "p2_1_inf" "p2_1_sup" "p3_1_inf" "p3_1_sup"
    map_chr(r2, class)
    #>         x        p1        p2        p3  p1_1_inf  p1_1_sup  p2_1_inf  p2_1_sup 
    #> "integer" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
    #>  p3_1_inf  p3_1_sup 
    #> "numeric" "numeric"
    

    Created on 2021-10-26 by the reprex package (v2.0.1)

    Hopefully, there will be an unpack parameter in across() one day! (add a +1 on my suggestion here if you agree)