r dtplyr

R dtplyr using mutated columns in functions


I have data in a lazy_tbl and create columns that I would like to use later to calculate something else, but I'm missing something because I'm getting errors. Here is an example.

library(dtplyr)
library(dplyr)
library(implied)

# Helper: a version of mean() that returns NA instead of raising an error
# NOTE(review): possibly(), map_dbl() and pmap_dbl() are purrr functions, but
# purrr is not attached in this snippet — presumably it was loaded elsewhere.
possibly_mean <- possibly(mean, otherwise = NA)

# Reproduces the error: every expression inside this mutate() is translated by
# dtplyr into a single data.table `:=` call (see the error message).
ready_to_calculate %>% 
  lazy_dt() %>% 
  mutate(avgH = map_dbl(odds_1x2, ~possibly_mean(.x$home_odds, na.rm = T)),
         avgD = map_dbl(odds_1x2, ~possibly_mean(.x$draw_odds, na.rm = T)),
         avgA = map_dbl(odds_1x2, ~possibly_mean(.x$away_odds, na.rm = T)),
         # Offending line: `..1`, `..2`, `..3` end up in data.table's j
         # expression, where a `..` prefix means "look this symbol up in the
         # calling scope", so data.table searches for a variable named `1`.
         prob1 = pmap_dbl(list(avgH, avgD, avgA), ~implied::implied_probabilities(c(..1, ..2, ..3), method = "wpo")$probabilities[1,1]))

This gives me the following error:

Error in `[.data.table`(copy(`_DT20`), , `:=`(c("avgH", "avgD", "avgA",  : 
  Variable '1' is not found in calling scope. Looking in calling scope because this symbol was prefixed with .. in the j= parameter.

If I call as_tibble() before the last mutate step, I get the answer I'm looking for. So what am I missing here, and how can I use newly created columns in later formulas?

# Workaround: the three averages are still computed through dtplyr, then
# as_tibble() materialises the result as a tibble. The final mutate() is
# therefore evaluated by dplyr itself, and `..1`, `..2`, `..3` are resolved
# as the positional arguments of the purrr lambda instead of being handed
# to data.table.
ready_to_calculate %>%
  lazy_dt() %>%
  mutate(
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    avgH = map_dbl(odds_1x2, ~ possibly_mean(.x$home_odds, na.rm = TRUE)),
    avgD = map_dbl(odds_1x2, ~ possibly_mean(.x$draw_odds, na.rm = TRUE)),
    avgA = map_dbl(odds_1x2, ~ possibly_mean(.x$away_odds, na.rm = TRUE))
  ) %>%
  as_tibble() %>%
  mutate(
    prob1 = pmap_dbl(
      list(avgH, avgD, avgA),
      ~ implied::implied_probabilities(
        c(..1, ..2, ..3),
        method = "wpo"
      )$probabilities[1, 1]
    )
  )

# A tibble: 2 × 7
  home_team away_team   odds_1x2       avgH  avgD  avgA  prob1
  <chr>     <chr>       <list>        <dbl> <dbl> <dbl>  <dbl>
1 Arsenal   Aston Villa <df [18 × 4]>  1.96  3.64  3.92  0.496
2 Leeds     Wolves      <lgl [1]>     NA    NA    NA    NA    

DATA:

dput(ready_to_calculate)
structure(list(home_team = c("Arsenal", "Leeds"), away_team = c("Aston Villa", 
"Wolves"), odds_1x2 = list(structure(list(bookmaker = c("10Bet", 
"188BET", "1xBet", "888sport", "bet-at-home", "bet365", "Betfair", 
"Betsafe", "Betsson", "BetVictor", "Betway", "bwin", "ComeOn", 
"Interwetten", "Pinnacle", "Unibet", "William Hill", "Betfair Exchange"
), home_odds = c(2.05, 2.02, 1.84, 1.93, 2.02, 1.85, 1.91, 2.05, 
2.05, 1.95, 1.91, 1.9, 2.05, 1.95, 2.04, 1.96, 1.91, 1.97), draw_odds = c(3.6, 
3.6, 3.85, 3.6, 3.6, 3.75, 3.7, 3.65, 3.65, 3.6, 3.6, 3.5, 3.65, 
3.6, 3.65, 3.7, 3.6, 3.61), away_odds = c(3.65, 3.8, 4.41, 4.1, 
3.65, 4, 4.2, 3.7, 3.7, 4, 4, 4, 3.75, 3.85, 3.87, 4.1, 3.9, 
3.95)), class = "data.frame", row.names = c(NA, 18L)), NA)), class = "data.frame", row.names = c(NA, 
-2L))

Solution

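  • I think the problem is the anonymous lambda function combined with accessing its arguments via ..1, ..2, etc.: dtplyr translates the whole mutate() into a data.table call, and data.table treats symbols prefixed with .. in j as variables to look up in the calling scope (hence "Variable '1' is not found"). We can put the calculation in a custom named function instead and pass it to pmap_dbl by its bare name: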

    library(dtplyr)
    library(dplyr)
    library(purrr)
    library(implied)
    
    # Helper: mean() wrapped so that a failing call yields NA, not an error
    possibly_mean <- possibly(.f = mean, otherwise = NA)
    
    #' Implied probability of the first outcome for one set of three odds
    #'
    #' Written as a named function so it can be passed to `pmap_dbl()` by its
    #' bare name, which avoids the `..1`, `..2`, `..3` placeholders of an
    #' anonymous lambda.
    #'
    #' @param x,y,z Single odds values, supplied positionally. In the pipeline
    #'   they receive `avgH`, `avgD` and `avgA`, in that order.
    #' @return The `[1, 1]` entry of the `probabilities` element returned by
    #'   `implied_probabilities()` with `method = "wpo"`; with the call order
    #'   above this is presumably the home-win probability.
    calc_implied_probalities <- function(x, y, z) {
      # Return the expression directly: assigning it to an unused local made
      # the function return its value invisibly.
      implied_probabilities(c(x, y, z), method = "wpo")$probabilities[1, 1]
    }
    
    # The whole mutate() is translated by dtplyr into one data.table `:=`
    # call. Passing calc_implied_probalities by name keeps `..1`-style
    # placeholders out of that call, so columns created earlier in the same
    # mutate() (avgH, avgD, avgA) can be used for prob1.
    ready_to_calculate %>%
      lazy_dt() %>%
      mutate(
        # TRUE rather than T: T is an ordinary variable that can be reassigned
        avgH = map_dbl(odds_1x2, ~ possibly_mean(.x$home_odds, na.rm = TRUE)),
        avgD = map_dbl(odds_1x2, ~ possibly_mean(.x$draw_odds, na.rm = TRUE)),
        avgA = map_dbl(odds_1x2, ~ possibly_mean(.x$away_odds, na.rm = TRUE)),
        prob1 = pmap_dbl(list(avgH, avgD, avgA), calc_implied_probalities)
      )
    
    #> Source: local data table [2 x 7]
    #> Call:   copy(`_DT1`)[, `:=`(c("avgH", "avgD", "avgA", "prob1"), {
    #>     avgH <- map_dbl(odds_1x2, ~possibly_mean(.x$home_odds, na.rm = TRUE))
    #>     avgD <- map_dbl(odds_1x2, ~possibly_mean(.x$draw_odds, na.rm = TRUE))
    #>     avgA <- map_dbl(odds_1x2, ~possibly_mean(.x$away_odds, na.rm = TRUE))
    #>     prob1 <- pmap_dbl(list(avgH, avgD, avgA), ..calc_implied_probalities)
    #>     .(avgH, avgD, avgA, prob1)
    #> })]
    #> 
    #>   home_team away_team   odds_1x2       avgH  avgD  avgA  prob1
    #>   <chr>     <chr>       <list>        <dbl> <dbl> <dbl>  <dbl>
    #> 1 Arsenal   Aston Villa <df [18 × 4]>  1.96  3.64  3.92  0.496
    #> 2 Leeds     Wolves      <lgl [1]>     NA    NA    NA    NA    
    #> 
    #> # Use as.data.table()/as.data.frame()/as_tibble() to access results
    

    Data from OP:

    ready_to_calculate <- structure(list(home_team = c("Arsenal", "Leeds"), away_team = c("Aston Villa", 
                                                                                          "Wolves"), odds_1x2 = list(structure(list(bookmaker = c("10Bet", 
                                                                                                                                                  "188BET", "1xBet", "888sport", "bet-at-home", "bet365", "Betfair", 
                                                                                                                                                  "Betsafe", "Betsson", "BetVictor", "Betway", "bwin", "ComeOn", 
                                                                                                                                                  "Interwetten", "Pinnacle", "Unibet", "William Hill", "Betfair Exchange"
                                                                                          ), home_odds = c(2.05, 2.02, 1.84, 1.93, 2.02, 1.85, 1.91, 2.05, 
                                                                                                           2.05, 1.95, 1.91, 1.9, 2.05, 1.95, 2.04, 1.96, 1.91, 1.97), draw_odds = c(3.6, 
                                                                                                                                                                                     3.6, 3.85, 3.6, 3.6, 3.75, 3.7, 3.65, 3.65, 3.6, 3.6, 3.5, 3.65, 
                                                                                                                                                                                     3.6, 3.65, 3.7, 3.6, 3.61), away_odds = c(3.65, 3.8, 4.41, 4.1, 
                                                                                                                                                                                                                               3.65, 4, 4.2, 3.7, 3.7, 4, 4, 4, 3.75, 3.85, 3.87, 4.1, 3.9, 
                                                                                                                                                                                                                               3.95)), class = "data.frame", row.names = c(NA, 18L)), NA)), class = "data.frame", row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                -2L))
    

    Created on 2023-03-14 with reprex v2.0.2