rdplyr

mutate dynamic variables in for loop


Here is my mock data frame object dfhave

structure(list(ID = 1:10, DOB = structure(c(-3704, -7119, -7375, 
-8217, -8504, -4886, -6899, -3027, -5546, -4512), class = "Date"), 
    date1 = structure(c(7284, 9182, 4609, 6370, 5796, 9004, 8215, 
    5391, 8778, 6724), class = "Date"), date2 = structure(c(12429, 
    10203, 10764, 14487, 11367, 14495, 14845, 12190, 10216, 13144
    ), class = "Date"), age.date1 = c(30.0835044490075, 44.6297056810404, 
    32.8104038329911, 39.937029431896, 39.1512662559891, 38.0287474332649, 
    41.3798767967146, 23.047227926078, 39.2169746748802, 30.7624914442163
    ), age.date2 = c(44.1697467488022, 47.4250513347023, 49.6618754277892, 
    62.1601642710472, 54.403832991102, 53.0622861054073, 59.5318275154004, 
    41.6618754277892, 43.1540041067762, 48.3394934976044)), row.names = c(NA, 
10L), class = "data.frame")

I want to loop over age values, with example iterations here

dfwant <- dfhave %>% mutate(age20day = DOB %m+% years(20),
                        age30day = DOB %m+% years(30),
                        age40day = DOB %m+% years(40),
                        age50day = DOB %m+% years(50),
                    
                    expyrs.20 = case_when(age.date1<=20 & age.date2<=20 ~ as.duration(date1 %--% date2)/dyears(1),
                                          age.date1<=20 & age.date2>20 ~ as.duration(date1 %--% age20day)/dyears(1), 
                                          age.date1>20 ~ 0),
                    
                    expyrs.30 = case_when(age.date1<=30 & age.date2<=30 ~ as.duration(date1 %--% date2)/dyears(1),
                                          age.date1<=30 & age.date2>30 ~ as.duration(date1 %--% age30day)/dyears(1), 
                                          age.date1>30 ~ 0),
                    
                    expyrs.40 = case_when(age.date1<=40 & age.date2<=40 ~ as.duration(date1 %--% date2)/dyears(1),
                                          age.date1<=40 & age.date2>40 ~ as.duration(date1 %--% age40day)/dyears(1), 
                                          age.date1>40 ~ 0),
                    
                    expyrs.50 = case_when(age.date1<=50 & age.date2<=50 ~ as.duration(date1 %--% date2)/dyears(1),
                                            age.date1<=50 & age.date2>50 ~ as.duration(date1 %--% age50day)/dyears(1), 
                                            age.date1>50 ~ 0)
                    
                    )

I tried a for loop as in

ages <- c(20,30,40,50)

for (i in ages) {
  
ageday =  paste0("age", i, "day")
expyrs =  paste0("expyrs.", i)
  
 df2 <- dfhave %>% mutate(!!ageday := DOB %m+% years(i),
    
    !!expyrs := case_when(age.date1<=ages[i] & age.date2<=ages[i] ~ as.duration(date1 %--% date2)/dyears(1),
                                                             age.date1<=ages[i] & age.date2>ages[i] ~ as.duration(date1 %--% as.Date(!!sym(ageday)))/dyears(1), 
                                                             age.date1>ages[i] ~ 0
  )
  )
}

but the result was not what I wanted (only the last iteration [expyrs.50] was output, and all these values were NA:

structure(list(ID = 1:10, DOB = structure(c(-3704, -7119, -7375, 
-8217, -8504, -4886, -6899, -3027, -5546, -4512), class = "Date"), 
    date1 = structure(c(7284, 9182, 4609, 6370, 5796, 9004, 8215, 
    5391, 8778, 6724), class = "Date"), date2 = structure(c(12429, 
    10203, 10764, 14487, 11367, 14495, 14845, 12190, 10216, 13144
    ), class = "Date"), age.date1 = c(30.0835044490075, 44.6297056810404, 
    32.8104038329911, 39.937029431896, 39.1512662559891, 38.0287474332649, 
    41.3798767967146, 23.047227926078, 39.2169746748802, 30.7624914442163
    ), age.date2 = c(44.1697467488022, 47.4250513347023, 49.6618754277892, 
    62.1601642710472, 54.403832991102, 53.0622861054073, 59.5318275154004, 
    41.6618754277892, 43.1540041067762, 48.3394934976044), age50day = structure(c(14559, 
    11144, 10887, 10046, 9759, 13376, 11364, 15235, 12717, 13750
    ), class = "Date"), expyrs.50 = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_)), row.names = c(NA, 10L), class = "data.frame") 

My desired output:

> dput(dfwant)
structure(list(ID = 1:10, DOB = structure(c(-3704, -7119, -7375, 
-8217, -8504, -4886, -6899, -3027, -5546, -4512), class = "Date"), 
    date1 = structure(c(7284, 9182, 4609, 6370, 5796, 9004, 8215, 
    5391, 8778, 6724), class = "Date"), date2 = structure(c(12429, 
    10203, 10764, 14487, 11367, 14495, 14845, 12190, 10216, 13144
    ), class = "Date"), age.date1 = c(30.0835044490075, 44.6297056810404, 
    32.8104038329911, 39.937029431896, 39.1512662559891, 38.0287474332649, 
    41.3798767967146, 23.047227926078, 39.2169746748802, 30.7624914442163
    ), age.date2 = c(44.1697467488022, 47.4250513347023, 49.6618754277892, 
    62.1601642710472, 54.403832991102, 53.0622861054073, 59.5318275154004, 
    41.6618754277892, 43.1540041067762, 48.3394934976044), age20day = structure(c(3601, 
    186, -70, -912, -1199, 2419, 406, 4278, 1759, 2793), class = "Date"), 
    age30day = structure(c(7254, 3839, 3582, 2741, 2454, 6071, 
    4059, 7930, 5412, 6445), class = "Date"), age40day = structure(c(10906, 
    7491, 7235, 6393, 6106, 9724, 7711, 11583, 9064, 10098), class = "Date"), 
    age50day = structure(c(14559, 11144, 10887, 10046, 9759, 
    13376, 11364, 15235, 12717, 13750), class = "Date"), expyrs.20 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), expyrs.30 = c(0, 0, 0, 0, 0, 
    0, 0, 6.95140314852841, 0, 0), expyrs.40 = c(9.91649555099247, 
    0, 7.1895961670089, 0.0629705681040383, 0.848733744010951, 
    1.97125256673511, 0, 16.952772073922, 0.783025325119781, 
    9.23750855578371), expyrs.50 = c(14.0862422997947, 2.79534565366188, 
    16.8514715947981, 10.0643394934976, 10.8501026694045, 11.9698836413415, 
    8.62149212867899, 18.6146475017112, 3.93702943189596, 17.5770020533881
    )), row.names = c(NA, 10L), class = "data.frame")

Solution

  • One way you could do this:

    map(c(20, 30, 40, 50), \(yr)df %>%
        mutate(
          "age{{yr}}day":= DOB %m+% years(yr),
          "expyrs{{yr}}" := case_when(age.date1 <=yr & age.date2<=yr ~ 
                  as.duration(date1 %--% date2)/dyears(1),
                age.date1 <= yr & age.date2 > yr ~ 
                  as.duration(date1 %--% get(sprintf("age%dday", yr), 
                                             pick(everything())))/dyears(1), 
                age.date1 > yr ~ 0),.keep='none')
        )%>%
      list_cbind() %>%
      bind_cols(df, .)
    

    Although using function might be simpler?

    fn <- function(yr){
      col1 <-  df$DOB %m+% years(yr)
      col2 <-  with(df, case_when(age.date1 <= yr & age.date2<=yr ~ 
                                    as.duration(date1 %--% date2)/dyears(1),
                                  age.date1 <= yr & age.date2 > yr ~ 
                                    as.duration(date1 %--% col1)/dyears(1), 
                                  age.date1 > yr ~ 0))
      setNames(data.frame(col1, col2), paste0(c("age", "expyrs"), yr, c("day", "")))
    }
    
    map(c(20,30,40,50), fn)%>% bind_cols(df, .)