I would like to calculate the number of cases following the initial record in a data panel (detect dropout). Could someone assist me with the methodology or provide guidance on how to achieve this?
dt <- tibble(
`id` = c("id1", "id2","id3"),
`2018` = c(NA,NA,"id3"),
`2019` = c(NA,"id2","id3"),
`2020` = c(NA, "id2",NA),
`2021` = c("id1", NA,"id3"),
`2022` = c("id1", NA,"id3"),
`2023` = c(NA, "id2","id3")
) %>% as.data.table(); dt
id 2018 2019 2020 2021 2022 2023
<char> <char> <char> <char> <char> <char> <char>
1: id1 <NA> <NA> <NA> id1 id1 <NA> ---> 1st record 2021
2: id2 <NA> id2 id2 <NA> <NA> id2 ---> 1st record 2019
3: id3 id3 id3 <NA> id3 id3 id3 ---> 1st record 2018
dt <- dt[, .(
`2018` = ifelse(is.na(`2018`), 1, 0),
`2019` = ifelse(is.na(`2019`), 1, 0),
`2020` = ifelse(is.na(`2020`), 1, 0),
`2021` = ifelse(is.na(`2021`), 1, 0),
`2022` = ifelse(is.na(`2022`), 1, 0),
`2023` = ifelse(is.na(`2023`), 1, 0)), by = c("id")]
dt
output <- tibble(
`id` = c("id1", "id2","id3"),
`2018` = c(0,0,0),
`2019` = c(0,0,0),
`2020` = c(0, 0,1),
`2021` = c(0, 1,0),
`2022` = c(0, 1,0),
`2023` = c(1, 0,0),
n = c(1,2,1)
) %>% as.data.table(); output
id 2018 2019 2020 2021 2022 2023 n_dropOut
<char> <num> <num> <num> <num> <num> <num> <num>
1: id1 0 0 0 0 0 1 1
2: id2 0 0 0 1 1 0 2
3: id3 0 0 1 0 0 0 1
I've tried this code but it is incorrect
dt[, n_dropOut :=
`2018` +
`2019` +
`2020` +
`2021` +
`2022` +
`2023`
, by = c("id")]
id 2018 2019 2020 2021 2022 2023 n_dropOut
<char> <num> <num> <num> <num> <num> <num> <num>
1: id1 1 1 1 0 0 1 4
2: id2 1 0 0 1 1 0 3
3: id3 0 0 1 0 0 0 1
helper <- function(x) {
i <- Position(\(x) !is.na(x), x) # i is the position of first non-NA
if (is.na(i)) return(0L) # If not found return 0
j <- length(x) # Otherwise sum number of NAs from i to the end
sum(is.na(x[i:j]))
}
dt[, n_dropout := helper(unlist(.SD)), .SDcols = `2018`:`2023`, by = id]
id 2018 2019 2020 2021 2022 2023 n_dropout
<char> <char> <char> <char> <char> <char> <char> <int>
1: id1 <NA> <NA> <NA> id1 id1 <NA> 1
2: id2 <NA> id2 id2 <NA> <NA> id2 2
3: id3 id3 id3 <NA> id3 id3 id3 1