I have the following data:
# Input data as dput() output: a 48-row data.frame with integer columns
# ID (1, 2, 3), A (0/1 indicator) and Day (counting up from 1 within each ID).
structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), A = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), Day = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L)), row.names = c(NA, 48L), class = "data.frame")
I want to find the IDs that have more than 3 consecutive observations with A equal to 1, and create the following data:
# Desired result as dput() output: the same 48 rows with two extra integer
# columns, Censor (0/1) and Day_2 (NA for ID 1, 5 for ID 2, 14 for ID 3).
structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), A = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), Day = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L), Censor = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L), Day_2 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L)), row.names = c(NA,
48L), class = "data.frame")
Here, Censor becomes 1 starting from the first Day on which an ID begins a run of more than 3 consecutive observations with A equal to 1, and Day_2 is the Day on which that run started (NA if the ID has no such run).
Wrap an `rle` approach in a function that identifies runs of consecutive `1`s with length > 3, and then use it in `ave`.
# Flag every element of `x` that belongs to a long enough run of `value`.
#
# x:     atomic vector to scan (here: the 0/1 column A of one ID).
# n:     run-length threshold; a run counts only when its length is > n.
# value: the value whose runs are looked for.
# Returns a numeric vector the same length as `x`: 1 inside qualifying runs,
# 0 everywhere else. The defaults reproduce the original rule (runs of 1
# longer than 3).
f <- \(x, n = 3L, value = 1L) {
  runs <- rle(x)
  # rle() puts each NA in its own run; treat those runs as non-matching.
  hit <- !is.na(runs$values) & runs$values == value & runs$lengths > n
  rep.int(as.numeric(hit), runs$lengths)
}
# Add the censoring flag and the first censored day for every ID.
# within() evaluates the block with the columns of `dat` in scope and
# appends the newly created variables to the data frame.
res <- within(dat, {
  # 1 on rows inside a run flagged by f() (A == 1 for more than 3 rows in a
  # row within the same ID), 0 elsewhere.
  # NOTE(review): only the qualifying run itself is flagged; if censoring
  # must persist after the run ends, confirm and apply cummax() per ID.
  censor <- ave(A, ID, FUN = f)
  # Value of Day on the first flagged row of each ID, NA when the ID has no
  # flagged row. Looking the value up in Day (rather than using the row
  # position inside the group) stays correct when Day does not run
  # 1, 2, 3, ... within an ID.
  Day_2 <- ave(replace(Day, censor != 1, NA), ID,
               FUN = \(d) d[!is.na(d)][1L])
})
res
# ID A Day Day_2 censor
# 1 1 0 1 NA 0
# 2 1 0 2 NA 0
# 3 1 0 3 NA 0
# 4 1 0 4 NA 0
# 5 1 0 5 NA 0
# 6 1 1 6 NA 0
# 7 1 1 7 NA 0
# 8 1 0 8 NA 0
# 9 1 0 9 NA 0
# 10 1 0 10 NA 0
# 11 1 0 11 NA 0
# 12 1 1 12 NA 0
# 13 1 1 13 NA 0
# 14 1 1 14 NA 0
# 15 1 0 15 NA 0
# 16 1 0 16 NA 0
# 17 1 0 17 NA 0
# 18 2 0 1 5 0
# 19 2 0 2 5 0
# 20 2 0 3 5 0
# 21 2 0 4 5 0
# 22 2 1 5 5 1
# 23 2 1 6 5 1
# 24 2 1 7 5 1
# 25 2 1 8 5 1
# 26 2 1 9 5 1
# 27 2 1 10 5 1
# 28 2 1 11 5 1
# 29 2 1 12 5 1
# 30 2 1 13 5 1
# 31 2 1 14 5 1
# 32 3 0 1 14 0
# 33 3 0 2 14 0
# 34 3 0 3 14 0
# 35 3 0 4 14 0
# 36 3 0 5 14 0
# 37 3 1 6 14 0
# 38 3 1 7 14 0
# 39 3 1 8 14 0
# 40 3 0 9 14 0
# 41 3 1 10 14 0
# 42 3 0 11 14 0
# 43 3 0 12 14 0
# 44 3 0 13 14 0
# 45 3 1 14 14 1
# 46 3 1 15 14 1
# 47 3 1 16 14 1
# 48 3 1 17 14 1
Data:
# Example input: 48 rows for three IDs (17, 14 and 17 rows respectively).
# A is a 0/1 indicator and Day counts up from 1 within each ID.
dat <- structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), A = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), Day = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L)), row.names = c(NA, 48L), class = "data.frame")