I have written the following code in R which adds a date (dp_date) and creates a flag (dp_flag) in dt.all sample based on columns from data tables dt.all and Info. The issue that I cannot create dp_flag properly. So, the dp_flag should be equal to 1 when all mentioned conditions in code are met (status, type, P_ID = Id2, DT_after_year >= StatusDate), but also it should be set year before dp_flag_curr becomes 1 and dp_flag must be 1 during whole period when dp_flag_curr =1 except last month. For example, if dp_flag_curr = 1 from 2021-03-31 until 2021-07-31, then dp_flag should be equal to 1 from 2020-03-31 until 2021-06-30. The way I described conditions works fine for the cases where there no NAs in DT. Right now, if to apply the code on whole data set, if there are some NA values as dates for some P_ID, then the dp_flag does not end month earlier than dp_flag_curr. I tried to add na.rm = TRUE in max function and !is.na(DT), but it did not help. Maybe issue is not in NA at all why the dp_flag is not finished a month earlier than dp_flag_curr. Here is minimal example of data tables, but not full version of them for confidentiality of information and as it is really huge (lot of rows and columns). P.S NA rows cannot be removed, other columns have values which are not shown here.
library("data.table")
dt.all[, DT_after_year := DT + 365]
dt.all <-
dt.all[Info[Status == "DP" & Type %in% c(1, 2, 3, 4, 5, 11, 13),
.(StatusDate, Id2)],
on = .(
P_ID = Id2,
DT_after_year >= StatusDate
),
`:=`(dp_flag = ifelse(!is.na(DT) & DT <= max(dt.all[dp_flag_curr == 1, DT], na.rm = TRUE) - 30, 1, 0),
dp_date = StatusDate)]```
dt.all <-
structure(list(ID = c("N589GG", "EA826", "EA826", "N589GG", "EA826", "N589GG",
"EA826", "N589GG", "EA826", "N589GG", "EA826",
"N589GG", "EA826", "N589GG", "EA826", "N589GG",
"EA826", "N589GG", "EA826", "N589GG", "EA826",
"N589GG", "EA826", "N589GG", "EA826", "N589GG",
"EA826", "N589GG", "EA826", "N589GG", "EA826", "N589GG",
"N589GG", "N589GG", "N589GG", "EA826", "N589GG", "N589GG", "N589GG",
"N589GG", "N589GG", "N589GG", "EA826", "N589GG", "N589GG", "N589GG",
"N589GG", "N589GG", "N589GG", "N589GG", "N589GG", "N589GG", "N589GG",
"N589GG", "N589GG", "N589GG", "N589GG", "N589GG", "N589GG", "N589GG",
"N589GG", "N589GG", "N589GG", "N589GG", "N589GG", "N589GG", "N589GG",
"N589GG", "N589GG"),
SEG = c("MP", "SER", "SER", "MP", "SER", "MP",
"SER", "MP", "SER", "MP", "SER",
"MP", "SER", "MP", "SER", "MP",
"SER", "MP", "SER", "MP", "SER",
"MP", "SER", "MP", "SER", "MP",
"SER", "MP", "SER", "MP", "SER", "MP",
"MP", "MP", "MP", "SER", "MP", "MP", "MP",
"MP", "MP", "MP", "SER", "MP", "MP", "MP",
"MP", "MP", "MP", "MP", "MP", "MP", "MP",
"MP", "MP", "MP", "MP", "MP", "MP", "MP",
"MP", "MP", "MP", "MP", "MP", "MP", "MP",
"MP", "MP"),
P_ID = c(NA, NA, NA, NA, NA, "589GG",
"A826", "589GG", "A826", "589GG", "A826",
"589GG", "A826", "589GG", "A826", "589GG",
"A826", "589GG", "A826", "589GG", "A826",
"589GG", "A826", "589GG", "A826", "589GG",
"A826", "589GG", "A826", "589GG", NA, "589GG",
"589GG", "589GG", "589GG", NA, "589GG", "589GG", "589GG",
"589GG", "589GG", "589GG", NA, "589GG", "589GG", "589GG",
NA, "589GG", "589GG", "589GG", "589GG", "589GG", "589GG",
"589GG", "589GG", "589GG", "589GG", "589GG", "589GG", "589GG",
"589GG", "589GG", "589GG", "589GG", "589GG", "589GG", "589GG",
"589GG", "589GG"),
DT = structure(c(NA, NA, NA, NA, NA, 17531, 17531, 17562, 17562, 17590, 17590, 17621, 17621, 17651,
17651, 17682, 17682, 17712, 17712, 17743, 17743, 17774, 17774,
17804, 17804, 17835, 17835, 17865, 17865, 17896, NA, 17927, 17955,
17986, 18016, NA, 18047, 18077, 18108, 18139, 18169, 18200, NA,
18230, 18261, 18292, NA, 18321, 18352, 18382, 18413, 18443, 18474,
18505, 18535, 18566, 18596, 18627, 18658, 18686, 18717, 18747,
18778, 18808, 18839, 18870, 18900, 18931, 18961), class = "Date"),
dp_flag_curr = c(NA, NA, NA, NA, NA, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, 1, 1, 1, 1, NA, 1, 1, 1,
1, 1, 1, NA, 1, 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA, -69L
), class = c("data.table", "data.frame"))
Info <- structure(list(Status = c("DP", "DP", "DP", "DP", "DP",
"DP", "DP", "DP", "DP", "DP", "DP", "DP"), Type = c(11, 11, 12, 1, 2, 13, 13, 5, 15, 16, 19, 98),
StatusDate = structure(c(17595, 17738, 17595, 17738, 17738, 17738, 17595, 17595, 17738, 17738, 17738, 17738), class = "Date"),
Id2 = c("A826", "589GG", "A826", "589GG", "589GG", "589GG", "A826",
"A826", "589GG", "589GG", "589GG", "589GG")), row.names = c(NA, -12L), class = c("data.table", "data.frame"))
Example of desired output for ID "EA826":
ID SEG P_ID DT dp_flag_curr DT_after_year dp_flag dp_date
EA826 SER <NA> <NA> NA <NA> NA <NA>
EA826 SER <NA> <NA> NA <NA> NA <NA>
EA826 SER <NA> <NA> NA <NA> NA <NA>
EA826 SER A826 2017-12-31 1 2018-12-31 1 2018-03-05
EA826 SER A826 2018-01-31 1 2019-01-31 1 2018-03-05
EA826 SER A826 2018-02-28 1 2019-02-28 1 2018-03-05
EA826 SER A826 2018-03-31 1 2019-03-31 1 2018-03-05
EA826 SER A826 2018-04-30 1 2019-04-30 1 2018-03-05
EA826 SER A826 2018-05-31 1 2019-05-31 1 2018-03-05
EA826 SER A826 2018-06-30 1 2019-06-30 1 2018-03-05
EA826 SER A826 2018-07-31 1 2019-07-31 1 2018-03-05
EA826 SER A826 2018-08-31 1 2019-08-31 1 2018-03-05
EA826 SER A826 2018-09-30 1 2019-09-30 1 2018-03-05
EA826 SER A826 2018-10-31 1 2019-10-31 1 2018-03-05
EA826 SER A826 2018-11-30 1 2019-11-30 0 2018-03-05
EA826 SER <NA> <NA> NA <NA> NA <NA>
EA826 SER <NA> <NA> NA <NA> NA <NA>
EA826 SER <NA> <NA> NA <NA> NA <NA>
I believe that the construction of j
should be done by=.EACH
, and should not reference dt.all
in the max()
function.
Try this:
dt.all = dt.all[
i = Info[Status == "DP" & Type %in% c(1, 2, 3, 4, 5, 11, 13), .(StatusDate, Id2)] |> unique()
on = .(P_ID = Id2, DT_after_year >= StatusDate),
j = let(
dp_flag = ifelse(!is.na(DT) & DT <= max(DT[dp_flag_curr==1], na.rm = TRUE) - 30, 1, 0),
dp_date = StatusDate
),
by=.EACHI
]
Output (for EA826)
ID SEG P_ID DT dp_flag_curr DT_after_year dp_flag dp_date
<char> <char> <char> <Date> <num> <Date> <num> <Date>
1: EA826 SER <NA> <NA> NA <NA> NA <NA>
2: EA826 SER <NA> <NA> NA <NA> NA <NA>
3: EA826 SER <NA> <NA> NA <NA> NA <NA>
4: EA826 SER A826 2017-12-31 1 2018-12-31 1 2018-03-05
5: EA826 SER A826 2018-01-31 1 2019-01-31 1 2018-03-05
6: EA826 SER A826 2018-02-28 1 2019-02-28 1 2018-03-05
7: EA826 SER A826 2018-03-31 1 2019-03-31 1 2018-03-05
8: EA826 SER A826 2018-04-30 1 2019-04-30 1 2018-03-05
9: EA826 SER A826 2018-05-31 1 2019-05-31 1 2018-03-05
10: EA826 SER A826 2018-06-30 1 2019-06-30 1 2018-03-05
11: EA826 SER A826 2018-07-31 1 2019-07-31 1 2018-03-05
12: EA826 SER A826 2018-08-31 1 2019-08-31 1 2018-03-05
13: EA826 SER A826 2018-09-30 1 2019-09-30 1 2018-03-05
14: EA826 SER A826 2018-10-31 1 2019-10-31 1 2018-03-05
15: EA826 SER A826 2018-11-30 1 2019-11-30 0 2018-03-05
16: EA826 SER <NA> <NA> NA <NA> NA <NA>
17: EA826 SER <NA> <NA> NA <NA> NA <NA>
18: EA826 SER <NA> <NA> NA <NA> NA <NA>