I have a dataframe like this
library(dplyr)
library(lubridate)
ticket_id <- c("id_1234","id_2234","id_1334","id_1244","id_1294",
"id_1264","id_1634","id_1734","id_1834","id_1934")
created <- c("2024-09-22 22:00:00","2024-09-21 23:00:00","2024-09-19 11:30:00","2024-09-23 6:00:00","2024-09-20 18:30:00",
"2024-09-15 10:00:00","2024-09-14 23:00:00","2024-09-11 5:30:00","2024-09-16 7:30:00","2024-09-13 19:27:00")
tat <- c(14.4, 38, 106, 6.4, 64, 29, 37, 129.5, 2.8, 66)
df.sample <- data.frame(ticket_id,created,tat,stringsAsFactors=FALSE)
df.sample$created <- ymd_hms(df.sample$created)
df.sample$tat = as.numeric(df.sample$tat)
I want to create a new column called actual_tat
that is computed by excluding the times in the tat
for any tickets created between previous week Friday 6 PM to current week Monday 8 AM
For any tickets created outside of the previous week Friday 6 PM to current week Monday 8 AM, actual_tat
is same as tat
Eg: If the ticket was created on Sunday at 7 AM and the tat
shows 14 hours, the final actual_tat
column should show 1 hour (14-13), which is the number of hours after current week Monday 8 AM
I tried doing something like this but not getting it right
df.sample %>%
mutate(
created_dt = ymd_hms(created), # Convert Created column to datetime format
day_of_week = wday(created_dt, label = TRUE), # Get day of week
hour_of_day = hour(created_dt), # Get hour of day
exclude_hours = ifelse(
(day_of_week == "Fri" & hour_of_day >= 18) |
(day_of_week == "Sat") |
(day_of_week == "Sun") |
(day_of_week == "Mon" & hour_of_day < 8),
TRUE, FALSE
),
current_monday_8am = (floor_date(created_dt, "week") + days(1)) + hours(8), # Current week Monday 8 AM
time_diff = as.numeric(difftime(current_monday_8am, created_dt, units = "hours")), # Calculate time difference
actual_tat = ifelse(
exclude_hours,
pmax(tat - time_diff, 0),
tat
)
)
My desired output is
ticket_id created tat actual_tat
1 id_1234 2024-09-22 22:00:00 14.4 4.4
2 id_2234 2024-09-21 23:00:00 38.0 5.0
3 id_1334 2024-09-19 11:30:00 106.0 106.0
4 id_1244 2024-09-23 06:00:00 6.4 4.4
5 id_1294 2024-09-20 18:30:00 64.0 2.5
6 id_1264 2024-09-15 10:00:00 29.0 7.0
7 id_1634 2024-09-14 23:00:00 37.0 4.0
8 id_1734 2024-09-11 05:30:00 129.5 129.5
9 id_1834 2024-09-16 07:30:00 2.8 2.3
10 id_1934 2024-09-13 19:27:00 66.0 5.5
Can someone point me in the right direction?
You need to use ceiling_date
instead of floor_date
, and also subtract 8 hours first, then add the 8 hours back. This handles those cases where the created_dt
was between midnight on Sunday and 8am on Monday. Finally, tell ceiling_date
to start the week on Monday, not Sunday (the default).
library(lubridate)
library(dplyr)
df.sample %>%
mutate(
created_dt = ymd_hms(created), # Convert Created column to datetime format
day_of_week = wday(created_dt, label = TRUE), # Get day of week
hour_of_day = hour(created_dt), # Get hour of day
exclude_hours = ifelse(
(day_of_week == "Fri" & hour_of_day >= 18) |
(day_of_week == "Sat") |
(day_of_week == "Sun") |
(day_of_week == "Mon" & hour_of_day < 8),
TRUE, FALSE
),
current_monday_8am = ceiling_date(created_dt - hours(8), "week", week_start=1) + hours(8), # Current week Monday 8 AM
time_diff = as.numeric(difftime(current_monday_8am, created_dt, units = "hours")), # Calculate time difference
actual_tat = ifelse(
exclude_hours,
tat - time_diff,
tat
)
) |>
select(ticket_id, tat, created_dt, actual_tat)
ticket_id tat created_dt actual_tat
1 id_1234 14.4 2024-09-22 22:00:00 4.40
2 id_2234 38.0 2024-09-21 23:00:00 5.00
3 id_1334 106.0 2024-09-19 11:30:00 106.00
4 id_1244 6.4 2024-09-23 06:00:00 4.40
5 id_1294 64.0 2024-09-20 18:30:00 2.50
6 id_1264 29.0 2024-09-15 10:00:00 7.00
7 id_1634 37.0 2024-09-14 23:00:00 4.00
8 id_1734 129.5 2024-09-11 05:30:00 129.50
9 id_1834 2.8 2024-09-16 07:30:00 2.30
10 id_1934 66.0 2024-09-13 19:27:00 5.45