I'm trying to calculate the rowise cumulative sum of Rates from DATE to DATE_following.
For example:
library(tidyverse)
library(bizdays)
library(lubridate)
set.seed(1)
dat <- seq.Date(from = as.Date(as.Date("2023-04-06")- days(10)),
to = as.Date(as.Date("2023-04-06")),
by = "day") %>%
data.frame(DATE = .) %>%
mutate(Rates = sample(seq(from=1,to=10,by=1), size = length(DATE),replace=TRUE),
DATE_following = modified.following(DATE %m+% days(3)))
dat
DATE Rates DATE_following
1 2023-03-27 9 2023-03-30
2 2023-03-28 4 2023-03-31
3 2023-03-29 7 2023-04-01
4 2023-03-30 1 2023-04-02
5 2023-03-31 2 2023-04-03
6 2023-04-01 7 2023-04-04
7 2023-04-02 2 2023-04-05
8 2023-04-03 3 2023-04-06
9 2023-04-04 1 2023-04-07
10 2023-04-05 5 2023-04-08
11 2023-04-06 5 2023-04-09
The output i'm trying to get is:
DATE Rates DATE_following Results
1 2023-03-27 9 2023-03-30 21
2 2023-03-28 4 2023-03-31 14
3 2023-03-29 7 2023-04-01 17
4 2023-03-30 1 2023-04-02 12
5 2023-03-31 2 2023-04-03 14
6 2023-04-01 7 2023-04-04 13
7 2023-04-02 2 2023-04-05 11
8 2023-04-03 3 2023-04-06 14
9 2023-04-04 1 2023-04-07 NA
10 2023-04-05 5 2023-04-08 NA
11 2023-04-06 5 2023-04-09 NA
Is it possible to get this result using dplyr functions like rowwise() and cumsum()? My main problem is that I don't know how to define this condition within these functions.
If you want a rolling sum for four consecutive Rates
, you could use zoo
s rollsum()
function:
library(dplyr)
library(zoo)
dat %>%
mutate(Result = rollsum(Rates, k = 4, fill = NA_real_, align = "left"))
This returns
# A tibble: 11 × 5
no DATE Rates DATE_following Result
<dbl> <date> <dbl> <date> <dbl>
1 1 2023-03-27 9 2023-03-30 21
2 2 2023-03-28 4 2023-03-31 14
3 3 2023-03-29 7 2023-04-01 17
4 4 2023-03-30 1 2023-04-02 12
5 5 2023-03-31 2 2023-04-03 14
6 6 2023-04-01 7 2023-04-04 13
7 7 2023-04-02 2 2023-04-05 11
8 8 2023-04-03 3 2023-04-06 14
9 9 2023-04-04 1 2023-04-07 NA
10 10 2023-04-05 5 2023-04-08 NA
11 11 2023-04-06 5 2023-04-09 NA
A slightly more general answer based on LeMarque's comment:
dat2 %>%
mutate(days = as.integer(DATE_following - DATE) + 1,
res = rollapply(data = Rates, width = days, FUN = sum, align = "left", fill = NA_real_))
This returns
# A tibble: 11 × 6
no DATE Rates DATE_following days res
<dbl> <date> <dbl> <date> <dbl> <dbl>
1 1 2023-03-27 9 2023-03-30 4 21
2 2 2023-03-28 4 2023-03-31 4 14
3 3 2023-03-29 7 2023-04-01 4 17
4 4 2023-03-30 1 2023-04-02 4 12
5 5 2023-03-31 2 2023-04-10 11 NA
6 6 2023-04-01 7 2023-04-04 4 13
7 7 2023-04-02 2 2023-04-05 4 11
8 8 2023-04-03 3 2023-04-06 4 14
9 9 2023-04-04 1 2023-04-07 4 NA
10 10 2023-04-05 5 2023-04-08 4 NA
11 11 2023-04-06 5 2023-04-09 4 NA
Since the DATE_following
in row 5 isn't present in the data, this version returns NA
. Furthermore this version doesn't sum four consecutive days but calculates the days between DATE
and DATE_following
and applies them to the rolling sum.
dat <- structure(list(no = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), DATE = structure(c(19443,
19444, 19445, 19446, 19447, 19448, 19449, 19450, 19451, 19452,
19453), class = "Date"), Rates = c(9, 4, 7, 1, 2, 7, 2, 3, 1,
5, 5), DATE_following = structure(c(19446, 19447, 19448, 19449,
19450, 19451, 19452, 19453, 19454, 19455, 19456), class = "Date")), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -11L), spec = structure(list(
cols = list(no = structure(list(), class = c("collector_double",
"collector")), DATE = structure(list(format = ""), class = c("collector_date",
"collector")), Rates = structure(list(), class = c("collector_double",
"collector")), DATE_following = structure(list(format = ""), class = c("collector_date",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
dat2 <- structure(list(no = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), DATE = structure(c(19443,
19444, 19445, 19446, 19447, 19448, 19449, 19450, 19451, 19452,
19453), class = "Date"), Rates = c(9, 4, 7, 1, 2, 7, 2, 3, 1,
5, 5), DATE_following = structure(c(19446, 19447, 19448, 19449,
19457, 19451, 19452, 19453, 19454, 19455, 19456), class = "Date")), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -11L), spec = structure(list(
cols = list(no = structure(list(), class = c("collector_double",
"collector")), DATE = structure(list(format = ""), class = c("collector_date",
"collector")), Rates = structure(list(), class = c("collector_double",
"collector")), DATE_following = structure(list(format = ""), class = c("collector_date",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))