I'm forecasting revenue by fiscal year (not calendar year) for contracts with start dates and either lengths of time or end dates or both. My method involves finding the fraction of the contract which exists in each fiscal year and applying the total contract value (represented by amounts
in the sample code). This gives the fraction of the contract value for that fiscal year.
library(dplyr)
library(lubridate)
FY16 <- interval("2015-10-01", "2016-09-30")
FY17 <- interval("2016-10-01", "2017-09-30")
FY18 <- interval("2017-10-01", "2018-09-30")
FY19 <- interval("2018-10-01", "2019-09-30")
FY20 <- interval("2019-10-01", "2020-09-30")
FY21 <- interval("2020-10-01", "2021-09-30")
FY22 <- interval("2021-10-01", "2022-09-30")
FY23 <- interval("2022-10-01", "2023-09-30")
FY24 <- interval("2023-10-01", "2024-09-30")
FY25 <- interval("2024-10-01", "2025-09-30")
FY26 <- interval("2025-10-01", "2026-09-30")
FY27 <- interval("2026-10-01", "2027-09-30")
FY28 <- interval("2027-10-01", "2028-09-30")
FY29 <- interval("2028-10-01", "2029-09-30")
FY30 <- interval("2029-10-01", "2030-09-30")
FY31 <- interval("2030-10-01", "2031-09-30")
FY32 <- interval("2031-10-01", "2032-09-30")
data <- tibble(startDates = c("2018-04-01", "2025-09-17", "2021-07-21", "2026-11-17"),
years = c(5,3,3,2),
amounts = c(350000, 1200000, 7500000, 130000),
surcharge = c(0.035, 0.04, 0.375, 0.05))
data <- as.data.frame(data)%>%
mutate(startDates =as.POSIXct(`startDates`),
endDate = `startDates`%m+%years(years),
contractInterval = interval(startDates, endDate),
`Days in Contract` = startDates%--%endDate/ddays(1),
`FY16 Interval` = intersect(FY16, contractInterval),
`FY17 Interval` = intersect(FY17, contractInterval),
`FY18 Interval` = intersect(FY18, contractInterval),
`FY19 Interval` = intersect(FY19, contractInterval),
`FY20 Interval` = intersect(FY20, contractInterval),
`FY21 Interval` = intersect(FY21, contractInterval),
#and so on to FY32
`Days in FY16` = int_start(`FY16 Interval`)%--%int_end(`FY16 Interval`)/ddays(1),
`Days in FY17` = int_start(`FY17 Interval`)%--%int_end(`FY17 Interval`)/ddays(1),
`Days in FY18` = int_start(`FY18 Interval`)%--%int_end(`FY18 Interval`)/ddays(1),
`Days in FY19` = int_start(`FY19 Interval`)%--%int_end(`FY19 Interval`)/ddays(1),
`Days in FY20` = int_start(`FY20 Interval`)%--%int_end(`FY20 Interval`)/ddays(1),
`Days in FY21` = int_start(`FY21 Interval`)%--%int_end(`FY21 Interval`)/ddays(1),
#and so on to FY32
`FY16 Amount` = amounts*`Days in FY16`/`Days in Contract`,
`FY17 Amount` = amounts*`Days in FY17`/`Days in Contract`,
`FY18 Amount` = amounts*`Days in FY18`/`Days in Contract`,
`FY19 Amount` = amounts*`Days in FY19`/`Days in Contract`,
`FY20 Amount` = amounts*`Days in FY20`/`Days in Contract`,
`FY21 Amount` = amounts*`Days in FY21`/`Days in Contract`,
#and so on to FY32
`FY16 Surcharge Amount` = surcharge*`FY16 Amount`,
`FY17 Surcharge Amount` = surcharge*`FY17 Amount`,
`FY18 Surcharge Amount` = surcharge*`FY18 Amount`,
`FY19 Surcharge Amount` = surcharge*`FY19 Amount`,
`FY20 Surcharge Amount` = surcharge*`FY20 Amount`,
`FY21 Surcharge Amount` = surcharge*`FY21 Amount`
#and so on to FY32
)
data.summary <- data%>%
summarise(`FY16 Total` = sum(`FY16 Amount`, na.rm = T),
`FY17 Total` = sum(`FY17 Amount`, na.rm = T),
`FY18 Total` = sum(`FY18 Amount`, na.rm = T),
`FY19 Total` = sum(`FY19 Amount`, na.rm = T),
`FY20 Total` = sum(`FY20 Amount`, na.rm = T),
`FY21 Total` = sum(`FY21 Amount`, na.rm = T),
# and so on to FY32
`FY16 Surcharge Total` = sum(`FY16 Surcharge Amount`, na.rm = T),
`FY17 Surcharge Total` = sum(`FY17 Surcharge Amount`, na.rm = T),
`FY18 Surcharge Total` = sum(`FY18 Surcharge Amount`, na.rm = T),
`FY19 Surcharge Total` = sum(`FY19 Surcharge Amount`, na.rm = T),
`FY20 Surcharge Total` = sum(`FY20 Surcharge Amount`, na.rm = T),
`FY21 Surcharge Total` = sum(`FY21 Surcharge Amount`, na.rm = T),
#and so on to FY32
)
It's pretty straightforward and works fine, but it's incredibly tedious to code! What's worse is that my method has garnered attention and will likely be deployed across all regions and divisions. Great news, right? Not when I have to code every fiscal year separately in both the preparation and the summarization. Is my life to be reduced to copying and pasting and changing two digits? Please say it isn't. How can I do this more efficiently? I welcome all suggestions and thank you in advance.
I suggest thinking about this in a "long data" way instead of a "wide data" way: it is more dynamic, much shorter code, and allows for easily changing to fewer or more fiscal years without much effort.
First, these are the fiscal years I think we're interested in covering:
FYs <- tibble(FYstart = as.Date("2015-10-01") %m+% years(0:15)) %>%
mutate(
FYend = FYstart %m+% years(1) - 1,
FY = paste0("FY", substr(year(FYend), 3, 4))
)
From there, we'll start with your initial four columns, then join on FYs
and filter/mutate, then summarize.
data <-
tibble(startDates = as.POSIXct(c("2018-04-01", "2025-09-17", "2021-07-21", "2026-11-17")),
years = c(5,3,3,2),
amounts = c(350000, 1200000, 7500000, 130000),
surcharge = c(0.035, 0.04, 0.375, 0.05)) %>%
mutate(endDates = startDates %m+% years(years)) %>%
left_join(FYs, join_by(overlaps(startDates, endDates, FYstart, FYend))) |> # thanks @JonSpring for this recommendation
mutate(
FYstart = pmax(startDates, FYstart),
FYend = pmin(endDates, FYend),
`Days in Contract` = as.numeric(endDates - startDates, units = "days"),
`Days in FY` = as.numeric(FYend - FYstart, units = "days"),
`FY Amount` = amounts * `Days in FY` / `Days in Contract`,
`FY Surcharge Amount` = surcharge * `FY Amount`
)
data
# # A tibble: 17 × 12
# startDates years amounts surcharge endDates FYstart FYend FY `Days in Contract` `Days in FY` `FY Amount` `FY Surcharge Amount`
# <dttm> <dbl> <dbl> <dbl> <dttm> <dttm> <dttm> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 2018-04-01 00:00:00 5 350000 0.035 2023-04-01 00:00:00 2018-04-01 00:00:00 2018-09-29 20:00:00 FY18 1826 182. 34853. 1220.
# 2 2018-04-01 00:00:00 5 350000 0.035 2023-04-01 00:00:00 2018-09-30 20:00:00 2019-09-29 20:00:00 FY19 1826 364 69770. 2442.
# 3 2018-04-01 00:00:00 5 350000 0.035 2023-04-01 00:00:00 2019-09-30 20:00:00 2020-09-29 20:00:00 FY20 1826 365 69962. 2449.
# 4 2018-04-01 00:00:00 5 350000 0.035 2023-04-01 00:00:00 2020-09-30 20:00:00 2021-09-29 20:00:00 FY21 1826 364 69770. 2442.
# 5 2018-04-01 00:00:00 5 350000 0.035 2023-04-01 00:00:00 2021-09-30 20:00:00 2022-09-29 20:00:00 FY22 1826 364 69770. 2442.
# 6 2018-04-01 00:00:00 5 350000 0.035 2023-04-01 00:00:00 2022-09-30 20:00:00 2023-04-01 00:00:00 FY23 1826 182. 34917. 1222.
# 7 2025-09-17 00:00:00 3 1200000 0.04 2028-09-17 00:00:00 2025-09-17 00:00:00 2025-09-29 20:00:00 FY25 1096 12.8 14051. 562.
# 8 2025-09-17 00:00:00 3 1200000 0.04 2028-09-17 00:00:00 2025-09-30 20:00:00 2026-09-29 20:00:00 FY26 1096 364 398540. 15942.
# 9 2025-09-17 00:00:00 3 1200000 0.04 2028-09-17 00:00:00 2026-09-30 20:00:00 2027-09-29 20:00:00 FY27 1096 364 398540. 15942.
# 10 2025-09-17 00:00:00 3 1200000 0.04 2028-09-17 00:00:00 2027-09-30 20:00:00 2028-09-17 00:00:00 FY28 1096 352. 385584. 15423.
# 11 2021-07-21 00:00:00 3 7500000 0.375 2024-07-21 00:00:00 2021-07-21 00:00:00 2021-09-29 20:00:00 FY21 1096 70.8 484717. 181769.
# 12 2021-07-21 00:00:00 3 7500000 0.375 2024-07-21 00:00:00 2021-09-30 20:00:00 2022-09-29 20:00:00 FY22 1096 364 2490876. 934078.
# 13 2021-07-21 00:00:00 3 7500000 0.375 2024-07-21 00:00:00 2022-09-30 20:00:00 2023-09-29 20:00:00 FY23 1096 364 2490876. 934078.
# 14 2021-07-21 00:00:00 3 7500000 0.375 2024-07-21 00:00:00 2023-09-30 20:00:00 2024-07-21 00:00:00 FY24 1096 294. 2013002. 754876.
# 15 2026-11-17 00:00:00 2 130000 0.05 2028-11-17 00:00:00 2026-11-17 00:00:00 2027-09-29 20:00:00 FY27 731 317. 56338. 2817.
# 16 2026-11-17 00:00:00 2 130000 0.05 2028-11-17 00:00:00 2027-09-30 20:00:00 2028-09-29 20:00:00 FY28 731 365 64911. 3246.
# 17 2026-11-17 00:00:00 2 130000 0.05 2028-11-17 00:00:00 2028-09-30 20:00:00 2028-11-17 00:00:00 FY29 731 47.2 8395. 420.
The summarizing is really direct, too:
data.summary <- data %>%
summarize(
.by = FY,
across(c(`FY Amount`, `FY Surcharge Amount`), ~ sum(.x, na.rm = TRUE),
.names = "{.col} Total")
)
data.summary
# # A tibble: 12 × 3
# FY `FY Amount Total` `FY Surcharge Amount Total`
# <chr> <dbl> <dbl>
# 1 FY18 34853. 1220.
# 2 FY19 69770. 2442.
# 3 FY20 69962. 2449.
# 4 FY21 554487. 184211.
# 5 FY22 2560646. 936520.
# 6 FY23 2525793. 935301.
# 7 FY25 14051. 562.
# 8 FY26 398540. 15942.
# 9 FY27 454878. 18758.
# 10 FY28 450495. 18669.
# 11 FY24 2013002. 754876.
# 12 FY29 8395. 420.