rdplyr

List all months between two dates in R


I'm trying to add the list of month and year between two dates.

I have the following dataset:

PKID    Name    Gender  DateStart   DateEnd
68      PAUL    1       24/11/2021  23/02/2022
68      PAUL    1       24/04/2022  23/06/2023
40      KATE    2       01/01/2000  14/03/2000
40      KATE    2       03/12/2000  31/01/2001

And I Want to create the following dataset:

PKID    Name    Gender  DateStart   DateEnd     year    Month
68      PAUL    1       24/11/2021  23/02/2022  2021    11
68      PAUL    1       24/11/2021  23/02/2022  2021    12
68      PAUL    1       24/11/2021  23/02/2022  2022    1
68      PAUL    1       24/11/2021  23/02/2022  2022    2
68      PAUL    1       24/04/2022  23/06/2023  2022    4
68      PAUL    1       24/04/2022  23/06/2023  2022    5
68      PAUL    1       24/04/2022  23/06/2023  2022    6
40      KATE    2       01/01/2000  14/03/2000  2000    1
40      KATE    2       01/01/2000  14/03/2000  2000    2
40      KATE    2       01/01/2000  14/03/2000  2000    3
40      KATE    2       03/12/2000  31/01/2001  2000    12
40      KATE    2       03/12/2000  31/01/2001  2001    1

Where month corresponds to the month between the star date and the end date and year correspond to the month.

I have tried the following:

# Load necessary libraries
library(dplyr)
library(tidyr)
library(lubridate) # For handling date operations

# Sample data 
df <- read.table(text = "
PKID    Name    Gender  DateStart   DateEnd
68  PAUL    1   24/11/2021  23/02/2022
68  PAUL    1   24/04/2022  23/06/2023
40  KATE    2   01/01/2000  14/03/2000
40  KATE    2   03/12/2000  31/01/2001
", header = TRUE, stringsAsFactors = FALSE)

# Convert date columns to Date format
df$DateStart <- dmy(df$DateStart)
df$DateEnd <- dmy(df$DateEnd)

# Generate sequence of dates for each row
df <- df %>%
  group_by(PKID, Name, Gender, DateStart, DateEnd) %>%
  complete(Date = seq.Date(DateStart, DateEnd, by = "month")) %>%
  ungroup() %>%
  mutate(
    year = year(Date),   # Extract year
    Month = month(Date)  # Extract month
  ) %>%
  select(-Date)  # Remove the temporary Date column

# Print the result
print(df)

But I get the following error:

Error in reframe():
ℹ In argument: complete(data = pick(everything()), ..., fill = fill, explicit = explicit).
ℹ In group 1: PKID = 40, Name = "KATE", Gender = 2, DateStart = 2000-01-01, DateEnd = 2000-03-14.
Caused by error:
! object 'DateStart' not found

Solution

  • You can try:

    library(dplyr)
    library(purrr)
    library(tidyr)
    library(lubridate)
    
    df |> 
      mutate(date = map2(DateStart,  rollforward(DateEnd), \(x, y) seq(x, y, by = "month"))) |> 
      unnest(date) |> 
      mutate(year = year(date),
             month = month(date),
             date = NULL)
    
    # A tibble: 26 × 7
        PKID Name  Gender DateStart  DateEnd     year month
       <int> <chr>  <int> <date>     <date>     <dbl> <dbl>
     1    68 PAUL       1 2021-11-24 2022-02-23  2021    11
     2    68 PAUL       1 2021-11-24 2022-02-23  2021    12
     3    68 PAUL       1 2021-11-24 2022-02-23  2022     1
     4    68 PAUL       1 2021-11-24 2022-02-23  2022     2
     5    68 PAUL       1 2022-04-24 2023-06-23  2022     4
     6    68 PAUL       1 2022-04-24 2023-06-23  2022     5
     7    68 PAUL       1 2022-04-24 2023-06-23  2022     6
     8    68 PAUL       1 2022-04-24 2023-06-23  2022     7
     9    68 PAUL       1 2022-04-24 2023-06-23  2022     8
    10    68 PAUL       1 2022-04-24 2023-06-23  2022     9
    # ℹ 16 more rows
    # ℹ Use `print(n = ...)` to see more rows
    

    Or alternatively with reframe()

    df |> 
      reframe(date = seq(DateStart, rollforward(DateEnd), by = "month"),
              year = year(date),
              month = month(date),
              .by = everything()) |> 
      select(-date)