Tags: r, padr

Padding for weeks outside the range of dates in the original data (padr)


How can I pad for weeks outside of the dates in the original data?

library(tidyverse)
# Example data: `x` holds dates as strings (repeated dates are separate rows)
# and `y` holds a 0/1 value per row. There is no row for "2019-01-03".
df <- data.frame(
  x = rep(
    c("2019-01-02", "2019-01-04", "2019-01-09", "2019-01-19"),
    times = c(2, 3, 1, 1)
  ),
  y = c(1, 0, 1, 1, 0, 1, 0)
)

# Label each row with the first day of its week and group on that label.
# `x` is parsed to a Date first so that floor_date() can round it down.
df %>%
  mutate(
    x = lubridate::ymd(as.character(x)),
    date = lubridate::floor_date(x, "1 week")
  ) %>%
  group_by(date)

# # A tibble: 7 x 3
# # Groups:   date [3]
#     x              y date      
#     <date>     <dbl> <date>    
#   1 2019-01-02     1 2018-12-30
#   2 2019-01-02     0 2018-12-30
#   3 2019-01-04     1 2018-12-30
#   4 2019-01-04     1 2018-12-30
#   5 2019-01-04     0 2018-12-30
#   6 2019-01-09     1 2019-01-06
#   7 2019-01-19     0 2019-01-13

# Count rows per week, then try to pad the weekly series out to the range
# 2019-01-01 .. 2019-02-20, writing 0 wherever padding leaves an NA.
# This call fails with the error shown below.
df %>%
  mutate(x = lubridate::ymd(as.character(x))) %>%
  # Week labels come from floor_date(); for this data the first one is
  # 2018-12-30 (see the printed output above).
  group_by(date = lubridate::floor_date(x, "1 week")) %>%
  count(name = "count") %>%
  ungroup() %>%
  # The requested range starts on 2019-01-01, which is not one of the
  # week labels produced above.
  padr::pad(interval = "week",
            start_val = lubridate::ymd("2019-01-01"),
            end_val = lubridate::ymd("2019-02-20")) %>% 
  replace(is.na(.), 0) 

# Error: The specified interval is invalid for the datetime variable.

The same pipeline works when both the grouping and the padding use a day interval:

# Same steps with a daily interval: count rows per day, pad the series out to
# 2019-01-01 .. 2019-02-20, then write 0 wherever padding left an NA.
df %>%
  mutate(
    x = lubridate::ymd(as.character(x)),
    date = lubridate::floor_date(x, "1 day")
  ) %>%
  group_by(date) %>%
  count(name = "count") %>%
  ungroup() %>%
  padr::pad(
    interval = "day",
    start_val = lubridate::ymd("2019-01-01"),
    end_val = lubridate::ymd("2019-02-20")
  ) %>%
  replace(., is.na(.), 0)

Solution

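  • The issue is the mismatch between the dates given by floor_date() (e.g. 2018/12/30) and the period that you give to padr::pad() (e.g. 2019/01/01 to 2019/02/20). By default floor_date() rounds down to the start of the week, which is a Sunday (2018/12/30, 2019/01/06, ...), whereas 2019/01/01 is a Tuesday, so start_val does not fall on the same weekly grid as the dates in the data and pad() rejects the "week" interval.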

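    If you put them on the same weekly grid, either by shifting the floor_date() bins so that they start on start_val or by moving start_val and end_val onto the floor_date() week starts, then it works.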

    For example:

    
        library(lubridate)
    
        # Range that the padded weekly series should cover.
        Start_val <- ymd("2019-01-01")
        End_val <- ymd("2019-02-20")
    
        # Offset of Start_val from the start of its floor_date() week
        # (a difftime in days; 2 days here, since that week starts on 2018-12-30).
        Adjust_days <- Start_val - floor_date(Start_val, "1 week")
    
        # Example: Changing floor_date dates
        #
        # Build weekly bins that begin on Start_val's weekday, so the bin dates
        # line up with the start_val handed to pad(). Each date is shifted back
        # by Adjust_days before flooring and the result is shifted forward again.
        # Adding Adjust_days to floor_date(x) alone would label dates lying
        # between the floor_date() week start and Start_val's weekday with a bin
        # that begins after them (e.g. 2019-01-06 would be labelled 2019-01-08).
    
        df %>%
          mutate(x = ymd(as.character(x))) %>%
          group_by(
            date = floor_date(x - Adjust_days, "1 week") + Adjust_days
          ) %>%
          count(name = "count") %>%
          ungroup() %>%
          padr::pad(interval = "1 week",
                    start_val = Start_val,
                    end_val = End_val) %>% 
          replace(is.na(.), 0)
    
        #> # A tibble: 8 x 3
        #>   date       name      n
        #>   <date>     <chr> <dbl>
        #> 1 2019-01-01 count     5
        #> 2 2019-01-08 count     1
        #> 3 2019-01-15 count     1
        #> 4 2019-01-22 0         0
        #> 5 2019-01-29 0         0
        #> 6 2019-02-05 0         0
        #> 7 2019-02-12 0         0
        #> 8 2019-02-19 0         0
    
    
        # Example: Changing padr dates
        #
        # Keep the floor_date() week labels as they are and floor the padding
        # range instead, so start_val and end_val are week starts as well.
    
        df %>%
          mutate(
            x = ymd(as.character(x)),
            date = floor_date(x, "1 week")
          ) %>%
          group_by(date) %>%
          count(name = "count") %>%
          ungroup() %>%
          padr::pad(
            interval = "1 week",
            start_val = floor_date(Start_val, "1 week"),
            end_val = floor_date(End_val, "1 week")
          ) %>%
          replace(., is.na(.), 0)
    
        #> # A tibble: 8 x 3
        #>   date       name      n
        #>   <date>     <chr> <dbl>
        #> 1 2018-12-30 count     5
        #> 2 2019-01-06 count     1
        #> 3 2019-01-13 count     1
        #> 4 2019-01-20 0         0
        #> 5 2019-01-27 0         0
        #> 6 2019-02-03 0         0
        #> 7 2019-02-10 0         0
        #> 8 2019-02-17 0         0