r tidyverse tsibble

Replacing observations with NA in a tsibble based on a date range


I have the following code:

library(tidyverse)
library(tsibble)

# Six monthly periods (Jan-Jun 2020), repeated once for each of the four states.
time <- rep(
  c("2020 01", "2020 02", "2020 03", "2020 04", "2020 05", "2020 06"),
  times = 4
)
state <- rep(c("CA", "PA", "NY", "WI"), each = 6)
values <- rnorm(24)

# Turn the character periods into tsibble's yearmonth class; the result of
# mutate() on a data.frame is still a data.frame.
dataf <- data.frame(time, state, values) %>%
  mutate(time = yearmonth(time))

# Monthly tsibble with one series per state.
dataf_tsible <- as_tsibble(dataf, key = state, index = time)

Now, I want to replace the entries of the `values` column with NA for March through May 2020.

This works:

# Set `values` to NA for every row whose month is March, April or May 2020,
# comparing against yearmonth objects so the class of `time` is untouched.
dataf_tsible$values[
  dataf_tsible$time == yearmonth("2020 Mar") |
    dataf_tsible$time == yearmonth("2020 Apr") |
    dataf_tsible$time == yearmonth("2020 May")
] <- NA

But is there any better and more efficient way to do that without changing the time class?


Solution

  • You could carry out a `mutate()` to replace the selected values with NA prior to creating the tsibble.

    library(tidyverse)
    library(tsibble)
    
    # Fix the RNG so the printed output is reproducible.
    set.seed(42)

    # Jan-Jun 2020 ("2020 01" ... "2020 06"), repeated for each of four states.
    time <- rep(sprintf("2020 %02d", 1:6), times = 4)
    state <- rep(c("CA", "PA", "NY", "WI"), each = 6)
    values <- rnorm(24)

    dataf <-
      data.frame(time, state, values) |>
      mutate(
        time = yearmonth(time),
        # Blank out March-May 2020 (inclusive); NA_real_ keeps the column double.
        values = replace(
          values,
          between(time, yearmonth("2020 Mar"), yearmonth("2020 May")),
          NA_real_
        )
      )

    # Build the tsibble only after the values have been masked.
    dataf_tsible <- as_tsibble(dataf, key = state, index = time)

    dataf_tsible
    #> # A tsibble: 24 x 3 [1M]
    #> # Key:       state [4]
    #>        time state values
    #>       <mth> <chr>  <dbl>
    #>  1 2020 Jan CA     1.37 
    #>  2 2020 Feb CA    -0.565
    #>  3 2020 Mar CA    NA    
    #>  4 2020 Apr CA    NA    
    #>  5 2020 May CA    NA    
    #>  6 2020 Jun CA    -0.106
    #>  7 2020 Jan NY    -1.39 
    #>  8 2020 Feb NY    -0.279
    #>  9 2020 Mar NY    NA    
    #> 10 2020 Apr NY    NA    
    #> # ℹ 14 more rows
    

    Created on 2023-10-23 with reprex v2.0.2