r dataframe time-series forecasting forecast

R: Turn Yearly Forecast Function to Monthly Equivalent for Time Series Data Analysis


I have a function that works for yearly time series forecasts; now I want to adapt it into a similar function that forecasts monthly time series data.

The Existing Function

library(forecast)
library(zoo)

# Simulate 10 observations from an AR(1) process (coefficient 0.8, sd 1);
# the fixed seed makes the example reproducible
set.seed(289805)
y <- arima.sim(n = 10, model = list(ar = 0.8, order = c(1, 0, 0)), sd = 1)
# Index the simulated values as a yearly series starting in 1981
y <- ts(y, frequency = 1, start = c(1981))


#-- Extract Training Data, Fit the Wrong Model, and Forecast
# Training window runs up to and including 1990
yt <- window(y, end = 1990)

# Disabled alternative with a hand-picked order (presumably the "wrong model"
# the heading above refers to)
#yfit <- Arima(yt,order = c(3, 1, 4))
# Let auto.arima() choose the model specification instead
yfit <- forecast::auto.arima(yt)

# No horizon or interval level is passed, so forecast() uses its defaults
yfor <- forecast(yfit)

#---Extract the Data for ggplot using funggcast()
#' Combine observed, fitted and forecast values into one data frame
#'
#' Intended for yearly series: the forecast rows are dated by pasting
#' "-01-01" onto the row names of `as.data.frame(fcast)`, which therefore
#' have to be bare years.
#'
#' @param dn A `ts` object holding the observed series.
#' @param fcast A forecast object providing `$mean` and `$fitted`, e.g. the
#'   result of `forecast()`.
#' @return A data frame with the columns `Year`, `observed`, `fitted`,
#'   `forecast`, `lo80`, `hi80`, `lo95` and `hi95`; columns that do not
#'   apply to a given date are `NA`.
funggcast <- function(dn, fcast) {

  en <- max(time(fcast$mean)) # latest time point covered by the forecast

  # Observed data up to the end of the forecast; window once and reuse it
  obs <- window(dn, end = en)
  ds <- as.data.frame(obs)
  names(ds) <- "observed"
  ds$Year <- as.Date(time(obs))

  # In-sample fitted values, keyed by the same Date column
  dfit <- as.data.frame(fcast$fitted)
  dfit$Year <- as.Date(time(fcast$fitted))
  names(dfit)[1] <- "fitted"

  # Left join on the shared Year column keeps every observed row
  ds <- merge(ds, dfit, all.x = TRUE)

  # Point forecasts and interval bounds; the column names assume the
  # forecast carries exactly the 80% and 95% intervals
  dfcastn <- as.data.frame(fcast)
  dfcastn$Year <- as.Date(paste(row.names(dfcastn), "01", "01", sep = "-"))
  names(dfcastn) <- c("forecast", "lo80", "hi80", "lo95", "hi95", "Year")

  # Full outer join on Year: final data frame for use in ggplot
  merge(ds, dfcastn, all = TRUE)
}

# Assemble the plotting data frame from the series and its forecast
pd <- funggcast(dn = y, fcast = yfor)

pd
#         Year   observed     fitted  forecast       lo80     hi80      lo95     hi95
#1  1981-01-01 -0.2505125 -0.1002050        NA         NA       NA        NA       NA
#2  1982-01-01  0.8855338 -0.2004100        NA         NA       NA        NA       NA
#3  1983-01-01  0.1975730  0.7084271        NA         NA       NA        NA       NA
#4  1984-01-01  1.5355101  0.1580584        NA         NA       NA        NA       NA
#5  1985-01-01  4.6956397  1.2284081        NA         NA       NA        NA       NA
#6  1986-01-01  4.4414254  3.7565120        NA         NA       NA        NA       NA
#7  1987-01-01  3.0230805  3.5531406        NA         NA       NA        NA       NA
#8  1988-01-01  1.8058732  2.4184646        NA         NA       NA        NA       NA
#9  1989-01-01  1.1661036  1.4446987        NA         NA       NA        NA       NA
#10 1990-01-01  1.1631486  0.9328829        NA         NA       NA        NA       NA
#11 1991-01-01         NA         NA 0.9305189 -0.8119452 2.672983 -1.734350 3.595388
#12 1992-01-01         NA         NA 0.7444152 -1.4870278 2.975858 -2.668282 4.157113
#13 1993-01-01         NA         NA 0.5955322 -1.8990535 3.090118 -3.219607 4.410672
#14 1994-01-01         NA         NA 0.4764258 -2.1728901 3.125742 -3.575353 4.528205
#15 1995-01-01         NA         NA 0.3811406 -2.3626271 3.124908 -3.815090 4.577371
#16 1996-01-01         NA         NA 0.3049125 -2.4976344 3.107459 -3.981213 4.591038
#17 1997-01-01         NA         NA 0.2439300 -2.5955971 3.083457 -4.098752 4.586612
#18 1998-01-01         NA         NA 0.1951440 -2.6677997 3.058088 -4.183351 4.573639
#19 1999-01-01         NA         NA 0.1561152 -2.7217152 3.033946 -4.245147 4.557377
#20 2000-01-01         NA         NA 0.1248922 -2.7624254 3.012210 -4.290879 4.540663

What I Tried

library(forecast)
library(zoo)

# Simulate 10 observations from an AR(1) process (coefficient 0.8, sd 1);
# the fixed seed makes the example reproducible
set.seed(289805)
y <- arima.sim(n = 10, model = list(ar = 0.8, order = c(1, 0, 0)), sd = 1)
# Index the simulated values as a monthly series starting in January 1981
y <- ts(y, frequency = 12, start = c(1981, 1))


#-- Extract Training Data, Fit the Wrong Model, and Forecast
# Training window runs up to and including period 10 of 1981 (October)
yt <- window(y, end = c(1981, 10))

# Disabled alternative with a hand-picked order (presumably the "wrong model"
# the heading above refers to)
#yfit <- Arima(yt,order = c(3, 1, 4))
# Let auto.arima() choose the model specification instead
yfit <- forecast::auto.arima(yt)

# No horizon or interval level is passed, so forecast() uses its defaults
yfor <- forecast(yfit)

#---Extract the Data for ggplot using funggcast()
# Combine the observed series `dn` with the fitted values and forecasts in
# `fcast` into a single data frame keyed by a Date column named `Year`.
# This is the unchanged yearly version; with the monthly series above it
# stops with the charToDate error (see the note on the forecast dates below).
funggcast <- function(dn, fcast){

  en <- max(time(fcast$mean)) # Extract the max date used in the forecast

  # Extract Source and Training Data
  # (window() is evaluated twice with the same `end`, once per statement)
  ds <- as.data.frame(window(dn, end = en))
  names(ds) <- 'observed'
  ds$Year <- as.Date(time(window(dn, end = en)))

  # Extract the Fitted Values (need to figure out how to grab confidence intervals)
  dfit <- as.data.frame(fcast$fitted)
  dfit$Year <- as.Date(time(fcast$fitted))
  names(dfit)[1] <- 'fitted'

  ds <- merge(ds, dfit, all.x = T) # Merge fitted values with source and training data

  # Extract the Forecast values and confidence intervals
  dfcastn <- as.data.frame(fcast)
  # NOTE: this line assumes the row names are bare years. For the monthly
  # series the row names already contain a month abbreviation, so appending
  # "-01-01" does not produce a parseable date and as.Date() fails.
  dfcastn$Year <- as.Date(paste(row.names(dfcastn), "01", "01", sep = "-"))
  names(dfcastn) <- c('forecast', 'lo80', 'hi80', 'lo95', 'hi95', 'Year')

  pd <- merge(ds, dfcastn, all = T) # final data.frame for use in ggplot
  return(pd)

}

# Assemble the plotting data frame from the series and its forecast
pd <- funggcast(dn = y, fcast = yfor)

pd

I Got this Error Message:

Error in charToDate(x) : character string is not in a standard unambiguous format In addition: Warning messages: 1: In window.default(x, ...) : 'end' value not changed 2: In window.default(x, ...) : 'end' value not changed

What I Want

Instead of the Year column, I need Month/Year

I want a data frame like the one below, where the supplied time series is monthly, running from January 1, 1981, through February 1, 1981, and so on to October 1, 1981. The out-of-sample forecast should then be monthly as well, running from November 1, 1981, through December 1, 1981, to August 1, 1982, as demonstrated below:

#    Year/Month   observed     fitted  forecast       lo80     hi80      lo95     hi95
#1  1981-01-01 -0.2505125 -0.1002050        NA         NA       NA        NA       NA
#2  1981-02-01  0.8855338 -0.2004100        NA         NA       NA        NA       NA
#3  1981-03-01  0.1975730  0.7084271        NA         NA       NA        NA       NA
#4  1981-04-01  1.5355101  0.1580584        NA         NA       NA        NA       NA
#5  1981-05-01  4.6956397  1.2284081        NA         NA       NA        NA       NA
#6  1981-06-01  4.4414254  3.7565120        NA         NA       NA        NA       NA
#7  1981-07-01  3.0230805  3.5531406        NA         NA       NA        NA       NA
#8  1981-08-01  1.8058732  2.4184646        NA         NA       NA        NA       NA
#9  1981-09-01  1.1661036  1.4446987        NA         NA       NA        NA       NA
#10 1981-10-01  1.1631486  0.9328829        NA         NA       NA        NA       NA
#11 1981-11-01         NA         NA 0.9305189 -0.8119452 2.672983 -1.734350 3.595388
#12 1981-12-01         NA         NA 0.7444152 -1.4870278 2.975858 -2.668282 4.157113
#13 1982-01-01         NA         NA 0.5955322 -1.8990535 3.090118 -3.219607 4.410672
#14 1982-02-01         NA         NA 0.4764258 -2.1728901 3.125742 -3.575353 4.528205
#15 1982-03-01         NA         NA 0.3811406 -2.3626271 3.124908 -3.815090 4.577371
#16 1982-04-01         NA         NA 0.3049125 -2.4976344 3.107459 -3.981213 4.591038
#17 1982-05-01         NA         NA 0.2439300 -2.5955971 3.083457 -4.098752 4.586612
#18 1982-06-01         NA         NA 0.1951440 -2.6677997 3.058088 -4.183351 4.573639
#19 1982-07-01         NA         NA 0.1561152 -2.7217152 3.033946 -4.245147 4.557377
#20 1982-08-01         NA         NA 0.1248922 -2.7624254 3.012210 -4.290879 4.540663

Solution

  • The issue is that the row names of dfcastn already include the abbreviation of the month name. To account for that you could use e.g. as.Date(paste("01", row.names(dfcastn)), format = "%d %B %Y") to convert to proper dates.

    Note: As I'm running on a German locale, I had to temporarily switch to an English locale to convert the English abbreviations of the month names to proper dates. If that is not the case for you, you can drop the respective code lines.

    library(forecast)
    library(zoo)
    
    #' Combine observed, fitted and forecast values of a monthly series
    #'
    #' The forecast rows are dated by parsing the row names of
    #' `as.data.frame(fcast)` (month abbreviation plus year) as the first day
    #' of that month.
    #'
    #' @param dn A `ts` object holding the observed series.
    #' @param fcast A forecast object providing `$mean` and `$fitted`, e.g. the
    #'   result of `forecast()`.
    #' @return A data frame with the columns `Year`, `observed`, `fitted`,
    #'   `forecast`, `lo80`, `hi80`, `lo95` and `hi95`; columns that do not
    #'   apply to a given date are `NA`.
    funggcast <- function(dn, fcast) {
      en <- max(time(fcast$mean)) # Extract the max date used in the forecast

      # Extract Source and Training Data.
      # window() warns "'end' value not changed" when `en` lies beyond the
      # end of `dn`; it is called twice, hence the warning appears twice.
      ds <- as.data.frame(window(dn, end = en))
      names(ds) <- "observed"
      ds$Year <- as.Date(time(window(dn, end = en)))

      # Extract the Fitted Values (need to figure out how to grab confidence intervals)
      dfit <- as.data.frame(fcast$fitted)
      dfit$Year <- as.Date(time(fcast$fitted))
      names(dfit)[1] <- "fitted"

      # Merge fitted values with source and training data (left join on Year)
      ds <- merge(ds, dfit, all.x = TRUE)

      # Extract the Forecast values and confidence intervals
      dfcastn <- as.data.frame(fcast)

      # The row names carry English month abbreviations, so parse them under
      # an English LC_TIME. The "C" locale provides English month names and
      # is available on every platform. The previous locale is restored via
      # on.exit() so it is put back even if the conversion fails.
      old <- Sys.getlocale(category = "LC_TIME")
      on.exit(Sys.setlocale(category = "LC_TIME", locale = old), add = TRUE)
      Sys.setlocale(category = "LC_TIME", locale = "C")
      dfcastn$Year <- as.Date(paste("01", row.names(dfcastn)), format = "%d %B %Y")

      # Column names assume the forecast carries exactly the 80% and 95% intervals
      names(dfcastn) <- c("forecast", "lo80", "hi80", "lo95", "hi95", "Year")

      merge(ds, dfcastn, all = TRUE) # final data.frame for use in ggplot
    }
    
    # Build the combined data frame from the monthly series and its forecast
    pd <- funggcast(y, yfor)
    #> Warning in window.default(x, ...): 'end' value not changed
    
    #> Warning in window.default(x, ...): 'end' value not changed
    
    pd
    #>          Year   observed     fitted    forecast       lo80     hi80      lo95
    #> 1  1981-01-01 -0.2505125 -0.1002050          NA         NA       NA        NA
    #> 2  1981-02-01  0.8855338 -0.2004100          NA         NA       NA        NA
    #> 3  1981-03-01  0.1975730  0.7084271          NA         NA       NA        NA
    #> 4  1981-04-01  1.5355101  0.1580584          NA         NA       NA        NA
    #> 5  1981-05-01  4.6956397  1.2284081          NA         NA       NA        NA
    #> 6  1981-06-01  4.4414254  3.7565120          NA         NA       NA        NA
    #> 7  1981-07-01  3.0230805  3.5531406          NA         NA       NA        NA
    #> 8  1981-08-01  1.8058732  2.4184646          NA         NA       NA        NA
    #> 9  1981-09-01  1.1661036  1.4446987          NA         NA       NA        NA
    #> 10 1981-10-01  1.1631486  0.9328829          NA         NA       NA        NA
    #> 11 1981-11-01         NA         NA 0.930518905 -0.8119452 2.672983 -1.734350
    #> 12 1981-12-01         NA         NA 0.744415169 -1.4870278 2.975858 -2.668282
    #> 13 1982-01-01         NA         NA 0.595532171 -1.8990535 3.090118 -3.219607
    #> 14 1982-02-01         NA         NA 0.476425765 -2.1728901 3.125742 -3.575353
    #> 15 1982-03-01         NA         NA 0.381140635 -2.3626271 3.124908 -3.815090
    #> 16 1982-04-01         NA         NA 0.304912527 -2.4976344 3.107459 -3.981213
    #> 17 1982-05-01         NA         NA 0.243930036 -2.5955971 3.083457 -4.098752
    #> 18 1982-06-01         NA         NA 0.195144041 -2.6677997 3.058088 -4.183351
    #> 19 1982-07-01         NA         NA 0.156115242 -2.7217152 3.033946 -4.245147
    #> 20 1982-08-01         NA         NA 0.124892201 -2.7624254 3.012210 -4.290879
    #> 21 1982-09-01         NA         NA 0.099913767 -2.7934593 2.993287 -4.325119
    #> 22 1982-10-01         NA         NA 0.079931018 -2.8173109 2.977173 -4.351018
    #> 23 1982-11-01         NA         NA 0.063944818 -2.8357705 2.963660 -4.370787
    #> 24 1982-12-01         NA         NA 0.051155858 -2.8501413 2.952453 -4.385995
    #> 25 1983-01-01         NA         NA 0.040924689 -2.8613844 2.943234 -4.397774
    #> 26 1983-02-01         NA         NA 0.032739753 -2.8702168 2.935696 -4.406949
    #> 27 1983-03-01         NA         NA 0.026191804 -2.8771790 2.929563 -4.414131
    #> 28 1983-04-01         NA         NA 0.020953444 -2.8826825 2.924589 -4.419775
    #> 29 1983-05-01         NA         NA 0.016762757 -2.8870429 2.920568 -4.424225
    #> 30 1983-06-01         NA         NA 0.013410206 -2.8905040 2.917324 -4.427743
    #> 31 1983-07-01         NA         NA 0.010728165 -2.8932555 2.914712 -4.430532
    #> 32 1983-08-01         NA         NA 0.008582533 -2.8954456 2.912611 -4.432745
    #> 33 1983-09-01         NA         NA 0.006866027 -2.8971906 2.910923 -4.434505
    #> 34 1983-10-01         NA         NA 0.005492822 -2.8985820 2.909568 -4.435906
    #>        hi95
    #> 1        NA
    #> 2        NA
    #> 3        NA
    #> 4        NA
    #> 5        NA
    #> 6        NA
    #> 7        NA
    #> 8        NA
    #> 9        NA
    #> 10       NA
    #> 11 3.595388
    #> 12 4.157113
    #> 13 4.410672
    #> 14 4.528205
    #> 15 4.577371
    #> 16 4.591038
    #> 17 4.586612
    #> 18 4.573639
    #> 19 4.557377
    #> 20 4.540663
    #> 21 4.524946
    #> 22 4.510880
    #> 23 4.498677
    #> 24 4.488307
    #> 25 4.479623
    #> 26 4.472429
    #> 27 4.466514
    #> 28 4.461681
    #> 29 4.457750
    #> 30 4.454564
    #> 31 4.451988
    #> 32 4.449910
    #> 33 4.448237
    #> 34 4.446892