I have a function that works for yearly time series forecasts. I now want a similar function, adapted from the existing one, that handles monthly time series data.
The Existing Function
library(forecast)
library(zoo)
# Fix the RNG seed so the simulated series is reproducible
set.seed(289805)
# Simulate 10 observations from an AR(1) process with coefficient 0.8
y <- arima.sim(n = 10, model = list(ar = 0.8, order = c(1, 0, 0)), sd = 1)
# Re-index the simulated values as a yearly series starting in 1981
y <- ts(y, frequency = 1, start = c(1981))
#-- Extract Training Data, Fit a Model, and Forecast
# The training window ends in 1990, so it keeps all 10 simulated years
yt <- window(y, end = 1990)
# Alternative kept for reference: a deliberately wrong ARIMA(3, 1, 4) fit
#yfit <- Arima(yt,order = c(3, 1, 4))
# Let auto.arima() select the model order
yfit <- forecast::auto.arima(yt)
# No horizon `h` is passed, so forecast() falls back to its default horizon
yfor <- forecast(yfit)
#---Extract the Data for ggplot using funggcast()
#' Combine observed, fitted and forecast values into one data frame
#'
#' Relies on an `as.Date()` method for `ts` objects (loaded here via zoo).
#'
#' @param dn A `ts` object holding the observed series.
#' @param fcast A forecast object providing `$mean`, `$fitted` and an
#'   `as.data.frame()` method that yields the point forecast followed by
#'   the 80% and 95% interval bounds.
#' @return A data frame with one row per date and the columns `Year`,
#'   `observed`, `fitted`, `forecast`, `lo80`, `hi80`, `lo95`, `hi95`;
#'   columns that do not apply to a date are `NA`.
funggcast <- function(dn, fcast) {
  # Last date covered by the forecast, capped at the last observation:
  # asking window() for an end beyond the data only produces an
  # "'end' value not changed" warning.
  en <- min(max(time(fcast$mean)), max(time(dn)))

  # Observed values up to `en` (windowed once, reused for values and dates)
  obs <- window(dn, end = en)
  ds <- as.data.frame(obs)
  names(ds) <- "observed"
  ds$Year <- as.Date(time(obs))

  # In-sample fitted values, joined to the observations by date
  dfit <- as.data.frame(fcast$fitted)
  dfit$Year <- as.Date(time(fcast$fitted))
  names(dfit)[1] <- "fitted"
  ds <- merge(ds, dfit, all.x = TRUE)

  # Point forecasts and prediction intervals. Dates come from the time
  # index of the forecast, like the two date columns above, so they do
  # not depend on how the row names happen to be formatted.
  dfcastn <- as.data.frame(fcast)
  dfcastn$Year <- as.Date(time(fcast$mean))
  names(dfcastn) <- c("forecast", "lo80", "hi80", "lo95", "hi95", "Year")

  # Final data frame for use in ggplot
  merge(ds, dfcastn, all = TRUE)
}
# Build the plotting data frame from the full series and its forecast
pd <- funggcast(y ,yfor)
# Print the combined data frame (output shown below)
pd
# Year observed fitted forecast lo80 hi80 lo95 hi95
#1 1981-01-01 -0.2505125 -0.1002050 NA NA NA NA NA
#2 1982-01-01 0.8855338 -0.2004100 NA NA NA NA NA
#3 1983-01-01 0.1975730 0.7084271 NA NA NA NA NA
#4 1984-01-01 1.5355101 0.1580584 NA NA NA NA NA
#5 1985-01-01 4.6956397 1.2284081 NA NA NA NA NA
#6 1986-01-01 4.4414254 3.7565120 NA NA NA NA NA
#7 1987-01-01 3.0230805 3.5531406 NA NA NA NA NA
#8 1988-01-01 1.8058732 2.4184646 NA NA NA NA NA
#9 1989-01-01 1.1661036 1.4446987 NA NA NA NA NA
#10 1990-01-01 1.1631486 0.9328829 NA NA NA NA NA
#11 1991-01-01 NA NA 0.9305189 -0.8119452 2.672983 -1.734350 3.595388
#12 1992-01-01 NA NA 0.7444152 -1.4870278 2.975858 -2.668282 4.157113
#13 1993-01-01 NA NA 0.5955322 -1.8990535 3.090118 -3.219607 4.410672
#14 1994-01-01 NA NA 0.4764258 -2.1728901 3.125742 -3.575353 4.528205
#15 1995-01-01 NA NA 0.3811406 -2.3626271 3.124908 -3.815090 4.577371
#16 1996-01-01 NA NA 0.3049125 -2.4976344 3.107459 -3.981213 4.591038
#17 1997-01-01 NA NA 0.2439300 -2.5955971 3.083457 -4.098752 4.586612
#18 1998-01-01 NA NA 0.1951440 -2.6677997 3.058088 -4.183351 4.573639
#19 1999-01-01 NA NA 0.1561152 -2.7217152 3.033946 -4.245147 4.557377
#20 2000-01-01 NA NA 0.1248922 -2.7624254 3.012210 -4.290879 4.540663
What I Tried
library(forecast)
library(zoo)
# Fix the RNG seed so the simulated series is reproducible
set.seed(289805)
# Simulate 10 observations from an AR(1) process with coefficient 0.8
y <- arima.sim(n = 10, model = list(ar = 0.8, order = c(1, 0, 0)), sd = 1)
# Re-index the simulated values as a monthly series starting in January 1981
y <- ts(y, frequency = 12, start = c(1981, 1))
#-- Extract Training Data, Fit a Model, and Forecast
# The training window ends in October 1981, so it keeps all 10 simulated months
yt <- window(y, end = c(1981, 10))
# Alternative kept for reference: a deliberately wrong ARIMA(3, 1, 4) fit
#yfit <- Arima(yt,order = c(3, 1, 4))
# Let auto.arima() select the model order
yfit <- forecast::auto.arima(yt)
# No horizon `h` is passed, so forecast() falls back to its default horizon
yfor <- forecast(yfit)
#---Extract the Data for ggplot using funggcast()
# Combine observed, fitted and forecast values into one data frame.
#   dn:    ts object holding the observed series
#   fcast: forecast object; its $mean, $fitted and as.data.frame() are used
# Intended result: columns Year, observed, fitted, forecast, lo80, hi80,
# lo95, hi95. With the monthly series used here the call stops with an
# error at the forecast-date line marked below.
funggcast <- function(dn, fcast){
en <- max(time(fcast$mean)) # Extract the max date used in the forecast
# Extract Source and Training Data
# window() is called with an end beyond the last observation, which is what
# produces the "'end' value not changed" warning (once per call below)
ds <- as.data.frame(window(dn, end = en))
names(ds) <- 'observed'
ds$Year <- as.Date(time(window(dn, end = en)))
# Extract the Fitted Values (need to figure out how to grab confidence intervals)
dfit <- as.data.frame(fcast$fitted)
dfit$Year <- as.Date(time(fcast$fitted))
names(dfit)[1] <- 'fitted'
ds <- merge(ds, dfit, all.x = T) # Merge fitted values with source and training data
# Extract the Forecast values and confidence intervals
dfcastn <- as.data.frame(fcast)
# NOTE: this line assumes the row names are bare years. For a monthly
# forecast the row names already contain the month abbreviation, so
# appending "-01-01" does not give a parseable date and as.Date() fails with
# "character string is not in a standard unambiguous format".
dfcastn$Year <- as.Date(paste(row.names(dfcastn), "01", "01", sep = "-"))
names(dfcastn) <- c('forecast', 'lo80', 'hi80', 'lo95', 'hi95', 'Year')
pd <- merge(ds, dfcastn, all = T) # final data.frame for use in ggplot
return(pd)
}
# Attempt to build the plotting data frame for the monthly series;
# this call is the one that raises the error quoted below
pd <- funggcast(y ,yfor)
pd
I Got this Error Message:
Error in charToDate(x) : character string is not in a standard unambiguous format In addition: Warning messages: 1: In window.default(x, ...) : 'end' value not changed 2: In window.default(x, ...) : 'end' value not changed
What I Want
Instead of the Year
column, I need a Month/Year column.
I want a data frame like the one below,
where the observed time series is monthly, running from January 1, 1981, through February 1, 1981, and so on, to October 1, 1981. The out-of-sample forecast should be a monthly forecast running from November 1, 1981, through December 1, 1981, and so on, to August 1, 1982, as demonstrated below:
# Year/Month observed fitted forecast lo80 hi80 lo95 hi95
#1 1981-01-01 -0.2505125 -0.1002050 NA NA NA NA NA
#2 1981-02-01 0.8855338 -0.2004100 NA NA NA NA NA
#3 1981-03-01 0.1975730 0.7084271 NA NA NA NA NA
#4 1981-04-01 1.5355101 0.1580584 NA NA NA NA NA
#5 1981-05-01 4.6956397 1.2284081 NA NA NA NA NA
#6 1981-06-01 4.4414254 3.7565120 NA NA NA NA NA
#7 1981-07-01 3.0230805 3.5531406 NA NA NA NA NA
#8 1981-08-01 1.8058732 2.4184646 NA NA NA NA NA
#9 1981-09-01 1.1661036 1.4446987 NA NA NA NA NA
#10 1981-10-01 1.1631486 0.9328829 NA NA NA NA NA
#11 1981-11-01 NA NA 0.9305189 -0.8119452 2.672983 -1.734350 3.595388
#12 1981-12-01 NA NA 0.7444152 -1.4870278 2.975858 -2.668282 4.157113
#13 1982-01-01 NA NA 0.5955322 -1.8990535 3.090118 -3.219607 4.410672
#14 1982-02-01 NA NA 0.4764258 -2.1728901 3.125742 -3.575353 4.528205
#15 1982-03-01 NA NA 0.3811406 -2.3626271 3.124908 -3.815090 4.577371
#16 1982-04-01 NA NA 0.3049125 -2.4976344 3.107459 -3.981213 4.591038
#17 1982-05-01 NA NA 0.2439300 -2.5955971 3.083457 -4.098752 4.586612
#18 1982-06-01 NA NA 0.1951440 -2.6677997 3.058088 -4.183351 4.573639
#19 1982-07-01 NA NA 0.1561152 -2.7217152 3.033946 -4.245147 4.557377
#20 1982-08-01 NA NA 0.1248922 -2.7624254 3.012210 -4.290879 4.540663
The issue is that the row names of dfcastn
already include the abbreviation of the month name. To account for that you could use e.g. as.Date(paste("01", row.names(dfcastn)), format = "%d %B %Y")
to convert to proper dates.
Note: As I'm running on a German locale, I had to temporarily switch to an English locale to convert the English abbreviations of the month names to proper dates. If that is not the case for you, you can drop the respective lines of code.
library(forecast)
library(zoo)
#' Combine observed, fitted and forecast values of a monthly series
#'
#' Relies on an `as.Date()` method for `ts` objects (loaded here via zoo).
#'
#' @param dn A `ts` object holding the observed series.
#' @param fcast A forecast object providing `$mean`, `$fitted` and an
#'   `as.data.frame()` method whose row names are of the form
#'   "<month abbreviation> <year>" and whose columns are the point forecast
#'   followed by the 80% and 95% interval bounds.
#' @return A data frame with one row per month and the columns `Year`,
#'   `observed`, `fitted`, `forecast`, `lo80`, `hi80`, `lo95`, `hi95`;
#'   columns that do not apply to a month are `NA`.
funggcast <- function(dn, fcast) {
  en <- max(time(fcast$mean)) # Last date covered by the forecast

  # Observed values. When `en` lies beyond the last observation, window()
  # warns "'end' value not changed" and returns the series up to its end.
  ds <- as.data.frame(window(dn, end = en))
  names(ds) <- "observed"
  ds$Year <- as.Date(time(window(dn, end = en)))

  # In-sample fitted values, joined to the observations by date
  dfit <- as.data.frame(fcast$fitted)
  dfit$Year <- as.Date(time(fcast$fitted))
  names(dfit)[1] <- "fitted"
  ds <- merge(ds, dfit, all.x = TRUE)

  # Point forecasts and prediction intervals
  dfcastn <- as.data.frame(fcast)

  # The row names carry English month abbreviations, so they must be parsed
  # under a locale with English month names. "C" is used because it is
  # available on every platform, whereas names such as "en_US.UTF-8" are
  # platform-specific. on.exit() guarantees the caller's locale is restored
  # even if an error occurs before the function returns.
  old <- Sys.getlocale(category = "LC_TIME")
  on.exit(Sys.setlocale(category = "LC_TIME", locale = old), add = TRUE)
  Sys.setlocale(category = "LC_TIME", locale = "C")
  dfcastn$Year <- as.Date(paste("01", row.names(dfcastn)), format = "%d %B %Y")

  names(dfcastn) <- c("forecast", "lo80", "hi80", "lo95", "hi95", "Year")

  # Final data frame for use in ggplot
  merge(ds, dfcastn, all = TRUE)
}
# Build the plotting data frame. The forecast extends past the last
# observation, so window() emits the warnings shown below.
pd <- funggcast(y, yfor)
#> Warning in window.default(x, ...): 'end' value not changed
#> Warning in window.default(x, ...): 'end' value not changed
# Print the combined data frame (output shown below)
pd
#> Year observed fitted forecast lo80 hi80 lo95
#> 1 1981-01-01 -0.2505125 -0.1002050 NA NA NA NA
#> 2 1981-02-01 0.8855338 -0.2004100 NA NA NA NA
#> 3 1981-03-01 0.1975730 0.7084271 NA NA NA NA
#> 4 1981-04-01 1.5355101 0.1580584 NA NA NA NA
#> 5 1981-05-01 4.6956397 1.2284081 NA NA NA NA
#> 6 1981-06-01 4.4414254 3.7565120 NA NA NA NA
#> 7 1981-07-01 3.0230805 3.5531406 NA NA NA NA
#> 8 1981-08-01 1.8058732 2.4184646 NA NA NA NA
#> 9 1981-09-01 1.1661036 1.4446987 NA NA NA NA
#> 10 1981-10-01 1.1631486 0.9328829 NA NA NA NA
#> 11 1981-11-01 NA NA 0.930518905 -0.8119452 2.672983 -1.734350
#> 12 1981-12-01 NA NA 0.744415169 -1.4870278 2.975858 -2.668282
#> 13 1982-01-01 NA NA 0.595532171 -1.8990535 3.090118 -3.219607
#> 14 1982-02-01 NA NA 0.476425765 -2.1728901 3.125742 -3.575353
#> 15 1982-03-01 NA NA 0.381140635 -2.3626271 3.124908 -3.815090
#> 16 1982-04-01 NA NA 0.304912527 -2.4976344 3.107459 -3.981213
#> 17 1982-05-01 NA NA 0.243930036 -2.5955971 3.083457 -4.098752
#> 18 1982-06-01 NA NA 0.195144041 -2.6677997 3.058088 -4.183351
#> 19 1982-07-01 NA NA 0.156115242 -2.7217152 3.033946 -4.245147
#> 20 1982-08-01 NA NA 0.124892201 -2.7624254 3.012210 -4.290879
#> 21 1982-09-01 NA NA 0.099913767 -2.7934593 2.993287 -4.325119
#> 22 1982-10-01 NA NA 0.079931018 -2.8173109 2.977173 -4.351018
#> 23 1982-11-01 NA NA 0.063944818 -2.8357705 2.963660 -4.370787
#> 24 1982-12-01 NA NA 0.051155858 -2.8501413 2.952453 -4.385995
#> 25 1983-01-01 NA NA 0.040924689 -2.8613844 2.943234 -4.397774
#> 26 1983-02-01 NA NA 0.032739753 -2.8702168 2.935696 -4.406949
#> 27 1983-03-01 NA NA 0.026191804 -2.8771790 2.929563 -4.414131
#> 28 1983-04-01 NA NA 0.020953444 -2.8826825 2.924589 -4.419775
#> 29 1983-05-01 NA NA 0.016762757 -2.8870429 2.920568 -4.424225
#> 30 1983-06-01 NA NA 0.013410206 -2.8905040 2.917324 -4.427743
#> 31 1983-07-01 NA NA 0.010728165 -2.8932555 2.914712 -4.430532
#> 32 1983-08-01 NA NA 0.008582533 -2.8954456 2.912611 -4.432745
#> 33 1983-09-01 NA NA 0.006866027 -2.8971906 2.910923 -4.434505
#> 34 1983-10-01 NA NA 0.005492822 -2.8985820 2.909568 -4.435906
#> hi95
#> 1 NA
#> 2 NA
#> 3 NA
#> 4 NA
#> 5 NA
#> 6 NA
#> 7 NA
#> 8 NA
#> 9 NA
#> 10 NA
#> 11 3.595388
#> 12 4.157113
#> 13 4.410672
#> 14 4.528205
#> 15 4.577371
#> 16 4.591038
#> 17 4.586612
#> 18 4.573639
#> 19 4.557377
#> 20 4.540663
#> 21 4.524946
#> 22 4.510880
#> 23 4.498677
#> 24 4.488307
#> 25 4.479623
#> 26 4.472429
#> 27 4.466514
#> 28 4.461681
#> 29 4.457750
#> 30 4.454564
#> 31 4.451988
#> 32 4.449910
#> 33 4.448237
#> 34 4.446892