I have converted a regular data frame into a tsibble object and used it for time-series forecasting. While fitting the model I get a date-format error: "Error in decimal_date.default(x) : date(s) not in POSIXt or Date format". As you can see from the code below, the converted tsibble object clearly identifies the column "Week.1" as a week (yearweek) type. Could you please help me understand why I still get the date-format error when I fit forecast models to the tsibble object?
library(dplyr)
library(tsibble)
library(fpp3)
library(forecast)
library(fable)
# Reproducible input data: a plain data.frame with 148 weekly rows
# (YearWeek "201901" through "202143"). Columns:
#   YearWeek            - character year + week number, e.g. "201901"
#   Shipment            - numeric
#   Production          - numeric
#   Net.Production.Qty  - numeric; the series that is modelled further down
#   isoweek             - character, e.g. "2019-W01-1"
#   date                - Date (stored as days since 1970-01-01)
Original.df<- structure(list(YearWeek = c("201901", "201902", "201903", "201904",
"201905", "201906", "201907", "201908", "201909", "201910", "201911",
"201912", "201913", "201914", "201915", "201916", "201917", "201918",
"201919", "201920", "201921", "201922", "201923", "201924", "201925",
"201926", "201927", "201928", "201929", "201930", "201931", "201932",
"201933", "201934", "201935", "201936", "201937", "201938", "201939",
"201940", "201941", "201942", "201943", "201944", "201945", "201946",
"201947", "201948", "201949", "201950", "201951", "201952", "202001",
"202002", "202003", "202004", "202005", "202006", "202007", "202008",
"202009", "202010", "202011", "202012", "202013", "202014", "202015",
"202016", "202017", "202018", "202019", "202020", "202021", "202022",
"202023", "202024", "202025", "202026", "202027", "202028", "202029",
"202030", "202031", "202032", "202033", "202034", "202035", "202036",
"202037", "202038", "202039", "202040", "202041", "202042", "202043",
"202044", "202045", "202046", "202047", "202048", "202049", "202050",
"202051", "202052", "202053", "202101", "202102", "202103", "202104",
"202105", "202106", "202107", "202108", "202109", "202110", "202111",
"202112", "202113", "202114", "202115", "202116", "202117", "202118",
"202119", "202120", "202121", "202122", "202123", "202124", "202125",
"202126", "202127", "202128", "202129", "202130", "202131", "202132",
"202133", "202134", "202135", "202136", "202137", "202138", "202139",
"202140", "202141", "202142", "202143"), Shipment = c(418, 1442,
1115, 1203, 1192, 1353, 1191, 1411, 933, 1384, 1362, 1353, 1739,
1751, 1595, 1380, 1711, 2058, 1843, 1602, 2195, 2159, 2009, 1812,
2195, 1763, 821, 1892, 1781, 2071, 1789, 1789, 1732, 1384, 1435,
1247, 1839, 2034, 1963, 1599, 1596, 1548, 1084, 1350, 1856, 1882,
1979, 1021, 1311, 2031, 1547, 591, 724, 1535, 1268, 1021, 1269,
1763, 1275, 1411, 1847, 1379, 1606, 1473, 1180, 926, 800, 840,
1375, 1755, 1902, 1921, 1743, 1275, 1425, 1088, 1416, 1168, 842,
1185, 1570, 1435, 1209, 1470, 1368, 1926, 1233, 1189, 1245, 1465,
1226, 887, 1489, 1369, 1358, 1179, 1200, 1226, 1066, 823, 1913,
2308, 1842, 910, 794, 1098, 1557, 1417, 1851, 1876, 1010, 160,
1803, 1607, 1185, 1347, 1700, 981, 1191, 1058, 1464, 1513, 1333,
1169, 1294, 978, 962, 1254, 987, 1290, 758, 436, 579, 636, 614,
906, 982, 649, 564, 502, 274, 473, 506, 902, 639, 810, 398, 488
), Production = c(0, 198, 1436, 1055, 1396, 1330, 1460, 1628,
1513, 1673, 1737, 1274, 1726, 1591, 2094, 1411, 2009, 1909, 1759,
1693, 1748, 1455, 2078, 1717, 1737, 1886, 862, 1382, 1779, 1423,
1460, 1454, 1347, 1409, 1203, 1235, 1397, 1563, 1411, 1455, 1706,
688, 1446, 1336, 1618, 1404, 1759, 746, 1560, 1665, 1317, 0,
441, 1390, 1392, 1180, 1477, 1265, 1485, 1495, 1543, 1584, 1575,
1609, 1233, 1420, 908, 1008, 1586, 1392, 1385, 1259, 1010, 973,
1053, 905, 1101, 1196, 891, 1033, 925, 889, 1136, 1058, 1179,
1047, 967, 900, 904, 986, 1014, 945, 1030, 1066, 1191, 1143,
1292, 574, 1174, 515, 1296, 1315, 1241, 0, 0, 1182, 1052, 1107,
1207, 1254, 1055, 258, 1471, 1344, 1353, 1265, 1444, 791, 1397,
1186, 1264, 1032, 949, 1059, 954, 798, 956, 1074, 1136, 1209,
975, 833, 994, 1127, 1153, 1202, 1234, 1336, 1484, 1515, 1151,
1175, 976, 1135, 1272, 869, 1900, 1173), Net.Production.Qty = c(22,
188, 1428, 1031, 1382, 1368, 1456, 1578, 1463, 1583, 1699, 1318,
1582, 1537, 2118, 1567, 1961, 1897, 1767, 1603, 1666, 1419, 2186,
1621, 1677, 1840, 698, 1290, 1411, 927, 1754, 1222, 1411, 1549,
1491, 1359, 1179, 1945, 1463, 1465, 1764, 764, 810, 1308, 1830,
1542, 1695, 544, 1482, 1673, 1659, 0, 445, 1358, 1364, 1224,
1417, 1239, 1387, 1595, 1469, 1624, 1643, 1763, 1217, 1456, 568,
1290, 1666, 1428, 1327, 773, 1118, 1231, 1143, 921, 1083, 1124,
935, 903, 937, 849, 1132, 1032, 1143, 1081, 891, 886, 880, 1002,
1072, 969, 1000, 996, 1243, 1183, 1306, 650, 1226, 553, 1306,
1379, 1359, 0, 0, 1182, 988, 1099, 1173, 1244, 1039, 254, 1425,
1318, 1385, 1221, 1364, 739, 1397, 1112, 1160, 924, 971, 1015,
978, 828, 868, 994, 1090, 1165, 783, 887, 934, 1023, 1045, 1114,
1052, 1186, 1456, 1401, 1249, 779, 430, 1625, 1498, 883, 1860,
1101), isoweek = c("2019-W01-1", "2019-W02-1", "2019-W03-1",
"2019-W04-1", "2019-W05-1", "2019-W06-1", "2019-W07-1", "2019-W08-1",
"2019-W09-1", "2019-W10-1", "2019-W11-1", "2019-W12-1", "2019-W13-1",
"2019-W14-1", "2019-W15-1", "2019-W16-1", "2019-W17-1", "2019-W18-1",
"2019-W19-1", "2019-W20-1", "2019-W21-1", "2019-W22-1", "2019-W23-1",
"2019-W24-1", "2019-W25-1", "2019-W26-1", "2019-W27-1", "2019-W28-1",
"2019-W29-1", "2019-W30-1", "2019-W31-1", "2019-W32-1", "2019-W33-1",
"2019-W34-1", "2019-W35-1", "2019-W36-1", "2019-W37-1", "2019-W38-1",
"2019-W39-1", "2019-W40-1", "2019-W41-1", "2019-W42-1", "2019-W43-1",
"2019-W44-1", "2019-W45-1", "2019-W46-1", "2019-W47-1", "2019-W48-1",
"2019-W49-1", "2019-W50-1", "2019-W51-1", "2019-W52-1", "2020-W01-1",
"2020-W02-1", "2020-W03-1", "2020-W04-1", "2020-W05-1", "2020-W06-1",
"2020-W07-1", "2020-W08-1", "2020-W09-1", "2020-W10-1", "2020-W11-1",
"2020-W12-1", "2020-W13-1", "2020-W14-1", "2020-W15-1", "2020-W16-1",
"2020-W17-1", "2020-W18-1", "2020-W19-1", "2020-W20-1", "2020-W21-1",
"2020-W22-1", "2020-W23-1", "2020-W24-1", "2020-W25-1", "2020-W26-1",
"2020-W27-1", "2020-W28-1", "2020-W29-1", "2020-W30-1", "2020-W31-1",
"2020-W32-1", "2020-W33-1", "2020-W34-1", "2020-W35-1", "2020-W36-1",
"2020-W37-1", "2020-W38-1", "2020-W39-1", "2020-W40-1", "2020-W41-1",
"2020-W42-1", "2020-W43-1", "2020-W44-1", "2020-W45-1", "2020-W46-1",
"2020-W47-1", "2020-W48-1", "2020-W49-1", "2020-W50-1", "2020-W51-1",
"2020-W52-1", "2020-W53-1", "2021-W01-1", "2021-W02-1", "2021-W03-1",
"2021-W04-1", "2021-W05-1", "2021-W06-1", "2021-W07-1", "2021-W08-1",
"2021-W09-1", "2021-W10-1", "2021-W11-1", "2021-W12-1", "2021-W13-1",
"2021-W14-1", "2021-W15-1", "2021-W16-1", "2021-W17-1", "2021-W18-1",
"2021-W19-1", "2021-W20-1", "2021-W21-1", "2021-W22-1", "2021-W23-1",
"2021-W24-1", "2021-W25-1", "2021-W26-1", "2021-W27-1", "2021-W28-1",
"2021-W29-1", "2021-W30-1", "2021-W31-1", "2021-W32-1", "2021-W33-1",
"2021-W34-1", "2021-W35-1", "2021-W36-1", "2021-W37-1", "2021-W38-1",
"2021-W39-1", "2021-W40-1", "2021-W41-1", "2021-W42-1", "2021-W43-1"
), date = structure(c(17896, 17903, 17910, 17917, 17924, 17931,
17938, 17945, 17952, 17959, 17966, 17973, 17980, 17987, 17994,
18001, 18008, 18015, 18022, 18029, 18036, 18043, 18050, 18057,
18064, 18071, 18078, 18085, 18092, 18099, 18106, 18113, 18120,
18127, 18134, 18141, 18148, 18155, 18162, 18169, 18176, 18183,
18190, 18197, 18204, 18211, 18218, 18225, 18232, 18239, 18246,
18253, 18260, 18267, 18274, 18281, 18288, 18295, 18302, 18309,
18316, 18323, 18330, 18337, 18344, 18351, 18358, 18365, 18372,
18379, 18386, 18393, 18400, 18407, 18414, 18421, 18428, 18435,
18442, 18449, 18456, 18463, 18470, 18477, 18484, 18491, 18498,
18505, 18512, 18519, 18526, 18533, 18540, 18547, 18554, 18561,
18568, 18575, 18582, 18589, 18596, 18603, 18610, 18617, 18624,
18631, 18638, 18645, 18652, 18659, 18666, 18673, 18680, 18687,
18694, 18701, 18708, 18715, 18722, 18729, 18736, 18743, 18750,
18757, 18764, 18771, 18778, 18785, 18792, 18799, 18806, 18813,
18820, 18827, 18834, 18841, 18848, 18855, 18862, 18869, 18876,
18883, 18890, 18897, 18904, 18911, 18918, 18925), class = "Date")), row.names = c(NA,
148L), class = "data.frame")
# Rebuild the week columns from YearWeek:
#   1. rewrite "YYYYWW" as "YYYY-Www-1" (the trailing 1 is the weekday part),
#   2. convert that string to a Date with ISOweek::ISOweek2date().
Original.df <- Original.df %>%
  mutate(isoweek = stringr::str_replace(YearWeek, "^(\\d{4})(\\d{2})$", "\\1-W\\2-1")) %>%
  mutate(date = ISOweek::ISOweek2date(isoweek))

# Inspect the result interactively.
View(Original.df)
# Split the data by date into a training window and a test window.
# Both bounds of each window are inclusive.
Original.train.df <- Original.df %>%
  filter(date >= as.Date("2018-12-31") & date <= as.Date("2021-03-29"))

Original.test.df <- Original.df %>%
  filter(date >= as.Date("2021-04-05") & date <= as.Date("2021-10-25"))
# Reduce the multi-variable training data to a univariate weekly series:
# add a yearweek index (Week.1), drop every column except Net.Production.Qty
# and the new index, then convert to a tsibble indexed by Week.1.
Net.Production.train.df <- Original.train.df %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  select(-c(YearWeek, Shipment, Production, date, isoweek)) %>%
  as_tsibble(index = Week.1)

# Print the tsibble and the class of its index column.
Net.Production.train.df
class(Net.Production.train.df$Week.1)
# Fit a regression with ARIMA errors to Net.Production.Qty, using the output of
# fourier() as external regressors, and keep the fit with the lowest AICc.
# NOTE(review): the question reports "Error in decimal_date.default(x)" while
# fitting the model; presumably it is raised in this loop because the tsibble
# itself is handed to auto.arima()/fourier() - confirm.
# Placeholder whose AICc is Inf, so the first fit always replaces it.
bestfit.Net.Prod <- list(aicc=Inf)
# Try every K from 1 to 25 as the K argument of fourier().
for(K in seq(25))
{
fit.Net.Prod <- auto.arima(Net.Production.train.df, xreg=fourier(Net.Production.train.df, K=K), seasonal=FALSE,approximation = F)
# Keep this fit (and its K) only when it improves on the best AICc so far.
if(fit.Net.Prod$aicc < bestfit.Net.Prod$aicc)
{
bestfit.Net.Prod <- fit.Net.Prod
bestK.Net.Prod <- K
}
}
# Forecast from the best fit; the future regressors are built by fourier()
# with the selected K and h = 30.
forecast.net.prod<- forecast(bestfit.Net.Prod,xreg = fourier(Net.Production.train.df,K=bestK.Net.Prod,h=30))
forecast.net.prod
Please advise. Thank you.
You are mixing two different ways of doing forecasts: you should use either fable or forecast. auto.arima is from the forecast package. Although it does work with fable, it is better to keep everything within the same package ecosystem. fable is the successor of forecast. Your library loading probably caused a conflict somewhere.
For arima forecasts check out chapter 9.7 from Forecasting: Principles and Practice 3rd edition.
I adjusted your code to work with fable. I have included 2 ways of doing this. My preference is the second one, because then you can see the difference in AICc values and see that they are very close to each other.
library(fpp3)
#... your code until just before the loop

# Lowest AICc seen so far; Inf guarantees the first usable fit is accepted.
bestfit.Net.AICc <- Inf

# Fit one fable ARIMA model per K (1..25) with fourier(K = K) as regressor
# terms and remember the model, its AICc and its K when the AICc improves.
for (K in seq_len(25)) {
  fit <- Net.Production.train.df %>%
    model(ARIMA(Net.Production.Qty ~ fourier(K = K), approximation = FALSE))

  # Extract the AICc once instead of calling glance() twice per iteration.
  fit_aicc <- purrr::pluck(glance(fit), "AICc")

  # pluck() returns NULL when glance() has no AICc column, and the value may
  # be NA; either would make a bare `if (fit_aicc < ...)` fail, so such fits
  # are skipped instead of aborting the whole search.
  has_aicc <- length(fit_aicc) == 1L && !is.na(fit_aicc)

  if (has_aicc && fit_aicc < bestfit.Net.AICc) {
    bestfit.Net.AICc <- fit_aicc
    bestfit.Net.Prod <- fit
    bestK.Net.Prod <- K
  }
}

bestK.Net.Prod # in my case 13
glance(bestfit.Net.Prod)
# A tibble: 1 x 8
.model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 ARIMA(Net.Production.Qty ~ fourier(K = K), approximation = FALSE) 96156. -822. 1702. 1722. 1782. <cpl [0~ <cpl [2~
# Forecast h = 30 periods ahead from the selected model and plot the forecast
# together with the training series.
autoplot(
  forecast(bestfit.Net.Prod, h = 30),
  Net.Production.train.df
)
Second option:
#... your code until just before the loop

# Fit one ARIMA model per K (1..25) and keep all of them, so their
# information criteria can be compared side by side afterwards.
n_K <- 25L

# Preallocate one slot per model instead of growing the mable with
# bind_cols() on every iteration.
fit_list <- vector("list", n_K)

for (K in seq_len(n_K)) {
  fit <- Net.Production.train.df %>%
    model(ARIMA(Net.Production.Qty ~ fourier(K = K), approximation = FALSE))
  # Give each model column a unique name ("arima_1", ..., "arima_25").
  names(fit) <- paste0("arima_", K)
  fit_list[[K]] <- fit
}

# Combine the single-model mables column-wise in one step.
fit_all_models <- bind_cols(fit_list)

# Compare the models, best (lowest) AICc first.
glance(fit_all_models) %>% arrange(AICc) %>% select(.model:BIC)
# A tibble: 25 x 6
.model sigma2 log_lik AIC AICc BIC
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_13 96156. -822. 1702. 1722. 1782.
2 arima_11 104327. -829. 1709. 1723. 1778.
3 arima_12 102962. -827. 1709. 1726. 1783.
4 arima_14 95447. -820. 1702. 1726. 1788.
5 arima_10 108961. -833. 1713. 1726. 1780.
6 arima_5 127801. -848. 1725. 1730. 1767.
7 arima_8 117956. -839. 1721. 1730. 1779.
8 arima_15 95685. -819. 1704. 1731. 1795.
9 arima_6 127660. -846. 1727. 1733. 1774.
10 arima_9 123129. -842. 1724. 1733. 1779.
# ... with 15 more rows
# Name (a length-one character vector) of the model with the lowest AICc.
# which.min() ignores NA values and returns only the first minimum, so exactly
# one name is selected even when AICc values tie or some models have no AICc.
# pull() extracts the plain string rather than deparsing a one-column tibble.
best_model <- glance(fit_all_models) %>%
  slice(which.min(AICc)) %>%
  pull(.model)
# Forecast h = 30 periods ahead from the best model and plot the forecast
# together with the training series.
# all_of() states explicitly that best_model is an external character vector
# holding a column name, not a column of fit_all_models itself.
fit_all_models %>%
  select(all_of(best_model)) %>%
  forecast(h = 30) %>%
  autoplot(Net.Production.train.df)