I have this dataset.
# Reproducible example data (dput output): a data frame `s` with 37 rows.
# `date` holds character strings in dd.mm.yyyy form at irregular intervals;
# the remaining ten columns (red, green, blue, nir, swir, B05, B06, B07, B08A,
# B12) are integer measurements -- presumably spectral band values, judging by
# the column names.
s=structure(list(date = c("01.10.2020", "06.10.2020", "11.10.2020",
"16.10.2020", "21.10.2020", "31.10.2020", "05.11.2020", "10.11.2020",
"20.11.2020", "30.11.2020", "05.12.2020", "10.12.2020", "19.12.2020",
"20.12.2020", "24.12.2020", "25.12.2020", "03.01.2021", "04.01.2021",
"08.01.2021", "13.01.2021", "18.01.2021", "23.01.2021", "28.01.2021",
"29.01.2021", "02.02.2021", "07.02.2021", "17.02.2021", "18.02.2021",
"22.02.2021", "23.02.2021", "27.02.2021", "28.02.2021", "04.03.2021",
"05.03.2021", "09.03.2021", "10.03.2021", "14.03.2021"), red = c(1600L,
1360L, 1520L, 1360L, 1528L, 1600L, 1634L, 1508L, 1380L, 1548L,
1456L, 1460L, 1103L, 1732L, 1084L, 1080L, 1504L, 1008L, 1259L,
1230L, 1393L, 1225L, 1482L, 1068L, 1386L, 1316L, 1400L, 784L,
1418L, 580L, 1546L, 920L, 1540L, 578L, 1644L, 1016L, 1568L),
green = c(1392L, 1056L, 1248L, 1120L, 1160L, 1296L, 1426L,
1388L, 1172L, 1372L, 1352L, 1252L, 964L, 1524L, 940L, 936L,
1392L, 1008L, 1054L, 999L, 1244L, 964L, 1350L, 1092L, 1043L,
1044L, 1064L, 640L, 1045L, 660L, 1170L, 884L, 1148L, 602L,
1212L, 808L, 1176L), blue = c(960L, 712L, 816L, 784L, 664L,
960L, 1226L, 1204L, 932L, 1068L, 1200L, 1044L, 737L, 1292L,
668L, 728L, 864L, 832L, 838L, 672L, 1060L, 628L, 1166L, 860L,
678L, 772L, 792L, 272L, 639L, 420L, 802L, 644L, 756L, 362L,
844L, 408L, 776L), nir = c(3096L, 2696L, 2904L, 2680L, 2960L,
2392L, 2120L, 2020L, 2232L, 2088L, 1928L, 2072L, 2488L, 2488L,
2262L, 1912L, 2120L, 1928L, 2516L, 2504L, 2566L, 2414L, 2556L,
2296L, 2568L, 2621L, 2616L, 2472L, 2584L, 2696L, 2744L, 2792L,
2743L, 3000L, 2840L, 2568L, 2772L), swir = c(3040L, 3104L,
3040L, 3040L, 3232L, 2848L, 2656L, 2256L, 2656L, 2656L, 2464L,
2592L, 2976L, 2464L, 2885L, 2080L, 2720L, 2144L, 3216L, 3104L,
3104L, 3020L, 3120L, 2080L, 3104L, 3337L, 3488L, 2016L, 3232L,
1744L, 3603L, 2080L, 3552L, 1888L, 3808L, 2144L, 3614L),
B05 = c(2016L, 1696L, 1952L, 1824L, 1952L, 1888L, 1824L,
1712L, 1696L, 1760L, 1648L, 1696L, 1376L, 1952L, 1440L, 1376L,
1712L, 1376L, 1568L, 1504L, 1712L, 1554L, 1773L, 1424L, 1632L,
1632L, 1696L, 1184L, 1632L, 1056L, 1862L, 1296L, 1888L, 1056L,
2000L, 1376L, 1923L), B06 = c(2656L, 2272L, 2512L, 2400L,
2528L, 2208L, 2016L, 1872L, 1952L, 1952L, 1824L, 1888L, 1952L,
2160L, 1952L, 1696L, 1968L, 1888L, 2050L, 1952L, 2208L, 2062L,
2224L, 2016L, 2075L, 2081L, 2208L, 1952L, 2208L, 2096L, 2250L,
2272L, 2283L, 2208L, 2462L, 2144L, 2336L), B07 = c(2976L,
2528L, 2784L, 2528L, 2784L, 2272L, 2080L, 2016L, 2016L, 2016L,
2016L, 2016L, 2185L, 2320L, 2123L, 1824L, 2080L, 1952L, 2314L,
2272L, 2400L, 2144L, 2452L, 2128L, 2208L, 2395L, 2400L, 2272L,
2330L, 2576L, 2466L, 2656L, 2400L, 2848L, 2644L, 2464L, 2541L
), B08A = c(3232L, 2848L, 2976L, 2848L, 2976L, 2464L, 2208L,
2080L, 2256L, 2144L, 2016L, 2128L, 2455L, 2464L, 2336L, 1952L,
2272L, 2080L, 2540L, 2524L, 2592L, 2388L, 2604L, 2192L, 2588L,
2648L, 2699L, 2464L, 2656L, 2656L, 2833L, 2784L, 2720L, 2976L,
2953L, 2656L, 2902L), B12 = c(2400L, 2400L, 2400L, 2528L,
2496L, 2400L, 2336L, 1744L, 2144L, 2336L, 2224L, 2272L, 2336L,
2144L, 2144L, 1760L, 2096L, 1616L, 2592L, 2392L, 2528L, 2336L,
2576L, 1632L, 2448L, 2656L, 2845L, 1552L, 2512L, 1312L, 2947L,
1568L, 2842L, 1376L, 3087L, 1696L, 2784L)), class = "data.frame", row.names = c(NA,
-37L))
I want to perform time series analysis for all variables at once.
I am running into several difficulties. To begin with, the series must be turned into a ts
object, but how do I do that for all variables at once?
# Attempt to convert the whole data frame `s` into a multivariate ts object
# with a weekly frequency (365.25 / 7 observations per year).
# NOTE(review): ymd("2020-01-10") is 10 January 2020, whereas the first
# observation in `s` is "01.10.2020"; the Date is also used as a plain number,
# which is why the printed series starts at 18271.
# NOTE(review): `s` still contains the character `date` column, which shows up
# in the printed output as small integers (1, 9, ...) rather than dates.
mts <- ts(s, start = (ymd("2020-01-10")),
frequency = 365.25 / 7)
But this gives some kind of decimal time index; I need a regular date format (dd-mm-yyyy).
Now I see:
Time Series:
Start = 18271
End = 18271.6899383984
Frequency = 52.1785714285714
date red green blue nir swir B05 B06 B07 B08A B12
18271.00 1 1600 1392 960 3096 3040 2016 2656 2976 3232 2400
18271.02 9 1360 1056 712 2696 3104 1696 2272 2528 2848 2400
For me, this format is not very convenient.
The second problem:
fit <- auto.arima(mts)
Error in auto.arima(mts) :
auto.arima can only handle univariate time series
This is not surprising, because my series is multivariate, so I need to fit the model for all the variables at once.
The third aspect is more theoretical:
frequency = 365.25 / 7
I find it difficult to set the frequency correctly.
The data cover about six months, but they are not daily: the dates go in irregular jumps, for example 4 days after the previous date, then 5, then 10, then 3. So how should I set the frequency parameter here?
And suppose I need to predict, for example, 5 dates ahead, where each step is 4 days, that is, a forecast for:
14.03.2021
18.03.2021
22.03.2021
26.03.2021
30.03.2021
How can I correctly set the value when predicting?
forecast(fit, 5) #5?
That is 5 dates, each 4 days apart.
So the question is, how do I run:
auto.arima
for all variables, after correctly specifying the frequency parameter for non-daily data? I will be grateful for any help.
You would be better off using the tsibble and fable packages, rather than the forecast package. They handle daily data better, with explicit dates, and allow many series to be forecast simultaneously. fable includes the same algorithm as forecast::auto.arima
for ARIMA models. Here is some code for your data set.
library(tsibble)
library(fable)
library(lubridate)
library(dplyr)
# Reshape the wide data frame `s` into a long tsibble: one series per
# measurement column, keyed by `series` and indexed by `date`.
my_ts <- as_tibble(s) |>
  # dmy() accepts any non-digit separator, so "01.10.2020" parses directly;
  # there is no need to replace the dots first.
  mutate(date = dmy(date)) |>
  tidyr::pivot_longer(red:B12, names_to = "series", values_to = "value") |>
  as_tsibble(index = date, key = series) |>
  # The observations are irregularly spaced; fill_gaps() adds explicit NA rows
  # for the missing days so that every series has a regular daily index.
  fill_gaps()

# Fit one automatically selected ARIMA model per series and forecast the next
# five steps. Because the index is daily, h = 5 means the five days after the
# last observation, not five observation dates; for forecasts 4 days apart,
# forecast a longer horizon and keep every fourth date.
my_ts |>
  model(arima = ARIMA(value)) |>
  forecast(h = 5)
#> # A fable: 50 x 5 [1D]
#> # Key: series, .model [10]
#> series .model date value .mean
#> <chr> <chr> <date> <dist> <dbl>
#> 1 B05 arima 2021-03-15 N(1922, 20524) 1922.
#> 2 B05 arima 2021-03-16 N(1922, 41048) 1922.
#> 3 B05 arima 2021-03-17 N(1921, 61572) 1921.
#> 4 B05 arima 2021-03-18 N(1921, 82096) 1921.
#> 5 B05 arima 2021-03-19 N(1920, 1e+05) 1920.
#> 6 B06 arima 2021-03-15 N(2334, 3173) 2334.
#> 7 B06 arima 2021-03-16 N(2332, 6346) 2332.
#> 8 B06 arima 2021-03-17 N(2330, 9519) 2330.
#> 9 B06 arima 2021-03-18 N(2328, 12693) 2328.
#> 10 B06 arima 2021-03-19 N(2326, 15866) 2326.
#> # ℹ 40 more rows
Created on 2023-05-09 with reprex v2.0.2
More information about forecasting using these packages is available in my textbook at OTexts.com/fpp3.