I have timeseries data with multiple values and categorical grouping variables. I would like to calculate the running average for each value, per group. My data includes NAs for some timesteps, that I want to be ignored when calculating the rolling mean. I found several questions and answers on this on stackoverflow, but none of the solutions worked so far and I'm wondering what I'm missing. I know I could do this in long format, but I have several other categorical variables that I want to retain untouched.
df[1:10,]
Timestamp val1 val2 group
1 2018-03-28 10:56:14 NA NA 2
2 2018-03-28 10:56:18 152.4206 150.2132 2
3 2018-03-28 10:56:30 140.1681 140.4355 2
4 2018-03-28 10:56:38 141.4961 140.6526 2
5 2018-03-28 10:56:40 140.0825 139.0457 2
6 2018-03-28 10:56:42 140.0453 140.0724 2
7 2018-03-28 10:56:44 140.7152 140.6107 2
8 2018-03-28 10:56:46 137.1136 137.7038 2
9 2018-03-28 10:56:52 138.1676 138.1061 2
10 2018-03-28 10:57:00 139.7503 138.8120 2
df.sma<-as.data.frame(df%>%
group_by(group)%>%
mutate(across(2:3, rollapplyr, 5, mean, na.rm = TRUE, by = 1, partial = TRUE, fill = NA
)))
> head(df.sma)
Timestamp val1 val2 group
1 2018-03-28 10:56:14 NaN NaN 2
2 2018-03-28 10:56:18 152.4206 150.2132 2
3 2018-03-28 10:56:30 146.2944 145.3243 2
4 2018-03-28 10:56:38 144.6950 143.7671 2
5 2018-03-28 10:56:40 143.5419 142.5867 2
6 2018-03-28 10:56:42 142.8425 142.0839 2
df.sma2<-as.data.frame(df%>%
group_by(group)%>%
mutate_at(vars(2:3), rollapplyr, 5, mean, na.rm = TRUE, by = 1, partial = TRUE, fill = NA)%>%
ungroup())
head(df.sma2)
Timestamp val1 val2 group
1 2018-03-28 10:56:14 NaN NaN 2
2 2018-03-28 10:56:18 152.4206 150.2132 2
3 2018-03-28 10:56:30 146.2944 145.3243 2
4 2018-03-28 10:56:38 144.6950 143.7671 2
5 2018-03-28 10:56:40 143.5419 142.5867 2
6 2018-03-28 10:56:42 142.8425 142.0839 2
some example data:
df<-structure(list(Timestamp = structure(c(1522270574, 1522270578,
1522270590, 1522270598, 1522270600, 1522270602, 1522270604, 1522270606,
1522270612, 1522270620, 1522270624, 1522270626, 1522270630, 1522270638,
1522270646, 1522270650, 1522270652, 1522270656, 1522270658, 1522270666,
1522270672, 1522270674, 1522270678, 1522270682, 1522270684, 1522270700,
1522270704, 1522270706, 1522270710, 1522270712, 1522270714, 1522270720,
1522270728, 1522270732, 1522270736, 1522270742, 1522270760, 1522270764,
1522270766, 1522270770, 1522270788, 1522270792, 1522270796, 1522270800,
1522270808, 1522270814, 1522270820, 1522270828, 1522270832, 1522270858,
1522272018, 1522272022, 1522272038, 1522272042, 1522272044, 1522272045,
1522272047, 1522272048, 1522272049, 1522272051, 1522272052, 1522272053,
1522272055, 1522272056, 1522272068, 1522272070, 1522272071, 1522272072,
1522272074, 1522272075, 1522272077, 1522272079, 1522272080, 1522272081,
1522272083, 1522272084, 1522272086, 1522272094, 1522272096, 1522272097,
1522272099, 1522272100, 1522272102, 1522272104, 1522272105, 1522272108,
1522272116, 1522272142, 1522272150, 1522272154, 1522272162, NA,
NA, NA, NA, NA, NA, NA, NA, NA), class = c("POSIXct", "POSIXt"
), tzone = "HST"), val1 = c(NA, 152.420640676898, 140.168146874079,
141.496126708293, 140.082529977673, 140.045275612881, 140.715183202103,
137.113586611079, 138.16758361454, 139.75031798328, 140.215984869446,
139.764008780732, 140.037498184457, 139.200923807515, 138.761322782691,
139.228081249134, 140.132374508114, 140.603522297505, 140.303591344302,
138.79444324265, 138.592897758994, 138.933406279942, 136.231013692759,
139.381687594324, 140.308379184618, 137.223670050664, 139.508300292213,
139.405762972775, 140.498207629702, 140.557352578793, 141.007893031604,
138.469342717392, 138.988541937918, 140.073618166769, 140.465068264112,
140.943824496289, 139.503847830698, 139.828469267282, 139.866901113749,
140.231110786363, 138.700458457692, 140.256192399129, 141.361416033739,
140.713907096823, 138.980835812608, 138.303403317905, 139.408023758492,
139.030991942014, 137.83274536635, 140.004326485218, 138.181652816362,
138.080081349046, 140.177572453938, 144.798191178199, 145.348070693365,
NA, 142.87268547048, NA, 143.973806261838, 145.697177880129,
NA, 147.039813464252, 143.441477045751, NA, NA, 148.579113171457,
NA, 147.138583941394, 146.320431451225, NA, 146.226167224113,
142.448219150013, NA, 145.885281881276, 144.985690058093, NA,
144.912584374715, 145.582507664862, 148.121988771797, NA, 146.248506959323,
NA, 145.151760593063, 149.537625507927, NA, 144.812868524753,
146.137437723411, 149.767161659034, 150.696017313469, 152.223027191881,
153.784657674474, NA, NA, NA, NA, NA, NA, NA, NA, NA), val2 = c(NA,
150.213158225837, 140.435520231684, 140.652568715905, 139.045674169061,
140.072442579546, 140.610737632044, 137.703775588197, 138.106142335419,
138.812041508755, 140.683604067168, 140.038379585955, 140.880060790739,
138.942708204401, 138.041374001909, 139.30346343814, 139.652246580653,
140.711970095059, 139.504007815648, 138.203914835007, 137.584657523716,
138.547354791723, 138.663066399806, 138.546464167489, 139.327159278758,
138.505236822648, 138.446301065168, 139.119069068746, 139.88861955423,
140.430073431184, 139.008313625876, 137.484299942125, 138.41216391793,
139.133242967442, 140.59965630854, 141.946089778931, 139.224562211565,
140.076879364045, 139.765358074175, 139.632715461796, 135.802066432194,
140.129853910036, 140.270979286976, 139.992006786585, 138.808982052871,
137.756255920595, 139.103795821709, 137.881639260096, 136.807200185064,
139.397877607737, 137.239641462637, 138.30344591474, 139.844021908301,
144.416297156919, 143.146988896577, NA, 142.678276011255, NA,
142.89580356053, 144.437403738214, NA, 144.108154956793, 143.871264835533,
NA, NA, 145.343486626838, NA, 146.610006556818, 146.45235647302,
NA, 145.459509738373, 145.022626197219, NA, 144.175461347449,
142.859062038079, NA, 142.76288245402, 144.170746259675, 146.629646285058,
NA, 146.095040263688, NA, 144.142762608373, 146.492350679929,
NA, 143.929271519015, 143.03006571855, 148.272680355239, 154.998098793559,
150.788837950036, 152.361521480621, NA, NA, NA, NA, NA, NA, NA,
NA, NA), group = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -100L
), class = "data.frame")
Are you looking for
library(dplyr)
df |>
mutate(across(starts_with("val"), ~
zoo::rollapplyr(.x, width = 5L, mean, na.rm = TRUE, fill = NA), .names = "{col}_rm"),
.by = group)
gives
Timestamp val1 val2 group val1_rm val2_rm
1 2018-03-28 10:56:14 NA NA 2 NA NA
2 2018-03-28 10:56:18 152.4206 150.2132 2 NA NA
3 2018-03-28 10:56:30 140.1681 140.4355 2 NA NA
4 2018-03-28 10:56:38 141.4961 140.6526 2 NA NA
5 2018-03-28 10:56:40 140.0825 139.0457 2 143.5419 142.5867
6 2018-03-28 10:56:42 140.0453 140.0724 2 142.8425 142.0839
7 2018-03-28 10:56:44 140.7152 140.6107 2 140.5015 140.1634
8 2018-03-28 10:56:46 137.1136 137.7038 2 139.8905 139.6170
9 2018-03-28 10:56:52 138.1676 138.1061 2 139.2248 139.1078
10 2018-03-28 10:57:00 139.7503 138.8120 2 139.1584 139.0610
11 2018-03-28 10:57:04 140.2160 140.6836 2 139.1925 139.1833
12 2018-03-28 10:57:06 139.7640 140.0384 2 139.0023 139.0688
13 2018-03-28 10:57:10 140.0375 140.8801 2 139.5871 139.7040
14 2018-03-28 10:57:18 139.2009 138.9427 2 139.7937 139.8714
15 2018-03-28 10:57:26 138.7613 138.0414 2 139.5959 139.7172
16 2018-03-28 10:57:30 139.2281 139.3035 2 139.3984 139.4412
17 2018-03-28 10:57:32 140.1324 139.6522 2 139.4720 139.3640
18 2018-03-28 10:57:36 140.6035 140.7120 2 139.5852 139.3304
19 2018-03-28 10:57:38 140.3036 139.5040 2 139.8058 139.4426
20 2018-03-28 10:57:46 138.7944 138.2039 2 139.8124 139.4751
21 2018-03-28 10:57:52 138.5929 137.5847 2 139.6854 139.1314
22 2018-03-28 10:57:54 138.9334 138.5474 2 139.4456 138.9104
23 2018-03-28 10:57:58 136.2310 138.6631 2 138.5711 138.5006
24 2018-03-28 10:58:02 139.3817 138.5465 2 138.3867 138.3091
25 2018-03-28 10:58:04 140.3084 139.3272 2 138.6895 138.5337
26 2018-03-28 10:58:20 137.2237 138.5052 2 138.4156 138.7179
27 2018-03-28 10:58:24 139.5083 138.4463 2 138.5306 138.6976
28 2018-03-28 10:58:26 139.4058 139.1191 2 139.1656 138.7888
29 2018-03-28 10:58:30 140.4982 139.8886 2 139.3889 139.0573
30 2018-03-28 10:58:32 140.5574 140.4301 2 139.4387 139.2779
31 2018-03-28 10:58:34 141.0079 139.0083 2 140.1955 139.3785
32 2018-03-28 10:58:40 138.4693 137.4843 2 139.9877 139.1861
33 2018-03-28 10:58:48 138.9885 138.4122 2 139.9043 139.0447
34 2018-03-28 10:58:52 140.0736 139.1332 2 139.8193 138.8936
35 2018-03-28 10:58:56 140.4651 140.5997 2 139.8009 138.9275
36 2018-03-28 10:59:02 140.9438 141.9461 2 139.7881 139.5151
37 2018-03-28 10:59:20 139.5038 139.2246 2 139.9950 139.8631
38 2018-03-28 10:59:24 139.8285 140.0769 2 140.1630 140.1961
39 2018-03-28 10:59:26 139.8669 139.7654 2 140.1216 140.3225
40 2018-03-28 10:59:30 140.2311 139.6327 2 140.0748 140.1291
41 2018-03-28 10:59:48 138.7005 135.8021 2 139.6262 138.9003
42 2018-03-28 10:59:52 140.2562 140.1299 2 139.7766 139.0814
43 2018-03-28 10:59:56 141.3614 140.2710 2 140.0832 139.1202
44 2018-03-28 11:00:00 140.7139 139.9920 2 140.2526 139.1655
45 2018-03-28 11:00:08 138.9808 138.8090 2 140.0026 139.0008
46 2018-03-28 11:00:14 138.3034 137.7563 2 139.9232 139.3916
47 2018-03-28 11:00:20 139.4080 139.1038 2 139.7535 139.1864
48 2018-03-28 11:00:28 139.0310 137.8816 2 139.2874 138.7085
49 2018-03-28 11:00:32 137.8327 136.8072 2 138.7112 138.0716
50 2018-03-28 11:00:58 140.0043 139.3979 2 138.9159 138.1894
51 2018-03-28 11:20:18 138.1817 137.2396 3 NA NA
52 2018-03-28 11:20:22 138.0801 138.3034 3 NA NA
53 2018-03-28 11:20:38 140.1776 139.8440 3 NA NA
54 2018-03-28 11:20:42 144.7982 144.4163 3 NA NA
55 2018-03-28 11:20:44 145.3481 143.1470 3 141.3171 140.5901
56 2018-03-28 11:20:45 NA NA 3 142.1010 141.4277
57 2018-03-28 11:20:47 142.8727 142.6783 3 143.2991 142.5214
58 2018-03-28 11:20:48 NA NA 3 144.3396 143.4139
59 2018-03-28 11:20:49 143.9738 142.8958 3 144.0649 142.9070
60 2018-03-28 11:20:51 145.6972 144.4374 3 144.1812 143.3372
61 2018-03-28 11:20:52 NA NA 3 144.1812 143.3372
62 2018-03-28 11:20:53 147.0398 144.1082 3 145.5703 143.8138
63 2018-03-28 11:20:55 143.4415 143.8713 3 145.0381 143.8282
64 2018-03-28 11:20:56 NA NA 3 145.3928 144.1389
65 2018-03-28 11:21:08 NA NA 3 145.2406 143.9897
66 2018-03-28 11:21:10 148.5791 145.3435 3 146.3535 144.4410
67 2018-03-28 11:21:11 NA NA 3 146.0103 144.6074
68 2018-03-28 11:21:12 147.1386 146.6100 3 147.8588 145.9767
69 2018-03-28 11:21:14 146.3204 146.4524 3 147.3460 146.1353
70 2018-03-28 11:21:15 NA NA 3 147.3460 146.1353
71 2018-03-28 11:21:17 146.2262 145.4595 3 146.5617 146.1740
72 2018-03-28 11:21:19 142.4482 145.0226 3 145.5334 145.8861
73 2018-03-28 11:21:20 NA NA 3 144.9983 145.6448
74 2018-03-28 11:21:21 145.8853 144.1755 3 144.8532 144.8859
75 2018-03-28 11:21:23 144.9857 142.8591 3 144.8863 144.3792
76 2018-03-28 11:21:24 NA NA 3 144.4397 144.0190
77 2018-03-28 11:21:26 144.9126 142.7629 3 145.2612 143.2658
78 2018-03-28 11:21:34 145.5825 144.1707 3 145.3415 143.4920
79 2018-03-28 11:21:36 148.1220 146.6296 3 145.9007 144.1056
80 2018-03-28 11:21:37 NA NA 3 146.2057 144.5211
81 2018-03-28 11:21:39 146.2485 146.0950 3 146.2164 144.9146
82 2018-03-28 11:21:40 NA NA 3 146.6510 145.6318
83 2018-03-28 11:21:42 145.1518 144.1428 3 146.5074 145.6225
84 2018-03-28 11:21:44 149.5376 146.4924 3 146.9793 145.5767
85 2018-03-28 11:21:45 NA NA 3 146.9793 145.5767
86 2018-03-28 11:21:48 144.8129 143.9293 3 146.5008 144.8548
87 2018-03-28 11:21:56 146.1374 143.0301 3 146.4099 144.3986
88 2018-03-28 11:22:22 149.7672 148.2727 3 147.5638 145.4311
89 2018-03-28 11:22:30 150.6960 154.9981 3 147.8534 147.5575
90 2018-03-28 11:22:34 152.2230 150.7888 3 148.7273 148.2038
91 2018-03-28 11:22:42 153.7847 152.3615 3 150.5217 149.8902
92 <NA> NA NA NA NA NA
93 <NA> NA NA NA NA NA
94 <NA> NA NA NA NA NA
95 <NA> NA NA NA NA NA
96 <NA> NA NA NA NaN NaN
97 <NA> NA NA NA NaN NaN
98 <NA> NA NA NA NaN NaN
99 <NA> NA NA NA NaN NaN
100 <NA> NA NA NA NaN NaN