rdplyrzoorollapply

Rollapplyr with multiple columns by group with dplyr


I have timeseries data with multiple values and categorical grouping variables. I would like to calculate the running average for each value, per group. My data includes NAs for some timesteps, that I want to be ignored when calculating the rolling mean. I found several questions and answers on this on stackoverflow, but none of the solutions worked so far and I'm wondering what I'm missing. I know I could do this in long format, but I have several other categorical variables that I want to retain untouched.

df[1:10,]
             Timestamp     val1     val2 group
1  2018-03-28 10:56:14       NA       NA     2
2  2018-03-28 10:56:18 152.4206 150.2132     2
3  2018-03-28 10:56:30 140.1681 140.4355     2
4  2018-03-28 10:56:38 141.4961 140.6526     2
5  2018-03-28 10:56:40 140.0825 139.0457     2
6  2018-03-28 10:56:42 140.0453 140.0724     2
7  2018-03-28 10:56:44 140.7152 140.6107     2
8  2018-03-28 10:56:46 137.1136 137.7038     2
9  2018-03-28 10:56:52 138.1676 138.1061     2
10 2018-03-28 10:57:00 139.7503 138.8120     2

df.sma<-as.data.frame(df%>%
                        group_by(group)%>%
                        mutate(across(2:3, rollapplyr, 5, mean, na.rm = TRUE, by = 1, partial = TRUE, fill = NA
                        )))

> head(df.sma)
            Timestamp     val1     val2 group
1 2018-03-28 10:56:14      NaN      NaN     2
2 2018-03-28 10:56:18 152.4206 150.2132     2
3 2018-03-28 10:56:30 146.2944 145.3243     2
4 2018-03-28 10:56:38 144.6950 143.7671     2
5 2018-03-28 10:56:40 143.5419 142.5867     2
6 2018-03-28 10:56:42 142.8425 142.0839     2

df.sma2<-as.data.frame(df%>%
                        group_by(group)%>%
                        mutate_at(vars(2:3), rollapplyr, 5, mean, na.rm = TRUE, by = 1, partial = TRUE, fill = NA)%>%
                        ungroup())

head(df.sma2)
            Timestamp     val1     val2 group
1 2018-03-28 10:56:14      NaN      NaN     2
2 2018-03-28 10:56:18 152.4206 150.2132     2
3 2018-03-28 10:56:30 146.2944 145.3243     2
4 2018-03-28 10:56:38 144.6950 143.7671     2
5 2018-03-28 10:56:40 143.5419 142.5867     2
6 2018-03-28 10:56:42 142.8425 142.0839     2

some example data:

df<-structure(list(Timestamp = structure(c(1522270574, 1522270578, 
1522270590, 1522270598, 1522270600, 1522270602, 1522270604, 1522270606, 
1522270612, 1522270620, 1522270624, 1522270626, 1522270630, 1522270638, 
1522270646, 1522270650, 1522270652, 1522270656, 1522270658, 1522270666, 
1522270672, 1522270674, 1522270678, 1522270682, 1522270684, 1522270700, 
1522270704, 1522270706, 1522270710, 1522270712, 1522270714, 1522270720, 
1522270728, 1522270732, 1522270736, 1522270742, 1522270760, 1522270764, 
1522270766, 1522270770, 1522270788, 1522270792, 1522270796, 1522270800, 
1522270808, 1522270814, 1522270820, 1522270828, 1522270832, 1522270858, 
1522272018, 1522272022, 1522272038, 1522272042, 1522272044, 1522272045, 
1522272047, 1522272048, 1522272049, 1522272051, 1522272052, 1522272053, 
1522272055, 1522272056, 1522272068, 1522272070, 1522272071, 1522272072, 
1522272074, 1522272075, 1522272077, 1522272079, 1522272080, 1522272081, 
1522272083, 1522272084, 1522272086, 1522272094, 1522272096, 1522272097, 
1522272099, 1522272100, 1522272102, 1522272104, 1522272105, 1522272108, 
1522272116, 1522272142, 1522272150, 1522272154, 1522272162, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), class = c("POSIXct", "POSIXt"
), tzone = "HST"), val1 = c(NA, 152.420640676898, 140.168146874079, 
141.496126708293, 140.082529977673, 140.045275612881, 140.715183202103, 
137.113586611079, 138.16758361454, 139.75031798328, 140.215984869446, 
139.764008780732, 140.037498184457, 139.200923807515, 138.761322782691, 
139.228081249134, 140.132374508114, 140.603522297505, 140.303591344302, 
138.79444324265, 138.592897758994, 138.933406279942, 136.231013692759, 
139.381687594324, 140.308379184618, 137.223670050664, 139.508300292213, 
139.405762972775, 140.498207629702, 140.557352578793, 141.007893031604, 
138.469342717392, 138.988541937918, 140.073618166769, 140.465068264112, 
140.943824496289, 139.503847830698, 139.828469267282, 139.866901113749, 
140.231110786363, 138.700458457692, 140.256192399129, 141.361416033739, 
140.713907096823, 138.980835812608, 138.303403317905, 139.408023758492, 
139.030991942014, 137.83274536635, 140.004326485218, 138.181652816362, 
138.080081349046, 140.177572453938, 144.798191178199, 145.348070693365, 
NA, 142.87268547048, NA, 143.973806261838, 145.697177880129, 
NA, 147.039813464252, 143.441477045751, NA, NA, 148.579113171457, 
NA, 147.138583941394, 146.320431451225, NA, 146.226167224113, 
142.448219150013, NA, 145.885281881276, 144.985690058093, NA, 
144.912584374715, 145.582507664862, 148.121988771797, NA, 146.248506959323, 
NA, 145.151760593063, 149.537625507927, NA, 144.812868524753, 
146.137437723411, 149.767161659034, 150.696017313469, 152.223027191881, 
153.784657674474, NA, NA, NA, NA, NA, NA, NA, NA, NA), val2 = c(NA, 
150.213158225837, 140.435520231684, 140.652568715905, 139.045674169061, 
140.072442579546, 140.610737632044, 137.703775588197, 138.106142335419, 
138.812041508755, 140.683604067168, 140.038379585955, 140.880060790739, 
138.942708204401, 138.041374001909, 139.30346343814, 139.652246580653, 
140.711970095059, 139.504007815648, 138.203914835007, 137.584657523716, 
138.547354791723, 138.663066399806, 138.546464167489, 139.327159278758, 
138.505236822648, 138.446301065168, 139.119069068746, 139.88861955423, 
140.430073431184, 139.008313625876, 137.484299942125, 138.41216391793, 
139.133242967442, 140.59965630854, 141.946089778931, 139.224562211565, 
140.076879364045, 139.765358074175, 139.632715461796, 135.802066432194, 
140.129853910036, 140.270979286976, 139.992006786585, 138.808982052871, 
137.756255920595, 139.103795821709, 137.881639260096, 136.807200185064, 
139.397877607737, 137.239641462637, 138.30344591474, 139.844021908301, 
144.416297156919, 143.146988896577, NA, 142.678276011255, NA, 
142.89580356053, 144.437403738214, NA, 144.108154956793, 143.871264835533, 
NA, NA, 145.343486626838, NA, 146.610006556818, 146.45235647302, 
NA, 145.459509738373, 145.022626197219, NA, 144.175461347449, 
142.859062038079, NA, 142.76288245402, 144.170746259675, 146.629646285058, 
NA, 146.095040263688, NA, 144.142762608373, 146.492350679929, 
NA, 143.929271519015, 143.03006571855, 148.272680355239, 154.998098793559, 
150.788837950036, 152.361521480621, NA, NA, NA, NA, NA, NA, NA, 
NA, NA), group = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -100L
), class = "data.frame")

Solution

  • Are you looking for

    library(dplyr)
    df |>
      mutate(across(starts_with("val"), ~
                      zoo::rollapplyr(.x, width = 5L, mean, na.rm = TRUE, fill = NA), .names = "{col}_rm"), 
             .by = group)
    

    gives

     Timestamp     val1     val2 group  val1_rm  val2_rm
    1   2018-03-28 10:56:14       NA       NA     2       NA       NA
    2   2018-03-28 10:56:18 152.4206 150.2132     2       NA       NA
    3   2018-03-28 10:56:30 140.1681 140.4355     2       NA       NA
    4   2018-03-28 10:56:38 141.4961 140.6526     2       NA       NA
    5   2018-03-28 10:56:40 140.0825 139.0457     2 143.5419 142.5867
    6   2018-03-28 10:56:42 140.0453 140.0724     2 142.8425 142.0839
    7   2018-03-28 10:56:44 140.7152 140.6107     2 140.5015 140.1634
    8   2018-03-28 10:56:46 137.1136 137.7038     2 139.8905 139.6170
    9   2018-03-28 10:56:52 138.1676 138.1061     2 139.2248 139.1078
    10  2018-03-28 10:57:00 139.7503 138.8120     2 139.1584 139.0610
    11  2018-03-28 10:57:04 140.2160 140.6836     2 139.1925 139.1833
    12  2018-03-28 10:57:06 139.7640 140.0384     2 139.0023 139.0688
    13  2018-03-28 10:57:10 140.0375 140.8801     2 139.5871 139.7040
    14  2018-03-28 10:57:18 139.2009 138.9427     2 139.7937 139.8714
    15  2018-03-28 10:57:26 138.7613 138.0414     2 139.5959 139.7172
    16  2018-03-28 10:57:30 139.2281 139.3035     2 139.3984 139.4412
    17  2018-03-28 10:57:32 140.1324 139.6522     2 139.4720 139.3640
    18  2018-03-28 10:57:36 140.6035 140.7120     2 139.5852 139.3304
    19  2018-03-28 10:57:38 140.3036 139.5040     2 139.8058 139.4426
    20  2018-03-28 10:57:46 138.7944 138.2039     2 139.8124 139.4751
    21  2018-03-28 10:57:52 138.5929 137.5847     2 139.6854 139.1314
    22  2018-03-28 10:57:54 138.9334 138.5474     2 139.4456 138.9104
    23  2018-03-28 10:57:58 136.2310 138.6631     2 138.5711 138.5006
    24  2018-03-28 10:58:02 139.3817 138.5465     2 138.3867 138.3091
    25  2018-03-28 10:58:04 140.3084 139.3272     2 138.6895 138.5337
    26  2018-03-28 10:58:20 137.2237 138.5052     2 138.4156 138.7179
    27  2018-03-28 10:58:24 139.5083 138.4463     2 138.5306 138.6976
    28  2018-03-28 10:58:26 139.4058 139.1191     2 139.1656 138.7888
    29  2018-03-28 10:58:30 140.4982 139.8886     2 139.3889 139.0573
    30  2018-03-28 10:58:32 140.5574 140.4301     2 139.4387 139.2779
    31  2018-03-28 10:58:34 141.0079 139.0083     2 140.1955 139.3785
    32  2018-03-28 10:58:40 138.4693 137.4843     2 139.9877 139.1861
    33  2018-03-28 10:58:48 138.9885 138.4122     2 139.9043 139.0447
    34  2018-03-28 10:58:52 140.0736 139.1332     2 139.8193 138.8936
    35  2018-03-28 10:58:56 140.4651 140.5997     2 139.8009 138.9275
    36  2018-03-28 10:59:02 140.9438 141.9461     2 139.7881 139.5151
    37  2018-03-28 10:59:20 139.5038 139.2246     2 139.9950 139.8631
    38  2018-03-28 10:59:24 139.8285 140.0769     2 140.1630 140.1961
    39  2018-03-28 10:59:26 139.8669 139.7654     2 140.1216 140.3225
    40  2018-03-28 10:59:30 140.2311 139.6327     2 140.0748 140.1291
    41  2018-03-28 10:59:48 138.7005 135.8021     2 139.6262 138.9003
    42  2018-03-28 10:59:52 140.2562 140.1299     2 139.7766 139.0814
    43  2018-03-28 10:59:56 141.3614 140.2710     2 140.0832 139.1202
    44  2018-03-28 11:00:00 140.7139 139.9920     2 140.2526 139.1655
    45  2018-03-28 11:00:08 138.9808 138.8090     2 140.0026 139.0008
    46  2018-03-28 11:00:14 138.3034 137.7563     2 139.9232 139.3916
    47  2018-03-28 11:00:20 139.4080 139.1038     2 139.7535 139.1864
    48  2018-03-28 11:00:28 139.0310 137.8816     2 139.2874 138.7085
    49  2018-03-28 11:00:32 137.8327 136.8072     2 138.7112 138.0716
    50  2018-03-28 11:00:58 140.0043 139.3979     2 138.9159 138.1894
    51  2018-03-28 11:20:18 138.1817 137.2396     3       NA       NA
    52  2018-03-28 11:20:22 138.0801 138.3034     3       NA       NA
    53  2018-03-28 11:20:38 140.1776 139.8440     3       NA       NA
    54  2018-03-28 11:20:42 144.7982 144.4163     3       NA       NA
    55  2018-03-28 11:20:44 145.3481 143.1470     3 141.3171 140.5901
    56  2018-03-28 11:20:45       NA       NA     3 142.1010 141.4277
    57  2018-03-28 11:20:47 142.8727 142.6783     3 143.2991 142.5214
    58  2018-03-28 11:20:48       NA       NA     3 144.3396 143.4139
    59  2018-03-28 11:20:49 143.9738 142.8958     3 144.0649 142.9070
    60  2018-03-28 11:20:51 145.6972 144.4374     3 144.1812 143.3372
    61  2018-03-28 11:20:52       NA       NA     3 144.1812 143.3372
    62  2018-03-28 11:20:53 147.0398 144.1082     3 145.5703 143.8138
    63  2018-03-28 11:20:55 143.4415 143.8713     3 145.0381 143.8282
    64  2018-03-28 11:20:56       NA       NA     3 145.3928 144.1389
    65  2018-03-28 11:21:08       NA       NA     3 145.2406 143.9897
    66  2018-03-28 11:21:10 148.5791 145.3435     3 146.3535 144.4410
    67  2018-03-28 11:21:11       NA       NA     3 146.0103 144.6074
    68  2018-03-28 11:21:12 147.1386 146.6100     3 147.8588 145.9767
    69  2018-03-28 11:21:14 146.3204 146.4524     3 147.3460 146.1353
    70  2018-03-28 11:21:15       NA       NA     3 147.3460 146.1353
    71  2018-03-28 11:21:17 146.2262 145.4595     3 146.5617 146.1740
    72  2018-03-28 11:21:19 142.4482 145.0226     3 145.5334 145.8861
    73  2018-03-28 11:21:20       NA       NA     3 144.9983 145.6448
    74  2018-03-28 11:21:21 145.8853 144.1755     3 144.8532 144.8859
    75  2018-03-28 11:21:23 144.9857 142.8591     3 144.8863 144.3792
    76  2018-03-28 11:21:24       NA       NA     3 144.4397 144.0190
    77  2018-03-28 11:21:26 144.9126 142.7629     3 145.2612 143.2658
    78  2018-03-28 11:21:34 145.5825 144.1707     3 145.3415 143.4920
    79  2018-03-28 11:21:36 148.1220 146.6296     3 145.9007 144.1056
    80  2018-03-28 11:21:37       NA       NA     3 146.2057 144.5211
    81  2018-03-28 11:21:39 146.2485 146.0950     3 146.2164 144.9146
    82  2018-03-28 11:21:40       NA       NA     3 146.6510 145.6318
    83  2018-03-28 11:21:42 145.1518 144.1428     3 146.5074 145.6225
    84  2018-03-28 11:21:44 149.5376 146.4924     3 146.9793 145.5767
    85  2018-03-28 11:21:45       NA       NA     3 146.9793 145.5767
    86  2018-03-28 11:21:48 144.8129 143.9293     3 146.5008 144.8548
    87  2018-03-28 11:21:56 146.1374 143.0301     3 146.4099 144.3986
    88  2018-03-28 11:22:22 149.7672 148.2727     3 147.5638 145.4311
    89  2018-03-28 11:22:30 150.6960 154.9981     3 147.8534 147.5575
    90  2018-03-28 11:22:34 152.2230 150.7888     3 148.7273 148.2038
    91  2018-03-28 11:22:42 153.7847 152.3615     3 150.5217 149.8902
    92                 <NA>       NA       NA    NA       NA       NA
    93                 <NA>       NA       NA    NA       NA       NA
    94                 <NA>       NA       NA    NA       NA       NA
    95                 <NA>       NA       NA    NA       NA       NA
    96                 <NA>       NA       NA    NA      NaN      NaN
    97                 <NA>       NA       NA    NA      NaN      NaN
    98                 <NA>       NA       NA    NA      NaN      NaN
    99                 <NA>       NA       NA    NA      NaN      NaN
    100                <NA>       NA       NA    NA      NaN      NaN