rzoomoving-averagerolling-average

R function to calculate moving average with varying number of measurements for a given time point


For each time point in my data frame, I have any where from 2 to 4 measurements. I want to calculate the moving average, so that for a given time point I have one value that is the average of all measurements for that time point + the time point before and the time point after.

cellcounts <-c(80, 188, 206, 162, 106,  90,  85, 109,  87,  94,  86, 196, 132, 135,  84, 122,  67,  88,  81, 121,   9,  93, 117, 91, 108, 103, 119, 100,  18,  98,  93, 119, 140, 160, 101,  82, 111, 103,  28,  72, 144,  85,   1)
time <-c(-2.7, -2.8, -2.9, -3.0, -3.1, -3.2, -3.3, -3.4, -3.5, -3.6, -2.7, -2.8, -2.9, -3.0, -3.1, -3.2, -3.3, -3.4, -3.5, -3.6, -3.9, -3.0, -3.1, -3.2, -3.3, -3.4, -3.5, -3.7, -2.5, -2.6, -2.9, -3.0, -3.2, -3.3, -3.4, -3.5, -3.7, -3.8, -2.5, -2.6, -3.7, -3.8, -3.9)
df <- data.frame(cellcounts, time)
df <- df[order(df$time),]
df
zoo::rollapply(df, width = 3, FUN = mean, align = "center", fill = NA)

Solution

  • In base R:

    with(
      rle(df$time),
      {
        cs_x <- c(NA, 1L, cs_x <- cumsum(lengths) + 1L, NA)
        cs_y <- c(0, cumsum(df$cellcounts))
        data.frame(
          time = values,
          av_cellcounts = (cs_y[cs_x[-(1:3)]] - cs_y[cs_x[1:(length(cs_x) - 3L)]])/
            (cs_x[-(1:3)] - cs_x[1:(length(cs_x) - 3L)])
        )
      }
    )
    #>    time av_cellcounts
    #> 1  -3.9            NA
    #> 2  -3.8      79.00000
    #> 3  -3.7     108.28571
    #> 4  -3.6     104.33333
    #> 5  -3.5      98.50000
    #> 6  -3.4      99.16667
    #> 7  -3.3     105.33333
    #> 8  -3.2     106.36364
    #> 9  -3.1     114.45455
    #> 10 -3.0     124.70000
    #> 11 -2.9     147.11111
    #> 12 -2.8     140.14286
    #> 13 -2.7     120.00000
    #> 14 -2.6      63.66667
    #> 15 -2.5            NA
    

    Or, averaging only 2 time steps at the endpoints:

    with(
      rle(df$time),
      {
        cs_x <- c(1L, 1L, cs_x <- cumsum(lengths) + 1L, cs_x[length(cs_x)])
        cs_y <- c(0, cumsum(df$cellcounts))
        data.frame(
          time = values,
          av_cellcounts = (cs_y[cs_x[-(1:3)]] - cs_y[cs_x[1:(length(cs_x) - 3L)]])/
            (cs_x[-(1:3)] - cs_x[1:(length(cs_x) - 3L)])
        )
      }
    )
    #>    time av_cellcounts
    #> 1  -3.9      49.50000
    #> 2  -3.8      79.00000
    #> 3  -3.7     108.28571
    #> 4  -3.6     104.33333
    #> 5  -3.5      98.50000
    #> 6  -3.4      99.16667
    #> 7  -3.3     105.33333
    #> 8  -3.2     106.36364
    #> 9  -3.1     114.45455
    #> 10 -3.0     124.70000
    #> 11 -2.9     147.11111
    #> 12 -2.8     140.14286
    #> 13 -2.7     120.00000
    #> 14 -2.6      63.66667
    #> 15 -2.5      54.00000