rdplyrdata-wrangling

In a time series, how to list all the events that have taken place since a certain latency for each focal event?


Let's say I have a time series of behaviours. It contains the timing and identity of people who have performed a particular behaviour. I want to list all the people who performed the behaviour within a certain latency before the focal behaviour.

My current code does the task, but can probably be optimised.

In the example dataset, there is a column for each latency of interest. For instance, the Lat_1 variable contains how many previous rows started a maximum of 1 second before the focal behaviour (i.e., how many previous rows are relevant for the current row at the given latency [column]).

For later steps, it is important that:

Here is the code I have so far. Any help is welcome :-)

library(tidyverse)

LD_SO <- data.frame(Group = "Gr02",
                    Individual = c("B", "A", "B", "B", "A", "A", "C", "A", "B", "C", "A", "A", "C", "A", "A", "A", "B", "C"),
                    Event_type = "Behaviour1",
                    Start_cor = c(2.25, 2.8, 5.9, 6.1, 30.56, 33.45, 34.12, 35.49, 49.78, 54.89, 55.12, 59.24, 136.45, 137, 138.49, 140.21, 141.73, 200.24),
                    Lat_1 = c(0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L),
                    Lat_2 = c(0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L),
                    Lat_3 = c(NA, NA, 0L, 1L, 0L, 1L, 1L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 1L, 1L, 0L),
                    Lat_4 = c(NA, NA, 2L, 3L, 0L, 1L, 2L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 3L, 2L, 0L),
                    Lat_5 = c(NA, NA, 2L, 3L, 0L, 1L, 2L, 3L, 0L, 0L, 1L, 2L, 0L, 0L, 2L, 3L, 3L, 0L),
                    Lat_6 = c(NA, NA, NA, 3L, 0L, 1L, 2L, 3L, 0L, 1L, 2L, 2L, 0L, 0L, 2L, 3L, 4L, 0L),
                    Lat_7 = c(NA, NA, NA, NA, 0L, 1L, 2L, 3L, 0L, 1L, 2L, 2L, 0L, 0L, 2L, 3L, 4L, 0L))

###
# first step: list all relevant previous individuals
###

# the values in Lat_1:Lat_7 indicate how many previous events took place within a certain latency [1 to 7 seconds]
LD_SO_step1 <- LD_SO %>%
  mutate(
    # Identify all relevant previous individuals up to 1 second before the focal behaviour
    Infl_1 = case_when(is.na(Lat_1) ~ "Y", # in order to transform NAs into specific letter for later
                       Lat_1 == 0 ~ "Z", # in order to indicate that nobody started the behaviour within that latency
                       Lat_1 == 1 ~ paste0(lag(Individual, 1)), # retrieve the identity of the person performing the previous behaviour
                       Lat_1 == 2 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2)), # retrieve the identity of the persons performing the two previous behaviours
                       Lat_1 == 3 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3)), # retrieve the identity of the persons performing the three previous behaviours
                       Lat_1 == 4 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3),
                                           lag(Individual, 4))), # and so on
    # Identify all relevant previous individuals up to 2 seconds before the focal behaviour
    Infl_2 = case_when(is.na(Lat_2) ~ "Y",
                       Lat_2 == 0 ~ "Z",
                       Lat_2 == 1 ~ paste0(lag(Individual, 1)),
                       Lat_2 == 2 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2)),
                       Lat_2 == 3 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3)),
                       Lat_2 == 4 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3),
                                           lag(Individual, 4))),
    # Identify all relevant previous individuals up to 3 seconds before the focal behaviour
    Infl_3 = case_when(is.na(Lat_3) ~ "Y",
                       Lat_3 == 0 ~ "Z",
                       Lat_3 == 1 ~ paste0(lag(Individual, 1)),
                       Lat_3 == 2 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2)),
                       Lat_3 == 3 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3)),
                       Lat_3 == 4 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3),
                                           lag(Individual, 4))),
    # and so on
    Infl_4 = case_when(is.na(Lat_4) ~ "Y",
                       Lat_4 == 0 ~ "Z",
                       Lat_4 == 1 ~ paste0(lag(Individual, 1)),
                       Lat_4 == 2 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2)),
                       Lat_4 == 3 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3)),
                       Lat_4 == 4 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3),
                                           lag(Individual, 4))),
    Infl_5 = case_when(is.na(Lat_5) ~ "Y",
                       Lat_5 == 0 ~ "Z",
                       Lat_5 == 1 ~ paste0(lag(Individual, 1)),
                       Lat_5 == 2 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2)),
                       Lat_5 == 3 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3)),
                       Lat_5 == 4 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3),
                                           lag(Individual, 4))),
    Infl_6 = case_when(is.na(Lat_6) ~ "Y",
                       Lat_6 == 0 ~ "Z",
                       Lat_6 == 1 ~ paste0(lag(Individual, 1)),
                       Lat_6 == 2 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2)),
                       Lat_6 == 3 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3)),
                       Lat_6 == 4 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3),
                                           lag(Individual, 4))),
    Infl_7 = case_when(is.na(Lat_7) ~ "Y",
                       Lat_7 == 0 ~ "Z",
                       Lat_7 == 1 ~ paste0(lag(Individual, 1)),
                       Lat_7 == 2 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2)),
                       Lat_7 == 3 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3)),
                       Lat_7 == 4 ~ paste0(lag(Individual, 1),
                                           lag(Individual, 2),
                                           lag(Individual, 3),
                                           lag(Individual, 4))),
    .after = Individual)

Solution

  • This solution iterates over rows, applying your logic to individuals with events within a given latency relative to each row.

    library(purrr)
    library(dplyr)
    
    compute_IT <- function(individual, time, latency) {
      map2_chr(individual, time, \(indiv_i, time_i) {
        if (time_i < latency) return(NA)
        latencies <- time_i - time
        influencers <- unique(individual[latencies > 0 & latencies <= latency])
        n_influencers <- length(influencers)
        if (indiv_i %in% influencers) {
          paste0("Self_", n_influencers - 1)
        } else if (n_influencers > 0) {
          paste0("Other_", n_influencers)
        } else {
          "NoOne_0"
        }
      })
    }
    
    for (i in seq(7)) {
      LD_SO <- mutate(
        LD_SO,
        "IT_{i}" := compute_IT(Individual, Start_cor, latency = i),
        .by = c(Group, Event_type)
      )
    }
    
    #> select(LD_SO, c(Individual, Start_cor, IT_1:IT_7))
       Individual Start_cor    IT_1    IT_2    IT_3    IT_4    IT_5    IT_6    IT_7
    1           B      2.25 NoOne_0 NoOne_0    <NA>    <NA>    <NA>    <NA>    <NA>
    2           A      2.80 Other_1 Other_1    <NA>    <NA>    <NA>    <NA>    <NA>
    3           B      5.90 NoOne_0 NoOne_0 NoOne_0  Self_1  Self_1    <NA>    <NA>
    4           B      6.10  Self_0  Self_0  Self_0  Self_1  Self_1  Self_1    <NA>
    5           A     30.56 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0
    6           A     33.45 NoOne_0 NoOne_0  Self_0  Self_0  Self_0  Self_0  Self_0
    7           C     34.12 Other_1 Other_1 Other_1 Other_1 Other_1 Other_1 Other_1
    8           A     35.49 NoOne_0 Other_1  Self_1  Self_1  Self_1  Self_1  Self_1
    9           B     49.78 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0
    10          C     54.89 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0 Other_1 Other_1
    11          A     55.12 Other_1 Other_1 Other_1 Other_1 Other_1 Other_2 Other_2
    12          A     59.24 NoOne_0 NoOne_0 NoOne_0 NoOne_0  Self_1  Self_1  Self_1
    13          C    136.45 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0
    14          A    137.00 Other_1 Other_1 Other_1 Other_1 Other_1 Other_1 Other_1
    15          A    138.49 NoOne_0  Self_0  Self_1  Self_1  Self_1  Self_1  Self_1
    16          A    140.21 NoOne_0  Self_0  Self_0  Self_1  Self_1  Self_1  Self_1
    17          B    141.73 NoOne_0 Other_1 Other_1 Other_1 Other_1 Other_2 Other_2
    18          C    200.24 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0 NoOne_0