r, time-series, cross-correlation

saving ccf() looped output in r


I have a df where the first little bit looks like:

>dput(df_long_binned_sound2[1:48,])
structure(list(id = c(20230420, 20230420, 20230420, 20230420, 
20230420, 20230420, 20230420, 20230420, 20230420, 20230420, 20230420, 
20230420, 20230420, 20230420, 20230420, 20230420, 20230424, 20230424, 
20230424, 20230424, 20230424, 20230424, 20230424, 20230424, 20230424, 
20230424, 20230424, 20230424, 20230424, 20230424, 20230424, 20230424, 
20230424, 20230426, 20230426, 20230426, 20230426, 20230426, 20230426, 
20230426, 20230426, 20230426, 20230426, 20230426, 20230426, 20230426, 
20230426, 20230426), cons_id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 16L, 17L, 18L, 19L, 
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 
33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 
46L, 47L), win = c(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 
1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1), sound = c(1, NA, 1.5, 
NA, 2, NA, 2.75, NA, 7, NA, 8, NA, 4, NA, 6.5, NA, NA, 4.5, NA, 
6, NA, 2, NA, 5.5, NA, 4.66666666666667, NA, 4.8, NA, 6, NA, 
4.5, NA, 3, NA, 2.33333333333333, NA, 6, NA, 1, NA, 1, NA, 1.66666666666667, 
NA, 4.5, NA, 5), sound2 = c(NA, 1, NA, 1.5, NA, 1.5, NA, 6, NA, 
8, NA, 1, NA, 8, NA, 7, 3, NA, 5, NA, 5, NA, 5, NA, 6.5, NA, 
8, NA, 6, NA, 5, NA, 5.66666666666667, NA, 3.5, NA, 2, NA, 2.42857142857143, 
NA, 1.5, NA, 2, NA, 8, NA, 2.33333333333333, NA)), row.names = c(NA, 
-48L), class = c("tbl_df", "tbl", "data.frame"))

I am running some cross-correlation analysis on it and I would like to save the numeric output of ccf(). I can save all the correlograms using:

# Write one correlogram PDF per id (cross-correlation of `sound` vs `sound2`).
ids <- unique(df_long_binned_sound2$id)
for (i in seq_along(ids)) {
  # Look up the rows for this id once and reuse them for both series.
  pair_rows <- which(df_long_binned_sound2$id == ids[i])

  # paste0() keeps the file name free of the spaces that paste() would
  # insert around the id (e.g. "plot_ 20230420 .pdf").
  pdf(
    file = paste0("/Users/myname/Desktop/Current Work/CRTT study - 2022/CRTT - Full/CRTT_r_Full/Wack_A_Mole/CC_CustomBin/CC/plot_", ids[i], ".pdf"),
    width = 10, height = 10
  )

  # Close the PDF device even if ccf() fails, so no device is left open.
  tryCatch(
    ccf(
      df_long_binned_sound2$sound[pair_rows],
      df_long_binned_sound2$sound2[pair_rows],
      na.action = na.pass,
      main = paste("Corrected Correlogram \n Pair", ids[i]),
      xlim = c(-6, 6)
    ),
    finally = dev.off()
  )
}

and I can print the number outputs using:

# Print the lag / correlation values returned by ccf() for every id.
# Note: ccf() also draws each correlogram because plotting is left enabled.
for (i in seq_along(ids)) {
  # Look up the rows for this id once and reuse them for both series.
  pair_rows <- which(df_long_binned_sound2$id == ids[i])
  print(
    ccf(
      df_long_binned_sound2$sound[pair_rows],
      df_long_binned_sound2$sound2[pair_rows],
      na.action = na.pass
    )
  )
}

I would like to save the number outputs so that I end up with something like:

id lag lag_value
20230420 -9 -0.145
20230420 -8 -0.057

...

id lag lag_value
20230420 8 -0.183
20230420 9 -0.203
20230424 -9 0.234

...

I'm sure there is a simple solution but I can't seem to find it. I very optimistically tried and failed with:

# Failed attempt 1: bind each ccf() result onto an (initially empty) data frame
# column-wise.
df.cff <- data.frame()
for (i in 1:length(ids)){
  cff.standin <- ccf(df_long_binned_sound2$sound[which(df_long_binned_sound2$id == ids[i])], 
      df_long_binned_sound2$sound2[which(df_long_binned_sound2$id == ids[i])],
      na.action = na.pass,
    )
  # This is the line that fails: the object returned by ccf() has class "acf",
  # which cbind() cannot coerce to a data frame (see the error that follows).
  df.cff <- cbind(df.cff, cff.standin)
}

Error in as.data.frame.default(x[[i]], optional = TRUE, stringsAsFactors = stringsAsFactors) : 
  cannot coerce class ‘"acf"’ to a data.frame

and:

# Failed attempt 2: same loop, but stacking each ccf() result row-wise.
df.cff <- data.frame()
for (i in 1:length(ids)){
  cff.standin <- ccf(df_long_binned_sound2$sound[which(df_long_binned_sound2$id == ids[i])], 
      df_long_binned_sound2$sound2[which(df_long_binned_sound2$id == ids[i])],
      na.action = na.pass,
    )
  # This is the line that fails: the whole "acf" object is passed to rbind()
  # rather than its lag / acf elements (see the error that follows).
  df.cff <- rbind(df.cff, cff.standin)
}

Error in rbind(deparse.level, ...) : 
  invalid list argument: all variables should have the same length

Does anyone know a good way to save the number outputs of ccf() from a for loop? I am especially interested in a solution that formats the output like the table examples above.

TYIA :)


Solution

  • You need to inspect the ccf object with View() or by checking its help page:

    Value

    An object of class "acf", which is a list with the following elements:

    lag A three dimensional array containing the lags at which the acf is estimated.

    acf An array with the same dimensions as lag containing the estimated acf.

    Thus, you just want to do something like:

    # Pair each lag with its estimated correlation and tag the rows with the current id.
    cbind(id = ids[i], lag = cff.standin$lag, lag_value = cff.standin$acf)
    

    Now for the full solution:

    # Build a long table (id, lag, lag_value) holding the ccf() values of
    # `sound` vs `sound2` for every id.
    ids <- unique(df_long_binned_sound2$id)
    # Collect one data frame per id and bind them once at the end, instead of
    # growing the result with rbind() on every iteration.
    ccf_by_id <- vector("list", length(ids))
    for (k in seq_along(ids)) {
      pair_id <- ids[k]
      # Subset once so both series come from the same rows.
      df1_subset <- df_long_binned_sound2[which(df_long_binned_sound2$id == pair_id), ]

      # ccf() returns an "acf" object; with plotting left enabled it also
      # draws the correlogram.
      ccf_output <- ccf(df1_subset$sound, df1_subset$sound2,
                        na.action = na.pass,
                        main = paste("Corrected Correlogram \n Pair", pair_id),
                        xlim = c(-6, 6)
      )

      # $lag and $acf are arrays; flatten them into plain numeric columns so
      # the result is a regular data frame rather than a matrix.
      ccf_by_id[[k]] <- data.frame(
        id = pair_id,
        lag = as.numeric(ccf_output$lag),
        lag_value = as.numeric(ccf_output$acf)
      )
    }
    df_ccf <- do.call(rbind, ccf_by_id)
    

    But I prefer something using tidyverse:

    # Compute the cross-correlation of `sound` vs `sound2` separately for each
    # id and stack the results into one long tibble (id, lag, lag_value).
    # Needs dplyr, purrr and tibble attached (e.g. via library(tidyverse)).
    df_ccf <- df_long_binned_sound2 %>%
      group_split(id) %>%
      map_dfr(function(data) {
        # Read the id from the group itself rather than from an external
        # index: group_split() returns groups in group-key order, which need
        # not match the order of unique(id).
        pair_id <- data$id[1]

        ccf_output <- ccf(data$sound, data$sound2,
                          na.action = na.pass,
                          main = paste("Corrected Correlogram \n Pair", pair_id),
                          xlim = c(-6, 6))

        # $lag and $acf are arrays; flatten them into plain numeric columns.
        tibble(id = pair_id,
               lag = as.numeric(ccf_output$lag),
               lag_value = as.numeric(ccf_output$acf))
      })