r, count, chi-squared

How to apply a chi-squared test to a series of count tables?


I have the following datasets, with df as the main data frame (imagine that all of them are very large datasets).

# Main data: an id column `x` (odd numbers 1..19) and two categorical
# columns `y` and `z`.
df <- data.frame(
  x = seq(from = 1, to = 20, by = 2),
  y = c("a", "a", "b", "c", "a", "a", "b", "c", "a", "a"),
  z = c("d", "e", "e", "d", "f", "e", "e", "d", "e", "f")
)

# Each stage lists the ids (`xx`) that are looked up in `df$x`.
stage1 <- data.frame(xx = c(2, 3, 4, 5, 7, 8, 9))

stage2 <- data.frame(xx = c(3, 5, 7, 8, 9))

stage3 <- data.frame(xx = c(2, 3, 6, 8))

stage4 <- data.frame(xx = c(1, 3, 6))

I then create the count tables as follows:

library(dplyr)
library(purrr)
# For every stage, keep the rows of `df` whose `x` appears in the stage's
# `xx`, then count how many of those rows fall into each `y` category.
map(
  lst(stage1, stage2, stage3, stage4),
  function(stage) {
    matched <- inner_join(df, stage, by = c("x" = "xx"))
    count(matched, y, name = "Count")
  }
)

I wish to apply a chi-squared test to determine whether the difference between each pair of consecutive tables is significant.


Solution

  • library(dplyr)
    library(purrr)
    
    # Main data: an id column `x` (odd numbers 1..19) and two categorical
    # columns `y` and `z`.
    df <- data.frame(
      x = seq(from = 1, to = 20, by = 2),
      y = c("a", "a", "b", "c", "a", "a", "b", "c", "a", "a"),
      z = c("d", "e", "e", "d", "f", "e", "e", "d", "e", "f")
    )
    # Each stage lists the ids (`xx`) that are looked up in `df$x`.
    stage1 <- data.frame(xx = c(2, 3, 4, 5, 7, 8, 9))
    stage2 <- data.frame(xx = c(3, 5, 7, 8, 9))
    stage3 <- data.frame(xx = c(2, 3, 6, 8))
    stage4 <- data.frame(xx = c(1, 3, 6))
    
    # One count table per stage: keep the rows of `df` whose `x` appears in
    # the stage's `xx`, then count the rows in each `y` category.
    tbls <- map(
      lst(stage1, stage2, stage3, stage4),
      function(stage) {
        matched <- inner_join(df, stage, by = c("x" = "xx"))
        count(matched, y, name = "Count")
      }
    )
    
    # Run a chi-squared test on every pair of consecutive count tables
    # (1 vs 2, 2 vs 3, ...). Each element of `results` is a list with
    #   table_idx   - the two positions in `tbls` that were compared
    #   test_result - the object returned by chisq.test(), or NA when the
    #                 test cannot be computed for that pair.
    results <- lapply(seq_len(max(length(tbls) - 1L, 0L)), function(i) {
      pair <- c(i, i + 1L)
      first <- tbls[[pair[1]]]
      second <- tbls[[pair[2]]]
      result <- list(test_result = NA, table_idx = pair)
    
      # Compare counts category by category: use the union of the `y` values
      # seen in either table, so a category missing from one table is not
      # silently dropped or matched against a different category.
      categories <- union(as.character(first$y), as.character(second$y))
      if (length(categories) < 2L) {
        # With fewer than two categories there is no distribution to compare.
        return(result)
      }
    
      # 2 x k contingency matrix: one row per table, one column per category.
      counts <- rbind(
        first$Count[match(categories, as.character(first$y))],
        second$Count[match(categories, as.character(second$y))]
      )
      counts[is.na(counts)] <- 0 # category absent from a table => count 0
      dimnames(counts) <- list(names(tbls)[pair], categories)
    
      if (any(rowSums(counts) == 0)) {
        # An empty table gives zero expected counts, so the statistic is
        # undefined.
        return(result)
      }
    
      # Passing the matrix (not two separate vectors) makes chisq.test() treat
      # the values as counts rather than as factor levels to cross-tabulate.
      # `correct` only affects 2 x 2 tables.
      result$test_result <- tryCatch(
        chisq.test(counts, correct = FALSE),
        error = function(e) {
          warning(
            "chisq.test failed for tables ", pair[1], " and ", pair[2], ": ",
            conditionMessage(e),
            call. = FALSE
          )
          NA
        }
      )
      result
    })
    
    print(results)