rkruskal-wallis

Add kruskall-wallis and paiwise comparisons test outputs to dataframe


I have several boxplots that I am running some statistics on through a for loop.

I want to be able to output the data into a dataframe. i.e. something like this (I've made up random numbers):

For the Kruskal-Wallis:

enter image description here

For the pairwise comparisons:

enter image description here

This is my code:

names <- c('B1','B2','B3')

for (name in names){
  mereLoc <- scaledDaily %>%
    filter(scaledDaily$loc == name)
  mereLoc$year <- as.numeric(mereLoc$year)
  median <- tapply(mereLoc$mAODscale, mereLoc$year, median, na.rm=T)
  print(name)
  print(median)

  krusk <- kruskal.test(mAODscale~year, data=mereLoc)
  print(krusk)

  pairs <- pairwise.wilcox.test(mereLoc$mAODscale, mereLoc$year, p.adjust.method='BH')
  print(pairs)
}

Test Data

here is some test data. its not my actual data, I've just fabricated some.

structure(list(loc = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L), levels = c("B1", "B2", "B3"), class = "factor"), 
    mAODscale = c(0.561948848, 0.851883432, 0.293151829, 0.683807808, 
    0.864472706, 0.380303934, 0.324501709, 0.765022304, 0.900772901, 
    0.204731715, 0.877104175, 0.56367162, 0.206528162, 0.353350116, 
    0.219628257, 0.840723901, 0.716389918, 0.569798858, 0.707583441, 
    0.120064246, 0.275325307, 0.438391155, 0.308969668, 0.350156436, 
    0.886955315, 0.693416677, 0.710065022), year = structure(c(1L, 
    1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 
    3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), levels = c("1", 
    "2", "3"), class = "factor")), row.names = c(NA, -27L), class = "data.frame")

Solution

  • Here is my solution:

    names <- c('B1','B2','B3','B4','B5','B6','B7','B8',
               'SSSI1','SSSI2','SSSI3',
               "CM1", "CM2", "CM3", "CM4", "CM7", "CM8", "CM9", "CM11",
               "Crose 1", "Crose 2", "Crose 3")
    
    stats <- list()
    for (name in names){
      mereLoc <- scaledDaily %>%
        filter(scaledDaily$loc == name)
      mereLoc$year <- as.numeric(mereLoc$year)
      median <- aggregate(mereLoc$mAODscale, list(mereLoc$year), median, na.rm=T)
      
      krusk <- kruskal.test(mAODscale~year, data=mereLoc)
      krusk2 <- tidy(krusk)
      pairs <- pairwise.wilcox.test(mereLoc$mAODscale, mereLoc$year, p.adjust.method='BH')
      pairs2 <- tidy(pairs)
    
      stats[[name]] <- c("loc"=name, 
                         "Y1_median"=median$x[1], 
                         "Y2_median"=median$x[2],
                         "Y3_median"=median$x[3], 
                         "KWCHiSq"=krusk$statistic,
                         "df"=krusk$parameter,
                         "pvalue"=krusk$p.value, 
                         "Y1Y2"=pairs2[1,3], 
                         "Y1Y3"=pairs2[2,3],  
                         "Y2Y3"=pairs2[3,3])
    }
    
    stats <- Reduce(function(x,y) merge(x,y,all=TRUE), stats)
    names(stats) <- c('loc','Y1_median','Y2_median','Y3_median',
                      'KW-ChiSq','df','pvalue','Y1-Y2','Y1-Y3','Y2-Y3')