Programmatically select/run group comparisons (incl post-hoc)
I would like to perform a loop in R where, if the p-value of a Shapiro-Wilk test is less than 0.05, a Kruskal-Wallis test is performed, but if it is greater than 0.05, a one-way ANOVA is performed. Extending this, if the p-value of the Kruskal-Wallis test or the one-way ANOVA is < 0.05, a post-hoc test is performed. Would this be possible in R?
# Example data ----
# Three colour labels repeated five times, with strongly skewed counts.
Label <- c("blue", "red", "green", "blue", "red", "green", "blue", "red",
           "green", "blue", "red", "green", "blue", "red", "green")
n <- c(10, 223, 12890, 34, 78, 1902, 34, 211, 1007, 209, 330, 90446, 801,
       1029, 9011)

# Store the grouping variable as a factor: TukeyHSD() only reports terms
# that are factors in the model frame, and character columns are not
# converted automatically by data.frame() in R >= 4.0.
data <- data.frame(Label = factor(Label), n = n)

# Significance threshold shared by every decision below.
alpha <- 0.05

# Shapiro-Wilk test for normality decides which omnibus test is run.
shapiro <- shapiro.test(data$n)
print(shapiro)

if (shapiro$p.value > alpha) {
  # No evidence against normality: one-way ANOVA.
  # aov() is used (rather than lm()) because stats::TukeyHSD() has a method
  # for aov fits; anova() still works on the same object.
  OneWay <- aov(n ~ Label, data = data)
  oneway_table <- anova(OneWay)
  print(oneway_table)

  # Post-hoc comparisons only when the omnibus F test is significant.
  if (oneway_table[["Pr(>F)"]][1] < alpha) {
    # Kept from the original script; TukeyHSD() on an aov fit is provided
    # by the stats package.
    library(mosaic)
    print(TukeyHSD(OneWay))
  }
} else {
  # Normality rejected: Kruskal-Wallis rank-sum test.
  # Dependent variable first, followed by the grouping variable.
  kruskal <- kruskal.test(data$n, data$Label)
  print(kruskal)

  # TukeyHSD() has no method for the htest object returned by
  # kruskal.test(), so a rank-based pairwise procedure is used instead.
  if (kruskal$p.value < alpha) {
    # exact = FALSE: the example data contain ties, for which exact
    # p-values cannot be computed.
    kruskal_posthoc <- pairwise.wilcox.test(
      data$n, data$Label,
      p.adjust.method = "bonferroni",
      exact = FALSE
    )
    print(kruskal_posthoc)
  }
}
Thanks to the above answers, particularly @TarJae, who helped get the ball rolling. For transparency, and to hopefully help others, I am posting the resulting code, which checks whether the data are normally distributed and whether the groups have equal variances. Depending on which of these conditions is violated, the appropriate one-way ANOVA, Welch ANOVA, or Kruskal-Wallis test is performed. The code also produces the corresponding post-hoc tests.
# Example data ----
# Grouping variable: the three colour labels cycled five times
# (15 observations in total).
Label <- rep(c("blue", "red", "green"), times = 5)

# Response, option 1: heavily skewed counts that exercise the
# non-parametric branch.
n <- c(
  10, 223, 12890,
  34, 78, 1902,
  34, 211, 1007,
  209, 330, 90446,
  801, 1029, 9011
)

# Response, option 2: draws from a normal distribution that exercise the
# parametric branch. This assignment replaces the skewed vector above;
# comment it out to keep the skewed data instead.
n <- rnorm(n = 15, mean = 30, sd = 1)

# Assemble the example data frame from the two columns.
data <- data.frame(Label = Label, n = n)
# Assumption checks, omnibus test and post-hoc comparisons ----
#
# The analysis follows this decision pattern:
#
# if (normally distributed) {
#   if (equal variances) {
#     one-way ANOVA, then Tukey HSD
#   } else {
#     Welch ANOVA, then pairwise t-tests without a pooled SD
#   }
# } else {
#   Fligner-Killeen variance test (reported only), Kruskal-Wallis test,
#   then Dunn's test
# }
#
# Exactly one omnibus test is run per execution, and each post-hoc test is
# nested inside the branch that produced its omnibus test.

# Remove results left over from an earlier run in the same session, so that
# only the objects belonging to the branch taken this time exist afterwards.
stale_results <- c(
  "bart", "fligner", "OneWay", "oneway", "oneway.pvalue", "oneway_ph",
  "welch", "welch_ph", "kruskal", "kruskal_ph"
)
rm(list = intersect(stale_results, ls()))

# Significance threshold shared by every decision below.
alpha <- 0.05

# TukeyHSD() and dunnTest() need the grouping variable to be a factor;
# data.frame() leaves strings as character in R >= 4.0.
data$Label <- factor(data$Label)

# Normality check on the response.
# NOTE(review): this tests the pooled response, as the original script did;
# testing the model residuals may be more appropriate - confirm.
shapiro <- shapiro.test(data$n)

if (shapiro$p.value > alpha) {
  # Normally distributed: Bartlett's test for homogeneity of variance.
  # library(tidyverse) is retained from the original script;
  # bartlett.test() itself comes from the stats package.
  library(tidyverse)
  bart <- bartlett.test(n ~ Label, data = data)

  if (bart$p.value > alpha) {
    # Normal distribution and equal variances: one-way ANOVA.
    # aov() is used so that TukeyHSD() can be applied to the fit; anova()
    # still returns the usual ANOVA table for it.
    OneWay <- aov(n ~ Label, data = data)
    oneway <- anova(OneWay)
    oneway.pvalue <- oneway[["Pr(>F)"]][1]
    print(oneway)

    if (oneway.pvalue < alpha) {
      # TukeyHSD() needs the fitted aov object, not the ANOVA table.
      oneway_ph <- TukeyHSD(OneWay)
      print(oneway_ph)
    }
  } else {
    # Normal distribution but unequal variances: Welch ANOVA.
    welch <- oneway.test(n ~ Label, data = data, var.equal = FALSE)
    print(welch)

    if (welch$p.value < alpha) {
      # pool.sd = FALSE gives Welch-type pairwise t-tests; the default
      # pooled SD would assume the equal variances just rejected.
      welch_ph <- pairwise.t.test(
        data$n, data$Label,
        p.adjust.method = "bonferroni",
        pool.sd = FALSE
      )
      print(welch_ph)
    }
  }
} else {
  # Not normally distributed: the Fligner-Killeen test is the homogeneity
  # of variance check suited to non-normal data. It is stored for
  # reporting; the Kruskal-Wallis test is run regardless of its outcome.
  fligner <- fligner.test(n ~ Label, data = data)

  kruskal <- kruskal.test(n ~ Label, data = data)
  print(kruskal)

  if (kruskal$p.value < alpha) {
    library(FSA)
    kruskal_ph <- dunnTest(n ~ Label, data = data, method = "bonferroni")
    print(kruskal_ph)
  }
}