I'm struggling to create a series of high-quality ggboxplots, like the one I attach as follows:
Statistics for post-hocs comparisons have been obtained for the example above in the way you could find following this link page and I've run the following code
#Compute the post-hocs
postHocs <- df %>%
tidyr::pivot_longer(., -c(A, C, D),'s')%>%
mutate(s = fct_relevel(s,
c("E", "F", "G",
"H", "I", "J",
"K", "L", "M",
"N", "O", "P")) %>%
arrange(s) %>%
group_by(s) %>%
pairwise_t_test(
value ~ D, paired = TRUE,
p.adjust.method = "bonferroni"
) %>%
#dplyr::select(., -'s')%>%
print()
while the Anova statistics have been obtained:
res.aov <- df %>%
tidyr::pivot_longer(., -c(A, C, D),'s')%>%
mutate(s = fct_relevel(s,c("E", "F", "G",
"H", "I", "J",
"K", "L", "M",
"N", "O", "P")
)))%>%
arrange(s) %>%
group_by(s) %>%
anova_test(., value ~ D, dv = value, wid = A, within = D)%>%
print()
from which I've tried to obtain the interested statistics with this code:
unnested_aov <- lapply(res.aov$anova, `[[`, 1)
I scripted this for loop to adding the statistics to graphs
comparisons <- postHocs %>% add_xy_position(x = "D")
comparisons
for (i in 5:ncol(df)) {
bxp <- ggboxplot(df,
x = 'D', y = colnames(df[i]))
print(
bxp + stat_pvalue_manual(comparisons[,i]) +
labs(
subtitle = get_test_label(unnested_aov[[i]], detailed = TRUE),
caption = get_pwc_label(comparisons[,i])))
Sys.sleep(1)
}
Since I'm getting back some error, I thing that the problem is that 'comparison' contains 36 rows and length does not fit well the number of the graph I should obtain (12).
I'll let you here the code below and I'll be open to your best suggestion in this concern.
Thank you
structure(list(A = 1:20, C = c("Maybe", "Maybe", "Maybe", "Maybe",
"Maybe", "Maybe", "Maybe", "Maybe", "Maybe", "Maybe", "Maybe",
"Maybe", "Maybe", "Maybe", "Maybe", "Maybe", "Maybe", "Maybe",
"Maybe", "Maybe"), D = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L), .Label = c("new_value_for_8",
"new_value_for_6", "new_value_for_4"), class = "factor"), E = c(988.368784828308,
988.856158671407, 996.004085290553, 999.685844324618, 1000.23888564896,
1005.03749946898, 999.786378084971, 997.039675082569, 998.028313183065,
997.168905747014, 1001.09291198164, 993.307008354785, 1004.23849942428,
1002.98988896299, 1003.55106999008, 1009.57481668809, 1005.41677956183,
1001.70676077155, 993.869639239065, 997.170442654021), F = c(994.834756009939,
994.468875098246, 1000.62150212342, 1002.23100741241, 1003.96990710863,
1007.75899775608, 998.699806256246, 996.401009591011, 998.076594704249,
1002.19344184533, 1005.87900720863, 994.076210622421, 1002.44958531768,
1003.10043497883, 1001.65779442628, 1013.71182331817, 1006.86178446511,
1005.31481098188, 995.867593313022, 1000.16218228559), G = c(1011.88022669726,
1012.10534266625, 1012.9554415821, 1015.09810043606, 1015.40462298842,
1016.67103699915, 1003.13771453335, 999.9107434841, 1002.15365554737,
1013.67789244066, 1014.38627383064, 1006.86762877786, 1007.47946451329,
1008.93405130319, 1008.45962311068, 1023.4166601996, 1015.18680921429,
1009.97183712754, 1006.2675210718, 1010.14219845841), H = c(988.221495702721,
990.850727928741, 992.418094914622, 995.984841639886, 993.398346143465,
997.971380355398, 1004.4672957051, 1002.54036572775, 1002.2292388993,
999.116379988893, 997.364309124077, 997.937032776913, 1001.14544537612,
1002.08056674659, 1000.0422658299, 1013.29862597967, 1005.06669915366,
1003.93467692475, 1000.02290694207, 1004.31923128858), I = c(994.035709684742,
994.890815628412, 997.18267770374, 998.564426335124, 996.851278420874,
1000.16039368502, 1003.52155765272, 1002.1043798945, 1002.7069399281,
1005.49897156208, 1005.81171180245, 998.62698748611, 999.56563615154,
1002.87987510596, 998.728473297166, 1017.2093269366, 1007.79412746756,
1008.11964589961, 1004.9525336386, 1009.50695673265), J = c(1008.23981597718,
1009.51261484649, 1009.42367409926, 1005.06332653216, 1005.02619159395,
1009.07903916629, 1007.56089165218, 1005.49719893791, 1004.91476855238,
1013.03209535721, 1010.84145164945, 1005.86927622259, 1003.25309970443,
1004.68478802971, 1002.71096740085, 1025.56743956652, 1016.32418136177,
1013.09901927997, 1011.92002817369, 1014.69013052771), K = c(994.327042030287,
995.608170991922, 997.033470393412, 1000.15918365269, 998.216388150646,
1001.97377908784, 1003.17401220482, 1001.60211665164, 1002.27932356239,
1002.41479226363, 999.832076213262, 1001.37236796086, 1001.17012593697,
1001.40362599894, 999.964771265342, 1012.75282463779, 1008.65746780516,
1005.290878105, 999.464067607865, 1005.14963479715), L = c(999.225538268699,
999.349990537239, 1001.14010250645, 1001.51403741206, 1000.25571835554,
1003.76051565494, 1002.74763442988, 1001.09116707486, 1003.29833843754,
1006.55857216695, 1007.06029312947, 1000.60539548502, 999.637387760292,
1002.72729847885, 998.034039799405, 1016.5065564384, 1009.68783611392,
1009.47863905986, 1003.56318544047, 1009.23934223585), M = c(1009.99385579756,
1011.12126521731, 1010.6989716872, 1003.7899021821, 1004.59413830322,
1008.52123662618, 1006.34418311104, 1004.1077131243, 1004.94124365003,
1011.89121961563, 1010.13326381032, 1005.33467168056, 1001.04545904874,
1002.86650202467, 1000.45601490752, 1022.13789464831, 1016.25544969107,
1012.37379951646, 1008.53744587416, 1012.00856171947), N = c(999.801263745036,
996.838989582336, 1000.89599227983, 1003.11042068113, 1002.27800090558,
1003.83846437952, 1000.70169995102, 1001.75290674649, 998.660833714301,
1006.69246804854, 1004.7636391085, 1005.63873342951, 1001.37744267414,
1000.97339668679, 1001.98775658049, 1004.70492544978, 1012.11738595707,
1001.0458886613, 996.725751886115, 1003.17906097432), O = c(1002.96437294923,
997.870867692911, 1002.94619035116, 1003.44844607015, 1003.02403433836,
1004.70457675466, 999.880559826981, 1000.66826545719, 999.59436981446,
1007.32640154038, 1006.7506344557, 1001.59973104217, 1000.71689406196,
1001.15587576193, 999.988638552344, 1006.4489695839, 1010.51785193511,
1003.79329103591, 999.118472788132, 1004.99936090838), P = c(1006.28027312932,
1005.24535230967, 1007.68162285336, 1001.08242973466, 1002.99896314,
1005.36085942954, 1001.22060069797, 1000.43007709819, 1000.47666761108,
1008.73650967215, 1007.20593389744, 1004.57722295264, 998.66379615346,
998.711140983915, 999.452420534917, 1008.11715753014, 1013.30601537204,
1002.03237948844, 1002.88799699943, 1005.57921718108)), row.names = c(NA,
20L), class = "data.frame")
This takes a bit more work.
Let's start with data preparation
#Loading libraries
library(tidyverse)
library(rstatix)
library(ggpubr)
library(readxl)
#Upload data
df_join <- read_excel("df_join.xlsx")
df = df_join %>%
mutate_at(vars(ID:COND), factor) %>%
pivot_longer(P3FCz:LPP2Pz, names_to = "signals") %>%
group_by(signals) %>%
nest()
#Let's see what we have here
df
df$data[[1]]
output
# A tibble: 12 x 2
# Groups: signals [12]
signals data
<chr> <list>
1 P3FCz <tibble [75 x 5]>
2 P3Cz <tibble [75 x 5]>
3 P3Pz <tibble [75 x 5]>
4 LPPearlyFCz <tibble [75 x 5]>
5 LPPearlyCz <tibble [75 x 5]>
6 LPPearlyPz <tibble [75 x 5]>
7 LPP1FCz <tibble [75 x 5]>
8 LPP1Cz <tibble [75 x 5]>
9 LPP1Pz <tibble [75 x 5]>
10 LPP2FCz <tibble [75 x 5]>
11 LPP2Cz <tibble [75 x 5]>
12 LPP2Pz <tibble [75 x 5]>
> df$data[[1]]
# A tibble: 75 x 5
ID GR SES COND value
<fct> <fct> <fct> <fct> <dbl>
1 01 RP V NEG-CTR -11.6
2 01 RP V NEG-NOC -11.1
3 01 RP V NEU-NOC -4.00
4 04 RP V NEG-CTR -0.314
5 04 RP V NEG-NOC 0.239
6 04 RP V NEU-NOC 5.04
7 06 RP V NEG-CTR -0.214
8 06 RP V NEG-NOC -2.96
9 06 RP V NEU-NOC -1.97
10 07 RP V NEG-CTR -2.83
# ... with 65 more rows
Now we have to prepare functions that will return us some statistics
#Preparation of functions for statistics
ShapiroTest = function(d, alpha=0.05){
st = list(statistic = NA, p.value = NA, test = FALSE)
if(length(d)>5000) d=sample(d, 5000)
if(length(d)>=3 & length(d)<=5000){
sw = shapiro.test(d)
st$statistic = sw$statistic
st$p.value = sw$p.value
st$test = sw$p.value>alpha
}
return(st)
}
sum_stats = function(data, group, value, alpha=0.05)data %>%
group_by(!!enquo(group)) %>%
summarise(
n = n(),
q1 = quantile(!!enquo(value),1/4,8),
min = min(!!enquo(value)),
mean = mean(!!enquo(value)),
median = median(!!enquo(value)),
q3 = quantile(!!enquo(value),3/4,8),
max = max(!!enquo(value)),
sd = sd(!!enquo(value)),
kurtosis = e1071::kurtosis(!!enquo(value)),
skewness = e1071::skewness(!!enquo(value)),
SW.stat = ShapiroTest(!!enquo(value), alpha)$statistic,
SW.p = ShapiroTest(!!enquo(value), alpha)$p.value,
SW.test = ShapiroTest(!!enquo(value), alpha)$test,
nout = length(boxplot.stats(!!enquo(value))$out)
)
#Using the sum stats function
df$data[[1]] %>% sum_stats(COND, value)
df %>% group_by(signals) %>%
mutate(stats = map(data, ~sum_stats(.x, COND, value))) %>%
unnest(stats)
output
# A tibble: 36 x 17
# Groups: signals [12]
signals data COND n q1 min mean median q3 max sd kurtosis skewness SW.stat SW.p SW.test nout
<chr> <list> <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <int>
1 P3FCz <tibble [7~ NEG-~ 25 -5.73 -11.6 -1.59 -1.83 1.86 9.57 4.93 -0.612 0.128 0.984 0.951 TRUE 0
2 P3FCz <tibble [7~ NEG-~ 25 -4.14 -11.1 -1.39 -0.522 0.659 8.16 4.44 -0.169 -0.211 0.978 0.842 TRUE 1
3 P3FCz <tibble [7~ NEU-~ 25 -5.35 -6.97 -1.48 -1.97 1.89 6.24 4.00 -1.28 0.189 0.943 0.174 TRUE 0
4 P3Cz <tibble [7~ NEG-~ 25 -2.76 -8.16 0.313 0.906 2.45 13.7 4.58 0.867 0.622 0.952 0.277 TRUE 1
5 P3Cz <tibble [7~ NEG-~ 25 -2.22 -9.23 0.739 0.545 3.10 14.5 4.78 1.05 0.527 0.963 0.477 TRUE 1
6 P3Cz <tibble [7~ NEU-~ 25 -4.14 -6.07 -0.107 0.622 2.16 7.76 3.88 -1.09 0.0112 0.957 0.359 TRUE 0
7 P3Pz <tibble [7~ NEG-~ 25 5.66 -0.856 8.79 7.48 11.9 23.4 5.22 0.502 0.673 0.961 0.436 TRUE 1
8 P3Pz <tibble [7~ NEG-~ 25 3.86 -0.888 8.45 7.88 11.2 20.7 5.23 -0.530 0.206 0.982 0.924 TRUE 0
9 P3Pz <tibble [7~ NEU-~ 25 2.30 -0.932 6.43 6.82 8.46 16.7 4.60 -0.740 0.373 0.968 0.588 TRUE 0
10 LPPearlyFCz <tibble [7~ NEG-~ 25 -4.02 -11.8 -0.666 0.149 1.90 13.3 5.02 0.916 0.202 0.947 0.215 TRUE 1
# ... with 26 more rows
Next we need to analyze the QQ charts. To do this, let's prepare the appropriate functions.
#function that creates a QQ plot
SignalQQPlot = function(data, signal, autor = "G. Anonim") data %>%
group_by(COND) %>%
mutate(label = paste("\nSW p-value:" ,
signif(shapiro.test(value)$p.value, 3))) %>%
ggqqplot("value", facet.by = "COND") %>%
ggpar(title = paste("QQ plots for", signal, "signal"),
caption = autor)+
geom_label(aes(x=0, y=+Inf, label=label))
#QQ plot for the P3FCz signal
df$data[[1]] %>% SignalQQPlot("P3FCz")
#A function that creates a QQ plot and adds it to a data frame
AddSignalQQPlot = function(df, signal, printPlot=TRUE) {
plot = SignalQQPlot(df$data[[1]], signal)
if(printPlot) print(plot)
df %>% mutate(qqplot = list(plot))
}
#Added QQ charts
df %>% group_by(signals) %>%
group_modify(~AddSignalQQPlot(.x, .y))
Now we will add statistics from ANOVA and tTest
#Add ANOVA test value
fAddANOVA = function(data) data %>%
anova_test(value~COND) %>% as_tibble()
#Method 1
df %>% group_by(signals) %>%
group_modify(~fAddANOVA(.x$data[[1]]))
#Method 2
df %>% group_by(signals) %>%
mutate(ANOVA = map(data, ~fAddANOVA(.x))) %>%
unnest(ANOVA)
output
# A tibble: 12 x 9
# Groups: signals [12]
signals data Effect DFn DFd F p `p<.05` ges
<chr> <list> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
1 P3FCz <tibble [75 x 5]> COND 2 72 0.012 0.988 "" 0.000338
2 P3Cz <tibble [75 x 5]> COND 2 72 0.228 0.797 "" 0.006
3 P3Pz <tibble [75 x 5]> COND 2 72 1.61 0.207 "" 0.043
4 LPPearlyFCz <tibble [75 x 5]> COND 2 72 0.885 0.417 "" 0.024
5 LPPearlyCz <tibble [75 x 5]> COND 2 72 2.65 0.077 "" 0.069
6 LPPearlyPz <tibble [75 x 5]> COND 2 72 4.62 0.013 "*" 0.114
7 LPP1FCz <tibble [75 x 5]> COND 2 72 1.08 0.344 "" 0.029
8 LPP1Cz <tibble [75 x 5]> COND 2 72 2.45 0.094 "" 0.064
9 LPP1Pz <tibble [75 x 5]> COND 2 72 3.97 0.023 "*" 0.099
10 LPP2FCz <tibble [75 x 5]> COND 2 72 0.103 0.903 "" 0.003
11 LPP2Cz <tibble [75 x 5]> COND 2 72 0.446 0.642 "" 0.012
12 LPP2Pz <tibble [75 x 5]> COND 2 72 1.17 0.316 "" 0.031
And for tTest
#Add pairwise_t_test
fAddtTest = function(data) data %>%
pairwise_t_test(value~COND, paired = TRUE,
p.adjust.method = "bonferroni")
#Method 1
df %>% group_by(signals) %>%
group_modify(~fAddtTest(.x$data[[1]]))
#Method 2
df %>% group_by(signals) %>%
mutate(tTest = map(data, ~fAddtTest(.x))) %>%
unnest(tTest)
output
# A tibble: 36 x 12
# Groups: signals [12]
signals data .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
<chr> <list> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
1 P3FCz <tibble [75 x 5]> value NEG-CTR NEG-NOC 25 25 -0.322 24 0.75 1 ns
2 P3FCz <tibble [75 x 5]> value NEG-CTR NEU-NOC 25 25 -0.178 24 0.86 1 ns
3 P3FCz <tibble [75 x 5]> value NEG-NOC NEU-NOC 25 25 0.112 24 0.911 1 ns
4 P3Cz <tibble [75 x 5]> value NEG-CTR NEG-NOC 25 25 -0.624 24 0.539 1 ns
5 P3Cz <tibble [75 x 5]> value NEG-CTR NEU-NOC 25 25 0.592 24 0.559 1 ns
6 P3Cz <tibble [75 x 5]> value NEG-NOC NEU-NOC 25 25 0.925 24 0.364 1 ns
7 P3Pz <tibble [75 x 5]> value NEG-CTR NEG-NOC 25 25 0.440 24 0.664 1 ns
8 P3Pz <tibble [75 x 5]> value NEG-CTR NEU-NOC 25 25 3.11 24 0.005 0.014 *
9 P3Pz <tibble [75 x 5]> value NEG-NOC NEU-NOC 25 25 2.43 24 0.023 0.069 ns
10 LPPearlyFCz <tibble [75 x 5]> value NEG-CTR NEG-NOC 25 25 -0.0919 24 0.928 1 ns
# ... with 26 more rows
Now is the time to create a boxplot
#Function to special boxplot
SpecBoxplot = function(data, autor = "G. Anonim"){
box = data %>% ggboxplot(x = "COND", y = "value", add = "point")
pwc = data %>%
pairwise_t_test(value~COND, paired = TRUE,
p.adjust.method = "bonferroni") %>%
add_xy_position(x = "COND")
res.aov = data %>% anova_test(value~COND)
data %>%
ggboxplot(x = "COND", y = "value", add = "point") +
stat_pvalue_manual(pwc) +
labs(
title = get_test_label(res.aov, detailed = TRUE),
subtitle = get_pwc_label(pwc),
caption = autor
)
}
#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot()
#A function that creates a special boxplot and adds it to a data frame
AddSignalBoxplot = function(df, signal, printPlot=TRUE) {
plot = SpecBoxplot(df$data[[1]], signal)
if(printPlot) print(plot)
df %>% mutate(boxplot = list(plot))
}
#Added special boxplot
df %>% group_by(signals) %>%
group_modify(~AddSignalBoxplot(.x, .y))
Due to the inconsistency with the normal distribution in the modified function, I used the Wilcoxon test for comparison. I also improved the boxplots a bit.
#Function to special boxplot2
SpecBoxplot2 = function(data, signal, autor = "G. Anonim"){
pwc = data %>% pairwise_wilcox_test(value~COND) %>%
add_xy_position(x = "COND") %>%
mutate(COND="NEG-CTR",
lab = paste(p, " - ", p.adj.signif))
data %>% ggplot(aes(COND, value, fill=COND))+
geom_violin(alpha=0.2)+
geom_boxplot(outlier.shape = 23,
outlier.size = 3,
alpha=0.6)+
geom_jitter(shape=21, width =0.1)+
stat_pvalue_manual(pwc, step.increase=0.05, label = "lab")+
labs(title = signal,
subtitle = get_pwc_label(pwc),
caption = autor)
}
#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot2("P3FCz")
#A function that creates a special boxplot2 and adds it to a data frame
AddSignalBoxplot2 = function(df, signal, printPlot=TRUE) {
plot = SpecBoxplot2(df$data[[1]], signal)
if(printPlot) print(plot)
df %>% mutate(boxplot = list(plot))
}
#Added special boxplot2
df %>% group_by(signals) %>%
group_modify(~AddSignalBoxplot2(.x, .y))
Special update fom my frend
Hey @mały_statystyczny! You will be great!
Here is a special update for you. Below is a function that prepares graphs with both parametric and non-parametric statistics.
#Function to special boxplot3
SpecBoxplot3 = function(data, signal, parametric = FALSE, autor = "G. Anonim"){
if(parametric) {
pwc = data %>%
pairwise_t_test(value~COND, paired = TRUE,
p.adjust.method = "bonferroni") %>%
add_xy_position(x = "COND") %>%
mutate(COND="NEG-CTR",
lab = paste(p, " - ", p.adj.signif))
res.test = data %>% anova_test(value~COND)
} else {
pwc = data %>% pairwise_wilcox_test(value~COND) %>%
add_xy_position(x = "COND") %>%
mutate(COND="NEG-CTR",
lab = paste(p, " - ", p.adj.signif))
res.test = data %>% kruskal_test(value~COND)
}
data %>% ggplot(aes(COND, value, fill=COND))+
geom_violin(alpha=0.2)+
geom_boxplot(outlier.shape = 23,
outlier.size = 3,
alpha=0.6)+
geom_jitter(shape=21, width =0.1)+
stat_pvalue_manual(pwc, step.increase=0.05, label = "lab")+
ylab(signal)+
labs(title = get_test_label(res.test, detailed = TRUE),
subtitle = get_pwc_label(pwc),
caption = autor)
}
#special boxplot for the P3FCz signal
df$data[[1]] %>% SpecBoxplot3("P3FCz", TRUE)
df$data[[1]] %>% SpecBoxplot3("P3FCz", FALSE)
#A function that creates a special boxplot3 and adds it to a data frame
AddSignalBoxplot3 = function(df, signal, printPlot=TRUE) {
plot1 = SpecBoxplot3(df$data[[1]], signal, TRUE)
plot2 = SpecBoxplot3(df$data[[1]], signal, FALSE)
if(printPlot) print(plot1)
if(printPlot) print(plot2)
df %>% mutate(boxplot1 = list(plot1),
boxplot2 = list(plot2),
)
}
#Added special boxplot3
df %>% group_by(signals) %>%
group_modify(~AddSignalBoxplot3(.x, .y))