I'm stuck with adding group comparisons to a facetted plot.
Since adding stat_compare_means() to a ggline plot, that has numeric.x.axis = TRUE as an argument doesn't work, I had to work around with geom_text() and compare_means() within as described in this post.
As a result I was able to produce a simple plot with this approach. However, when I want to facet in ggline, I get the same comparison in both facets, and I have no idea how to solve this.
Can anybody help me, to produce the right comparisons between "steatosis" groups in the separate facets of "disease_group"?
library(ggpubr)
library(tidyverse)
#create df
set.seed(123)
num_participants <- 60
timepoints <- c(0, 30, 60, 120, 180)
meal_test_data <- data.frame(
id = rep(1:num_participants, each = length(timepoints)),
disease_group = as.factor(rep(c("0", "2"), each = num_participants / 2)),
MMT_TIMEPOINT = rep(c(0, 30, 60, 120, 180), times = 60),
Metabolite = pmax(rnorm(num_participants * length(timepoints), mean = 30, sd = 20), 0)
) %>%
group_by(id, disease_group) %>%
mutate(
steatosis_percent = ifelse(disease_group == "2", runif(1, 2, 30), runif(1, 0, 10)),
steatosis = if_else(steatosis_percent < 5.6, "no_Steatosis", "Steatosis")
) %>%
ungroup()
#plot
meal_test_data %>%
ggline(x="MMT_TIMEPOINT",
y="Metabolite",
facet.by = "disease_group",
add=c("mean_sd"),
add.params = list(width=3),
color="steatosis",
numeric.x.axis = TRUE,
pal="npc") +
geom_jitter(aes(color = steatosis), alpha=0.2, width=6) +
ylim(0,maximum_plus5) +
#readjust scales
scale_x_continuous(breaks = c(0,30,60,120,180)) +
#add geom_text() because stat_compare_means not working with ggline that has
#a numeric axis...
geom_text(aes(x = MMT_TIMEPOINT, y=rep(maximum_plus3, 10), label = p.signif),
color = "black", inherit.aes = FALSE,
data = meal_test_data %>%
nest(data = -MMT_TIMEPOINT) %>%
mutate(data2 = map(data, ~ compare_means(Metabolite ~ steatosis, .x,
ref.group = "no_Steatosis",
method = "wilcox.test",
symnum.args =
list(cutpoints = c(0, 0.0001, 0.001,
0.01, 0.05, 0.1, Inf),
symbols = c("****", "***", "**",
"*", ".", "ns"))))) %>%
unnest(data2))
Plot so far (but I think it has the same statistics for each facet):
The dataset created for geom_text
only has the mean comparisons for the steatosis groups at each time point, when you also need seperate comparisons of the means at each time point for each disease group. Adding disease group as a nesting variable seems to do the trick! Although I would be wary of this kind of comparison for repeated measures data.
library(ggpubr)
library(tidyverse)
#create df
set.seed(123)
num_participants <- 60
timepoints <- c(0, 30, 60, 120, 180)
meal_test_data <- data.frame(
id = rep(1:num_participants, each = length(timepoints)),
disease_group = as.factor(rep(c("0", "2"), each = num_participants / 2)),
MMT_TIMEPOINT = rep(c(0, 30, 60, 120, 180), times = 60),
Metabolite = pmax(rnorm(num_participants * length(timepoints), mean = 30, sd = 20), 0)
) %>%
group_by(id, disease_group) %>%
mutate(
steatosis_percent = ifelse(disease_group == "2", runif(1, 2, 30), runif(1, 0, 10)),
steatosis = if_else(steatosis_percent < 5.6, "no_Steatosis", "Steatosis")
) %>%
ungroup()
maximum_plus5 <- max(meal_test_data$Metabolite) + 5
maximum_plus3 <- max(meal_test_data$Metabolite) + 3
#plot
meal_test_data %>%
ggline(x="MMT_TIMEPOINT",
y="Metabolite",
facet.by = "disease_group",
add=c("mean_sd"),
add.params = list(width=3),
color="steatosis",
numeric.x.axis = TRUE,
pal="npc") +
geom_jitter(aes(color = steatosis), alpha=0.2, width=6) +
ylim(0,maximum_plus5) +
#readjust scales
scale_x_continuous(breaks = c(0,30,60,120,180)) +
#add geom_text() because stat_compare_means not working with ggline that has
#a numeric axis...
geom_text(aes(x = MMT_TIMEPOINT, y=rep(maximum_plus3, 10), label = p.signif),
color = "black", inherit.aes = FALSE,
data = meal_test_data %>%
nest(data = -c(MMT_TIMEPOINT, disease_group)) %>%
mutate(data2 = map(data, ~ compare_means(Metabolite ~ steatosis, .x,
ref.group = "no_Steatosis",
method = "wilcox.test",
symnum.args =
list(cutpoints = c(0, 0.0001, 0.001,
0.01, 0.05, 0.1, Inf),
symbols = c("****", "***", "**",
"*", ".", "ns"))))) %>%
unnest(data2))
So now the data for geom_text looks like this:
> data
# A tibble: 10 × 11
disease_group MMT_TIMEPOINT data .y. group1 group2 p p.adj p.format p.signif method
<fct> <dbl> <list> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
1 0 0 <tibble> Metabol… no_St… Steat… 0.621 0.62 0.62 ns Wilco…
2 0 30 <tibble> Metabol… no_St… Steat… 0.229 0.23 0.23 ns Wilco…
3 0 60 <tibble> Metabol… no_St… Steat… 1 1 1 ns Wilco…
4 0 120 <tibble> Metabol… no_St… Steat… 0.408 0.41 0.41 ns Wilco…
5 0 180 <tibble> Metabol… no_St… Steat… 0.336 0.34 0.34 ns Wilco…
6 2 0 <tibble> Metabol… no_St… Steat… 0.374 0.37 0.37 ns Wilco…
7 2 30 <tibble> Metabol… no_St… Steat… 0.980 0.98 0.98 ns Wilco…
8 2 60 <tibble> Metabol… no_St… Steat… 0.00758 0.0076 0.0076 ** Wilco…
9 2 120 <tibble> Metabol… no_St… Steat… 0.347 0.35 0.35 ns Wilco…
10 2 180 <tibble> Metabol… no_St… Steat… 0.296 0.3 0.3 ns Wilco…
instead of what we had before:
> data
# A tibble: 5 × 10
MMT_TIMEPOINT data .y. group1 group2 p p.adj p.format p.signif method
<dbl> <list> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
1 0 <tibble [60 × 5]> Metabolite no_Steatosis Steat… 0.494 0.49 0.49 ns Wilco…
2 30 <tibble [60 × 5]> Metabolite no_Steatosis Steat… 0.164 0.16 0.16 ns Wilco…
3 60 <tibble [60 × 5]> Metabolite no_Steatosis Steat… 0.131 0.13 0.13 ns Wilco…
4 120 <tibble [60 × 5]> Metabolite no_Steatosis Steat… 0.813 0.81 0.81 ns Wilco…
5 180 <tibble [60 × 5]> Metabolite no_Steatosis Steat… 0.0921 0.092 0.092 . Wilco…