I'm trying to draw a combined box-violin plot in ggplot2, but I can't find a way to make `geom_violin` ignore outliers, which `geom_boxplot` handles with `outlier.shape = NA`. As a result, the tails of the violins extend all the way to the top of the y-axis.
Here's my data:
> dput(data)
structure(list(Group = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B"), Type = c("1",
"1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2"), Value = c(1245.2261, 2886.96, 3572.6615,
2011.1111, 3321.2025, 229.5533, 14.1449, 135.291, 54.4526, 36.0926,
74.5434, 86.335, 131.4279, 105.4935, 14.5906, 1.503, 2.7716,
42.381, 88.9701, 869.6742, 316.855, 32.9683, 6.4267, 52.2946,
164.4073, 54.2387, 37.5134, 71.2792, 145.958, 114.6187, 36.133,
1.8108, 67.9746, 39.8386, 382.5043, 40.1728, 37.1252, 288.6866,
25.085, 21.8553, 15.0067, 143.8127, 16.8865, 26.8421, 8.8349,
188.1872, 42.2323, 64.2163, 56.7453, 85.4888, 29.6905, 6.1148,
43.0328, 158.0811, 90.4613, 217.033, 111.5344, 271.5655, 195.7022,
79.7093, 6.0458, 116.6274, 43.6644, 72.4189, 89.9063, 37.6572,
294.5133, 46.8855, 16.7959, 50.4155, 39.6882, 18.7457, 12.728,
40.2756, 129.6219, 190.0905, 796.7611, 30.1724, 14.8736, 551.2666,
18.2315, 57.9076, 129.7094, 158.1109, 256.6553, 79.6724, 75.2056,
7.2661, 18.7643, 79.4748, 445.5713, 9.9553, 106.6388, 50.0596,
56.4002, 157.1143, 9.805, 117.2691, 8.9047, 3.6258, 387.132,
56.8996, 40.7247, 1117.4439, 79.4742, 224.0688, 134.8485, 8.4794,
23.1996, 65.2439, 389.3144, 294.4159, 671.4736, 541.8969, 64.3243,
25.0634, 7.727, 20.8132, 149.3634, 160.7447, 114.1869, 38.4615,
28.502, 34.0532, 15.0038, 1028.626, 166.3813, 24.7788, 306.6516,
204.0348, 18.1818, 77.4041, 24.1017, 96.4706, 59.4937, 23.2078,
3.192, 37.8065, 40.8055, 8.3577, 7.4273, 66.426, 1548.8338, 3.6242,
92.264, 42.8195, 282.1101, 104.0848, 42.5784, 9.9258, 63.8066,
99.6852, 26.5864, 270.322, 121.4097, 32.6258, 287.2582, 7.4627,
851.5289, 156.0563, 324.1189, 101.5936, 5.618, 114.3788, 54.6875,
96.5594, 446.1059, 95.1883, 30.3678, 48.2655, 61.4182, 66.5381,
4.0973, 8.1744, 2.7192, 0.3697, 0.3681, 0.7488, 0, 7.9272, 1.1391,
1.4375, 0.7535, 0.8256, 1.0323, 0.9053, 2.7822, 0.6899, 3.037,
2.423, 0.7045, 6.1298, 1.7498, 10.5565, 0.684, 2.1433, 1.5334,
1.7043, 1.3783, 0.6146, 8.9179, 1.3879, 4.2004, 2.0747, 0.3508,
4.4362, 0.7214, 1.2232, 4.1245, 17.8295, 240.18, 61.0013, 0.813,
69.9786, 0.4346, 1.624, 30.4569, 4.4143, 5.3119, 0.4459, 0, 1.1484,
3.7614, 2.863, 1452.5581, 3.7736, 1.7705, 10.6081, 2743.5433,
6.019, 0.4851, 2.4719, 7.5529, 0, 6.9739, 1.5783, 0, 1.3115,
2.7701, 2.6135, 0, 0.9915, 4.0413, 2.3496, 1.796, 0.8745, 5.6391,
0.2803, 3202.3684, 19.5453, 17.5439, 1.831, 4.1848, 0.7547, 0,
0.5253, 0, 38.07, 0.6656, 5.6184, 0, 1.6858, 0.4801, 0.6676,
3.0412)), row.names = c(NA, -265L), class = c("tbl_df", "tbl",
"data.frame"))
and the code:
# Grouped box-violin plot of Value per Group, split and filled by Type.
# The violins are drawn from the full Value column, outliers included.
ggplot(data, aes(x = Group, y = Value, fill = Type)) +
  # Violins: scaled to equal width, kept out of the legend.
  geom_violin(
    width = 0.5,
    scale = "width",
    color = "black",
    show.legend = FALSE
  ) +
  # Narrow boxplots dodged by the same width as the violins;
  # outlier.shape = NA suppresses the outlier points.
  geom_boxplot(
    position = position_dodge(width = 0.5),
    width = 0.1,
    color = "black",
    lwd = 0.5,
    outlier.shape = NA,
    show.legend = TRUE
  ) +
  scale_fill_manual(
    name = "Type",
    breaks = c("1", "2"),
    values = c("1" = "red", "2" = "forestgreen")
  ) +
  # Wilcoxon test between the Type levels, labelled with significance symbols.
  stat_compare_means(
    aes(group = Type),
    method = "wilcox.test",
    label = "p.signif",
    vjust = 0.5,
    size = 10
  ) +
  # Axis breaks are derived from the full range of Value.
  scale_y_continuous(breaks = pretty(data$Value, n = 8)) +
  labs(x = "Group", y = "Value") +
  theme_bw() +
  # No grid lines or panel background; bold black axis text and titles.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black"),
    panel.background = element_blank(),
    axis.ticks = element_line(color = "black"),
    axis.text.x = element_text(
      size = 12, color = "black", face = "bold", vjust = 0.5
    ),
    axis.text.y = element_text(size = 12, color = "black", face = "bold"),
    axis.title = element_text(size = 15, face = "bold"),
    legend.title = element_text(size = 15, face = "bold")
  ) +
  guides(
    fill = guide_legend(title = "Type", override.aes = list(size = 10))
  )
I tried this, but it doesn't work for me. I would like the tails of the violin to be restricted to the two ends of the boxplot (its minimum/maximum values).
Here is one option: use the original `Value` column for the boxplot, and create a new column for the violin in which all outliers are set to `NA`. Remember, you can always mutate your data to fit your use case!
library(dplyr)
library(ggplot2)
library(ggpubr)
data <- structure(...)
#' Replace IQR-rule outliers with NA
#'
#' Sets every element of `x` that lies more than `coef` interquartile ranges
#' below the first quartile or above the third quartile to `NA`. All other
#' elements are returned unchanged and in their original positions.
#'
#' @param x A numeric vector. Missing values are ignored when the quartiles
#'   are computed and are kept as `NA` in the result.
#' @param coef Multiplier applied to the interquartile range to obtain the
#'   outlier fences. Defaults to `1.5`.
#' @return A vector the same length as `x` with outliers replaced by `NA`.
replace_outliers <- function(x, coef = 1.5) {
  # na.rm = TRUE: quantile() stops with an error on missing values otherwise.
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  # Named `fence_width` rather than `IQR` to avoid masking stats::IQR().
  fence_width <- coef * (quartiles[2] - quartiles[1])
  is_outlier <- (x < quartiles[1] - fence_width) |
    (x > quartiles[2] + fence_width)
  # which() drops NA comparisons, so only genuine outliers are overwritten.
  x[which(is_outlier)] <- NA
  x
}
# Box-violin plot in which the violins ignore outliers.
# The violins are drawn from Value_NA (outliers replaced by NA), while the
# boxplots and the significance test keep using the untouched Value column.
data %>%
  # Outliers are determined separately for each Group/Type combination,
  # i.e. for each individual box.
  group_by(Group, Type) %>%
  mutate(Value_NA = replace_outliers(Value)) %>%
  # Grouping is only needed for the outlier step above.
  ungroup() %>%
  ggplot(aes(x = Group, fill = Type)) +
  # na.rm = TRUE: the NAs were inserted on purpose, so drop them silently
  # instead of warning about removed rows each time the plot is rendered.
  geom_violin(
    aes(y = Value_NA),
    width = 0.5,
    scale = "width",
    color = "black",
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  # Narrow boxplots dodged by the same width as the violins;
  # outlier.shape = NA suppresses the outlier points.
  geom_boxplot(
    aes(y = Value),
    position = position_dodge(width = 0.5),
    width = 0.1,
    color = "black",
    lwd = 0.5,
    outlier.shape = NA,
    show.legend = TRUE
  ) +
  scale_fill_manual(
    name = "Type",
    breaks = c("1", "2"),
    values = c("1" = "red", "2" = "forestgreen")
  ) +
  # The test is run on the full Value column, outliers included.
  stat_compare_means(
    aes(y = Value, group = Type),
    method = "wilcox.test",
    label = "p.signif",
    vjust = 0.5,
    size = 10
  ) +
  # Axis breaks are derived from the full range of Value.
  scale_y_continuous(breaks = pretty(data$Value, n = 8)) +
  xlab("Group") +
  ylab("Value") +
  theme_bw() +
  # No grid lines or panel background; bold black axis text and titles.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black"),
    panel.background = element_blank(),
    axis.ticks = element_line(color = "black"),
    axis.text.x = element_text(
      size = 12,
      color = "black",
      face = "bold",
      vjust = 0.5
    ),
    axis.text.y = element_text(
      size = 12,
      color = "black",
      face = "bold"
    ),
    axis.title = element_text(size = 15, face = "bold"),
    legend.title = element_text(size = 15, face = "bold")
  ) +
  guides(
    fill = guide_legend(
      title = "Type",
      override.aes = list(size = 10)
    )
  )
Created on 2024-05-14 with reprex v2.1.0