I am trying to create a customized boxplot from this data using ggplot2.
> dput(Family.boxplot)
structure(list(X.Datasets = c(7845L, 7846L, 7847L, 7848L, 7849L,
7866L, 7867L, 7868L, 7869L, 7857L, 7859L, 7875L, 7877L, 7878L,
7879L, 7855L, 7856L, 7858L, 7850L, 7851L, 7852L, 7853L, 7854L,
7870L, 7871L, 7872L, 7873L, 7874L, 7860L, 7861L, 7880L, 7862L,
7863L, 7864L, 7881L, 7882L, 7883L, 7884L), Akkermansiaceae = c(255L,
407L, 736L, 270L, 333L, 137L, 200L, 188L, 474L, 560L, 90L, 788L,
66L, 58L, 157L, 148L, 359L, 162L, 174L, 546L, 270L, 623L, 186L,
457L, 416L, 347L, 1483L, 353L, 597L, 229L, 714L, 409L, 701L,
269L, 860L, 1091L, 2873L, 1536L), Bacteroidaceae = c(992L, 908L,
651L, 171L, 442L, 188L, 596L, 340L, 474L, 268L, 137L, 866L, 687L,
782L, 861L, 332L, 372L, 275L, 945L, 906L, 1068L, 1460L, 546L,
1279L, 2626L, 765L, 1457L, 679L, 1532L, 729L, 1286L, 1460L, 1416L,
1093L, 1818L, 1564L, 663L, 342L), Christensenellaceae = c(0L,
0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L,
0L, 0L, 0L, 0L, 0L), Clostridiaceae = c(33L, 50L, 97L, 91L, 254L,
353L, 159L, 315L, 149L, 139L, 200L, 99L, 101L, 160L, 317L, 240L,
382L, 46L, 122L, 141L, 314L, 87L, 244L, 179L, 115L, 270L, 80L,
168L, 88L, 143L, 120L, 154L, 28L, 93L, 64L, 89L, 30L, 83L), Coriobacteriaceae = c(85L,
264L, 114L, 287L, 77L, 0L, 0L, 97L, 138L, 177L, 91L, 291L, 146L,
122L, 138L, 41L, 0L, 234L, 34L, 123L, 99L, 116L, 63L, 81L, 0L,
97L, 120L, 73L, 162L, 126L, 268L, 146L, 165L, 144L, 221L, 370L,
552L, 482L), Deferribacteraceae = c(68L, 45L, 70L, 163L, 773L,
934L, 43L, 443L, 188L, 88L, 176L, 46L, 65L, 119L, 0L, 195L, 260L,
20L, 67L, 36L, 312L, 0L, 153L, 179L, 343L, 129L, 28L, 58L, 210L,
192L, 82L, 0L, 0L, 130L, 0L, 84L, 0L, 14L), Eggerthellaceae = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 24L, 233L, 0L, 115L, 44L, 0L, 38L,
95L, 71L, 279L, 53L, 224L, 53L, 167L, 79L, 0L, 143L, 0L, 146L,
0L, 98L, 42L, 138L, 121L, 192L, 84L, 255L, 326L, 588L, 469L),
Enterobacteriaceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 141L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
Erysipelotrichaceae = c(0L, 12L, 28L, 37L, 39L, 8L, 10L,
7L, 24L, 22L, 12L, 25L, 0L, 0L, 18L, 0L, 25L, 0L, 14L, 21L,
0L, 21L, 0L, 0L, 9L, 0L, 49L, 8L, 0L, 0L, 7L, 0L, 11L, 0L,
16L, 17L, 28L, 10L), Eubacteriaceae = c(91L, 71L, 157L, 35L,
124L, 86L, 148L, 37L, 32L, 487L, 228L, 176L, 223L, 31L, 94L,
149L, 82L, 54L, 78L, 109L, 96L, 47L, 80L, 197L, 256L, 153L,
219L, 25L, 23L, 64L, 69L, 149L, 559L, 27L, 53L, 106L, 32L,
118L), Eubacteriales.Family.XIII..Incertae.Sedis = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 16L, 25L, 0L, 0L,
0L, 10L, 0L, 5L, 13L, 6L, 13L, 0L, 0L, 31L, 0L, 11L, 0L,
10L, 0L, 0L, 11L, 20L, 0L, 20L, 23L, 0L, 0L), Lachnospiraceae = c(744L,
1032L, 2506L, 1161L, 4272L, 5544L, 4230L, 5646L, 2896L, 2312L,
6130L, 1890L, 4315L, 2651L, 3829L, 5143L, 4639L, 1784L, 2701L,
2878L, 3208L, 1822L, 4891L, 3340L, 1423L, 5104L, 1220L, 3319L,
2546L, 4928L, 3637L, 2315L, 815L, 2746L, 1581L, 1750L, 928L,
2125L), Lactobacillaceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 57L, 0L, 34L, 0L, 0L, 222L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), Muribaculaceae = c(89L, 121L, 42L, 0L, 80L, 34L,
63L, 119L, 23L, 758L, 768L, 150L, 348L, 204L, 64L, 355L,
620L, 915L, 0L, 18L, 0L, 22L, 0L, 554L, 473L, 0L, 29L, 0L,
0L, 0L, 0L, 170L, 238L, 0L, 0L, 25L, 286L, 86L), Oscillospiraceae = c(282L,
176L, 507L, 133L, 1103L, 1180L, 662L, 809L, 590L, 541L, 736L,
462L, 771L, 795L, 1054L, 906L, 1332L, 197L, 714L, 497L, 1143L,
449L, 918L, 720L, 437L, 972L, 342L, 726L, 790L, 1002L, 444L,
391L, 234L, 919L, 193L, 762L, 121L, 491L), Peptococcaceae = c(0L,
0L, 0L, 0L, 9L, 0L, 0L, 12L, 7L, 0L, 10L, 7L, 0L, 0L, 0L,
11L, 14L, 0L, 0L, 0L, 11L, 7L, 11L, 4L, 0L, 12L, 0L, 0L,
0L, 15L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Peptostreptococcaceae = c(104L,
57L, 421L, 431L, 71L, 42L, 187L, 70L, 400L, 673L, 201L, 383L,
211L, 500L, 325L, 126L, 37L, 420L, 0L, 9L, 7L, 62L, 0L, 0L,
23L, 0L, 73L, 366L, 0L, 29L, 0L, 52L, 117L, 7L, 0L, 0L, 0L,
0L), Spiroplasmataceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 34L,
0L, 41L, 0L, 0L, 82L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Staphylococcaceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 9L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), Sutterellaceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 82L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 14L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Tannerellaceae = c(1298L, 673L, 870L, 0L, 275L, 457L,
734L, 647L, 654L, 171L, 116L, 226L, 226L, 1206L, 398L, 482L,
423L, 139L, 975L, 1010L, 1499L, 1347L, 1179L, 409L, 662L,
726L, 1018L, 165L, 1634L, 970L, 1057L, 1123L, 943L, 2329L,
1362L, 1081L, 390L, 378L), Responders = c("NR", "NR", "NR",
"NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR",
"NR", "NR", "CR", "CR", "CR", "NR", "NR", "NR", "NR", "NR",
"NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "CR", "CR",
"CR", "CR", "CR", "CR", "CR"), treatment = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, NA,
NA, NA, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
NA, NA, NA, NA, NA, NA, NA), levels = c("A", "B",
"C", "D"), class = c("ordered", "factor")), Order = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L), treatment.withoutresponse.indicator = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), levels = c("A", "B",
"C", "D"), class = c("ordered", "factor"))), row.names = c(NA,
-38L), class = "data.frame")
Rather than a plain 4-group boxplot, I would like the colours to follow the 'Responders' column, but I am not sure whether there is a way to do that directly.
The way I am doing it at the moment is not very straightforward:
# Outlined boxplots (one outline colour per treatment group) with the raw
# observations drawn on top.
p <- ggplot(
  data = Family.boxplot,
  aes(
    x = treatment.withoutresponse.indicator,
    y = Clostridiaceae,
    colour = treatment.withoutresponse.indicator
  )
) +
  # Boxplot outliers are hidden because every observation is drawn as a point.
  geom_boxplot(outlier.shape = NA) +
  scale_colour_manual(
    values = c("blue3", "dark green", "Dark orange", "brown3")
  ) +
  xlab("") +
  ylab("Clostridiaceae") +
  # geom_point() instead of geom_jitter(width = 0): setting only the width to 0
  # leaves the default vertical jitter active, which would nudge every point
  # by a random amount and stop the overlay below from lining up with them.
  geom_point(size = 1.5)

# Responder ("CR") observations, taken from the data itself instead of
# hard-coding how many fall in each group. `name` is kept as a character
# vector holding the treatment group of each CR row.
is_cr <- Family.boxplot$Responders %in% "CR"
new_data <- data.frame(
  name = as.character(Family.boxplot$treatment.withoutresponse.indicator[is_cr]),
  value = Family.boxplot$Clostridiaceae[is_cr]
)

# Same plot, with the CR (responder) points re-drawn on top in a separate
# colour so they stand out from the rest of their box.
# NOTE(review): `fill = name` is kept from the original; it probably has no
# visible effect on pch = 20 points beyond adding a legend -- confirm.
p + geom_point(
  data = new_data,
  aes(x = name, y = value, fill = name),
  color = "Misty Rose", pch = 20
)
This is the plot produced by my current code.
Ideally, though, I want the responders in the green box (2nd) and the red box (4th) to be shown in different colours.
That is, 4 boxes but 6 colours. I know I could also split the data further and plot those points separately, but it seems there should be an easier way.
We can use ggh4x::scale_listed():
library(ggplot2)
library(ggh4x)
# Palettes: one colour per treatment group (boxes) and one per responder
# status (points).
box_colours <- c("blue3", "dark green", "Dark orange", "brown3")
point_colours <- c("purple", "deeppink")

# `boxC` and `pointC` are placeholder aesthetics. scale_listed() gives each its
# own manual scale and declares that both stand in for "color", so the boxes
# and the points get independent colour scales.
colour_scales <- scale_listed(
  list(
    scale_fill_manual(values = box_colours, aesthetics = "boxC"),
    scale_fill_manual(values = point_colours, aesthetics = "pointC")
  ),
  replaces = c("color", "color")
)

# Boxes are coloured by treatment group, points by responder status.
# Both geoms warn that the placeholder aesthetics are unknown.
ggplot(
  data = Family.boxplot,
  aes(x = treatment.withoutresponse.indicator, y = Clostridiaceae)
) +
  geom_boxplot(
    aes(boxC = treatment.withoutresponse.indicator),
    outlier.size = 0
  ) +
  geom_point(aes(pointC = Responders), size = 1.5) +
  colour_scales +
  labs(x = "", y = "Clostridiaceae")
#> Warning in geom_boxplot(aes(boxC = treatment.withoutresponse.indicator), :
#> Ignoring unknown aesthetics: boxC
#> Warning in geom_point(aes(pointC = Responders), size = 1.5): Ignoring unknown
#> aesthetics: pointC
Created on 2024-04-12 with reprex v2.0.2