I am attempting to plot something like the following, where the x axis shows InEx and ReA SFMC (two categories of the variable "Group"), the y axis is the variable "n" (frequency), and the fill is the variable "CTaa_beta".
My data is as follows:
# Example data: one row per (CTaa_beta, Group) pair with its frequency `n`.
# Built inside local() so the helper vectors do not leak into the workspace.
a <- local({
  # All levels of the Group factor, in their original order.
  group_levels <- c(
    "HC PBMC", "axSpA PBMC", "axSpA SFMC", "InEx",
    "PD-1+ TIGIT+", "ReA PBMC", "ReA SFMC"
  )

  # Integer level codes per row: 4 = "InEx", 7 = "ReA SFMC".
  group_codes <- c(
    4L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 7L, 4L,
    4L, 4L, 4L, 4L, 4L, 4L, 7L, 7L, 7L, 4L,
    4L, 4L, 7L, 7L, 7L, 4L, 7L, 4L, 4L, 4L,
    4L, 4L, 4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L
  )

  structure(
    list(
      CTaa_beta = c(
        "CASSVAGPNTEAFF", "CASSEGTSGGASTQYF", "CASSLRQGPSYEQYF",
        "CASSVGNRGGTDTQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF",
        "CSVVPGGQGGYEQYF", "CASSLEGRERYEQFF", "CASSLLAGGNNEQFF",
        "CASTPAVRDGNYEQYF", "CASSSGGLDEQYF", "CASSAGLAGGYEQYF",
        "CASSSPGTTNEKLFF", "CATSIGGPPYEQYF", "CASSLSGSPAYGYTF",
        "CASSEGHRGGTDTQYF", "CASSLRDSHYEQYF", "CASSLGGSSYEQYF",
        "CASSYPTSGANVLTF", "CASSRQAEAFF", "CASSLLQGPSSPLHF",
        "CASSLRDGHYGYTF", "CASSQWMYSPNGYTF", "CASSQYRGTEAFF",
        "CAWSRPLGYTF", "CSASFGDGGEGETQYF", "CSARVPTSGDYNEQFF",
        "CASRPEQGGPYEQYF", "CSARGGKENSPLHF", "CASSLVGAGANVLTF",
        "CSVEDPSSGSYEQYF", "CASSGTGTGASGNEQFF", "CASSVGLFSTDTQYF",
        "CASSPLQGPSQPQHF", "CASSFGTENTGELFF", "CASSEVGGSMETQYF",
        "CATSGRGDEVGELFF", "CASSSHYRGGTDTQYF", "CASSPDRTGEANNEQFF",
        "CASSVGLYSTDTQYF", "CASGLVQQGGTEAFF", "CASGPGLQQTYGYTF",
        "CASGGTGPYNEQFF", "CASSTDISSYNEQFF", "CASSLAKAGTGGEKLFF",
        "CASSQAKGGGETQYF", "CASSLRGGPYNEQFF", "CASSLLPGLAGAGNEQFF",
        "CSARAGGWGTDTQYF", "CASSPGSSGSETQYF"
      ),
      Group = factor(group_levels[group_codes], levels = group_levels),
      n = c(
        303L, 292L, 200L, 163L, 125L, 99L, 96L, 94L, 89L, 85L,
        80L, 67L, 66L, 62L, 59L, 57L, 54L, 53L, 53L, 52L,
        52L, 51L, 50L, 49L, 48L, 48L, 48L, 46L, 45L, 44L,
        43L, 41L, 41L, 40L, 36L, 35L, 35L, 34L, 33L, 32L,
        32L, 31L, 31L, 30L, 29L, 29L, 29L, 28L, 27L, 27L
      )
    ),
    row.names = c(NA, -50L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})
My code for plotting:
# Attempted alluvial plot in the wide (axis1/axis2) form: each axis is a
# logical test of Group membership and the ribbon height is the frequency n.
ggplot(
  data = a,
  mapping = aes(
    axis1 = Group == "InEx",
    axis2 = Group == "ReA SFMC",
    y = n
  )
) +
  # Ribbons coloured by amino-acid sequence.
  geom_alluvium(mapping = aes(fill = CTaa_beta)) +
  geom_stratum() +
  # Label every stratum with its computed stratum value.
  geom_text(
    mapping = aes(label = after_stat(stratum)),
    stat = "stratum"
  ) +
  scale_fill_viridis_d() +
  theme_classic() +
  # Hide the legend.
  theme(legend.position = "none")
Essentially, I am hoping to see which CTaa_beta are shared between the groups InEx and ReA SFMC, and by how much (which would be indicated by the thickness of the flows in the alluvial plot). Any help would be great!
Your data appears to be in a "long" format, so you can use the lodes (long) format in ggalluvial, with the Group variable as your key (mapped to x).
library(ggalluvial)
# Lodes (long) form: Group is the x axis, and each CTaa_beta sequence is both
# its own stratum and its own alluvium, with height and fill taken from n and
# CTaa_beta respectively.
ggplot(
  data = a,
  mapping = aes(
    x = Group,
    y = n,
    alluvium = CTaa_beta,
    stratum = CTaa_beta,
    fill = CTaa_beta
  )
) +
  geom_flow() +
  geom_stratum() +
  # Label every stratum with its computed stratum value.
  geom_text(
    mapping = aes(label = after_stat(stratum)),
    stat = "stratum"
  ) +
  scale_fill_viridis_d() +
  theme_classic() +
  # Hide the legend.
  theme(legend.position = "none")
The shared sample data doesn't contain any amino acid sequence that appears in more than one group, so no flows (alluvia) are drawn between the groups. I've made up some data in which every sequence occurs in every group, which hopefully helps illustrate how to create this plot. I dropped the peptide labels from this example.
# Made-up example: 10 sequences x 7 groups = 70 rows, each with a random
# frequency between 0 and 100. The seed makes the draw reproducible.
set.seed(123)
b <- data.frame(
  # Each sequence is repeated once per group (7 consecutive rows).
  CTaa_beta = rep(
    c(
      "CASSVAGPNTEAFF", "CASSEGTSGGASTQYF", "CASSLRQGPSYEQYF",
      "CASSVGNRGGTDTQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF",
      "CSVVPGGQGGYEQYF", "CASSLEGRERYEQFF", "CASSLLAGGNNEQFF",
      "CASTPAVRDGNYEQYF"
    ),
    each = 7L
  ),
  # The seven group labels are cycled once per sequence (10 times in total).
  Group = factor(
    rep(
      c(
        "HC PBMC", "axSpA PBMC", "axSpA SFMC", "InEx",
        "PD-1+ TIGIT+", "ReA PBMC", "ReA SFMC"
      ),
      times = 10L
    )
  ),
  n = sample(0:100, size = 70L, replace = TRUE)
)
# Same lodes-form plot as for `a`, drawn from the made-up data `b` and
# without the stratum text labels.
ggplot(
  data = b,
  mapping = aes(
    x = Group,
    y = n,
    alluvium = CTaa_beta,
    stratum = CTaa_beta,
    fill = CTaa_beta
  )
) +
  geom_flow() +
  geom_stratum() +
  scale_fill_viridis_d() +
  theme_classic() +
  # Hide the legend.
  theme(legend.position = "none")