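My ultimate goal is to make alluvial plots in R. However, my data is large and is not stored in a form that can be used directly as input. I have several groups, each stored as a list of modules, where every module is a character vector (see the toy dataset below).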
My thought is to use length(intersect()) to get the frequency of each combination across the list of lists of vectors.
To get the combinations of the list of lists, I used cross() from the purrr package. I am not sure if this is the right way to get this combination matrix, and I am open to other methods. Similarly, I am open to other pipelines that can accomplish the goal. The main reason I am asking is that I am stuck at this point and cannot find a good way to compute the intersections across the list of lists of vectors.
Below is a toy dataset and the expected output I would like to get (I typed the result manually):
library(tidyverse)
# Toy data, group 1: three named modules, each a character vector of test IDs.
group1 <- list(
  module1 = paste0("test", 1:6),
  module2 = paste0("test", 7:10),
  module3 = paste0("test", 11:13)
)
# Toy data, group 2: the same kind of test IDs, partitioned differently.
group2 <- list(
  module1 = paste0("test", c(3, 4, 5, 7, 8)),
  module2 = paste0("test", c(1, 12, 13)),
  module3 = paste0("test", c(2, 6, 11))
)
# Toy data, group 3: a third partition; test14 and test15 occur only here.
group3 <- list(
  module1 = paste0("test", c(3, 5, 6, 8)),
  module2 = paste0("test", c(1, 7, 9, 10)),
  module3 = paste0("test", 13:15)
)
# Enumerate every (group1, group2, group3) combination of module names:
# cross() builds the Cartesian product as a list of named lists, and
# bind_rows() stacks those into a tibble with one row per combination.
list_combination <- bind_rows(
  cross(
    list(
      group1 = names(group1),
      group2 = names(group2),
      group3 = names(group3)
    )
  )
)
# Hand-typed expected three-way intersections, one list element per row of
# list_combination (group1 varies fastest, then group2, then group3).
# NULL marks a combination with no element common to all three modules.
# Each trailing comment names the modules intersected for that row.
expected_result_intersect <- list_combination %>%
  mutate(intersect_result = list(
    c("test3", "test5"), # intersect(group1_module1, intersect(group2_module1, group3_module1))
    c("test8"),          # intersect(group1_module2, intersect(group2_module1, group3_module1))
    NULL,                # intersect(group1_module3, intersect(group2_module1, group3_module1))
    NULL,                # intersect(group1_module1, intersect(group2_module2, group3_module1))
    NULL,                # intersect(group1_module2, intersect(group2_module2, group3_module1))
    NULL,                # intersect(group1_module3, intersect(group2_module2, group3_module1))
    c("test6"),          # intersect(group1_module1, intersect(group2_module3, group3_module1))
    NULL,                # intersect(group1_module2, intersect(group2_module3, group3_module1))
    NULL,                # intersect(group1_module3, intersect(group2_module3, group3_module1))
    # group1_module1 and group2_module1 share test3/test4/test5, none of
    # which is in group3_module2, so this combination is empty.
    NULL,                # intersect(group1_module1, intersect(group2_module1, group3_module2))
    c("test7"),          # intersect(group1_module2, intersect(group2_module1, group3_module2))
    NULL,                # intersect(group1_module3, intersect(group2_module1, group3_module2))
    c("test1"),          # intersect(group1_module1, intersect(group2_module2, group3_module2))
    NULL,                # intersect(group1_module2, intersect(group2_module2, group3_module2))
    NULL,                # intersect(group1_module3, intersect(group2_module2, group3_module2))
    NULL,                # intersect(group1_module1, intersect(group2_module3, group3_module2))
    NULL,                # intersect(group1_module2, intersect(group2_module3, group3_module2))
    NULL,                # intersect(group1_module3, intersect(group2_module3, group3_module2))
    NULL,                # intersect(group1_module1, intersect(group2_module1, group3_module3))
    NULL,                # intersect(group1_module2, intersect(group2_module1, group3_module3))
    NULL,                # intersect(group1_module3, intersect(group2_module1, group3_module3))
    NULL,                # intersect(group1_module1, intersect(group2_module2, group3_module3))
    NULL,                # intersect(group1_module2, intersect(group2_module2, group3_module3))
    c("test13"),         # intersect(group1_module3, intersect(group2_module2, group3_module3))
    NULL,                # intersect(group1_module1, intersect(group2_module3, group3_module3))
    NULL,                # intersect(group1_module2, intersect(group2_module3, group3_module3))
    NULL                 # intersect(group1_module3, intersect(group2_module3, group3_module3))
  ))
# Frequency of each combination = number of elements shared by all three
# modules. lengths() is vectorised over the list-column (NULL counts as 0),
# so rowwise() is unnecessary and the result is not left row-wise grouped.
expected_result_counts <- expected_result_intersect %>%
  mutate(Freq = lengths(intersect_result))
library(ggalluvial)
# Alluvial diagram: group1 and group2 form the two axes, Freq sets the flow
# thickness, and each flow is colored by its group3 module.
ggplot(
  data = expected_result_counts,
  mapping = aes(y = Freq, axis1 = group1, axis2 = group2)
) +
  geom_alluvium(mapping = aes(fill = group3), width = 1 / 12) +
  geom_stratum(width = 1 / 12, fill = "black", color = "grey") +
  geom_label(mapping = aes(label = after_stat(stratum)), stat = "stratum") +
  ggtitle("test plot") +
  scale_fill_brewer(type = "qual", palette = "Set1") +
  scale_x_discrete(limits = c("Group1", "Group2"), expand = c(.05, .05))
Something to note: there is no need to compute intersections within a group, e.g.
intersect(group1_module1, group1_module2),
as the character vectors within a group have distinct elements. I am open to other languages to process the data, but I prefer using R to plot the graph as I am more familiar with ggplot.
Thanks!
map(cross(lst(group1, group2, group3)), ~reduce(.x, intersect))
is what you are looking for
# Bundle the three groups into one list; lst() names each element after
# the variable it was built from.
my_list <- lst(group1, group2, group3)

# cross() yields one (group1 module, group2 module, group3 module) triple
# per combination; reduce() folds intersect() across the triple to keep
# only the elements present in all three modules.
list_combination %>%
  mutate(
    result = map(
      cross(my_list),
      function(combo) reduce(combo, intersect)
    )
  )
# A tibble: 27 × 4
group1 group2 group3 result
<chr> <chr> <chr> <list>
1 module1 module1 module1 <chr [2]>
2 module2 module1 module1 <chr [1]>
3 module3 module1 module1 <chr [0]>
4 module1 module2 module1 <chr [0]>
5 module2 module2 module1 <chr [0]>
6 module3 module2 module1 <chr [0]>
7 module1 module3 module1 <chr [1]>
8 module2 module3 module1 <chr [0]>
9 module3 module3 module1 <chr [0]>
10 module1 module1 module2 <chr [0]>
# … with 17 more rows
In base R:
# Base-R equivalent: expand.grid() enumerates the module combinations and
# apply() folds intersect() over each row (MARGIN = 1) via Reduce().
apply(
  X = expand.grid(my_list),
  MARGIN = 1,
  FUN = Reduce,
  f = intersect
)