Here is the input dataset:
# Example data: ten ids, each paired with a variation and a result.
id <- paste0("A", 1:10)
variation <- c(
  "aaa1", "aaa1", "bbb1", "aaa2", "b1",
  "a3", "a1", "b1", "a1", "b1"
)
result <- c(
  "Way1", "Way1", "Way2", "Way2", "Way3",
  "Way1", "Way2", "Way3", "Way4", "Way1"
)
data <- data.frame(id, variation, result)

# Ask for all ten rows explicitly: head() stops after six rows by default,
# whereas the output shown below lists the whole data frame.
head(data, n = 10)
#     id variation result
# 1   A1      aaa1   Way1
# 2   A2      aaa1   Way1
# 3   A3      bbb1   Way2
# 4   A4      aaa2   Way2
# 5   A5        b1   Way3
# 6   A6        a3   Way1
# 7   A7        a1   Way2
# 8   A8        b1   Way3
# 9   A9        a1   Way4
# 10 A10        b1   Way1
Now let's prepare the dataset for a Sankey diagram using the R ggsankey package.
library(ggsankey)

# Reshape the wide data frame into long format. As the printed rows below
# show, each row holds a column name (x) and its value (node) together with
# the following column (next_x) and that column's value (next_node).
data_df <- make_long(data, id, variation, result)

head(data_df)
# A tibble: 6 × 4
#   x         node  next_x    next_node
#   <fct>     <chr> <fct>     <chr>
# 1 id        A1    variation aaa1
# 2 variation aaa1  result    Way1
# 3 result    Way1  NA        NA
# 4 id        A2    variation aaa1
# 5 variation aaa1  result    Way1
# 6 result    Way1  NA        NA
Now let's plot the Sankey diagram using the R ggalluvial package.
# NOTE(review): geom_alluvial(), geom_alluvial_text() and theme_alluvial()
# appear to be ggsankey functions rather than ggalluvial ones; ggalluvial
# presumably matters here mainly because loading it attaches ggplot2 -- confirm.
library(ggalluvial)

# Draw the alluvial plot from the long-format data. `node` is wrapped in
# factor() for the fill aesthetic; the legend is hidden and the title is empty.
ggplot(
  data_df,
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = factor(node),
    label = node
  )
) +
  geom_alluvial(flow.alpha = 0.6) +
  geom_alluvial_text(size = 3, color = "black") +
  scale_fill_viridis_d() +
  theme_alluvial(base_size = 18) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(x = NULL) +
  ggtitle("")
As we can see in the plot generated above, the ordering of the "id" variable is not correct: A1, A10, A2, etc.
My question is: how can I order the "id" variable as A1, A2, A3, ..., A10?
Thank you.
This is not specific to ggsankey or ggalluvial. Your nodes are ordered alphabetically. To fix that, you have to convert node to a factor whose levels are set in your desired order. For example, to get the order in which the values appear in your original data, you could use forcats::fct_inorder:
# Rebuild the example data frame from the id, variation and result vectors.
data <- data.frame(id, variation, result)

library(ggsankey)
library(ggalluvial)
#> Loading required package: ggplot2
# Spell the argument out in full instead of relying on partial matching
# of `warn` to `warn.conflicts`.
library(dplyr, warn.conflicts = FALSE)
library(forcats)

# Reshape to long format, then convert `node` to a factor whose levels follow
# the order of first appearance, so nodes are no longer sorted alphabetically.
# The native pipe is used for every step of the chain.
data_df <- data |>
  make_long(id, variation, result) |>
  mutate(node = forcats::fct_inorder(node))

# `node` is already a factor here, so it is mapped to fill directly.
ggplot(
  data_df,
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = node,
    label = node
  )
) +
  geom_alluvial(flow.alpha = 0.6) +
  geom_alluvial_text(size = 3, color = "black") +
  scale_fill_viridis_d() +
  theme_alluvial(base_size = 18) +
  labs(x = NULL) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  ggtitle("")
