Here is the input dataset:
# Example data: ten ids (A1 ... A10), each with a variation and a result.
id <- paste0("A", 1:10)
variation <- c(
  "aaa1", "aaa1", "bbb1", "aaa2", "b1",
  "a3", "a1", "b1", "a1", "b1"
)
result <- c(
  "Way1", "Way1", "Way2", "Way2", "Way3",
  "Way1", "Way2", "Way3", "Way4", "Way1"
)
# Columns are named after the vectors: id, variation, result.
data <- data.frame(id = id, variation = variation, result = result)
head(data)
# id variation result
# 1 A1 aaa1 Way1
# 2 A2 aaa1 Way1
# 3 A3 bbb1 Way2
# 4 A4 aaa2 Way2
# 5 A5 b1 Way3
# 6 A6 a3 Way1
# 7 A7 a1 Way2
# 8 A8 b1 Way3
# 9 A9 a1 Way4
# 10 A10 b1 Way1
Now let's prepare the dataset for a Sankey diagram using the R ggsankey package.
library(ggsankey)
# Reshape the wide data into long form: one row per (x, node) pair, with the
# following stage and node carried alongside in next_x / next_node.
data_df <- make_long(data, id, variation, result)
head(data_df)
# A tibble: 6 × 4
# x node next_x next_node
# <fct> <chr> <fct> <chr>
# 1 id A1 variation aaa1
# 2 variation aaa1 result Way1
# 3 result Way1 NA NA
# 4 id A2 variation aaa1
# 5 variation aaa1 result Way1
# 6 result Way1 NA NA
Now let's plot the Sankey diagram using the R ggalluvial package.
library(ggalluvial)
# Plot the long-format data as a flow diagram: one column per stage, nodes
# filled and labelled by their name, legend hidden, empty title.
ggplot(
  data = data_df,
  mapping = aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = factor(node),
    label = node
  )
) +
  geom_alluvial(flow.alpha = 0.6) +
  geom_alluvial_text(size = 3, color = "black") +
  scale_fill_viridis_d() +
  theme_alluvial(base_size = 18) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(x = NULL, title = "")
As we can see in the plot generated above, the ordering of the "id" variable is not correct: A1, A10, A2, etc.
My question is: how can I order the "id" variable as A1, A2, A3, ..., A10?
Thank you.
This is not specific to `ggsankey` or `ggalluvial`. Your nodes are ordered alphabetically. To fix that, you have to convert `node` to a `factor` with the order of the `levels` set in your desired order. For example, to get the order as in your original data you could use `forcats::fct_inorder`:
# Rebuild the example data frame from the id, variation and result vectors
# defined in the question.
data <- data.frame(id, variation, result)

library(ggsankey)
library(ggalluvial)
#> Loading required package: ggplot2
# Spell out warn.conflicts in full instead of relying on partial argument
# matching of `warn`.
library(dplyr, warn.conflicts = FALSE)
library(forcats)

# A character `node` column is ordered alphabetically in the plot
# (A1, A10, A2, ...). Converting it to a factor whose levels follow the order
# of first appearance keeps the ids in their original order (A1, A2, ..., A10).
data_df <- data |>
  make_long(id, variation, result) |>
  mutate(node = forcats::fct_inorder(node))
# Plot the reordered data. `node` is already a factor here, so it is mapped to
# fill directly and its level order determines the node order.
ggplot(
  data = data_df,
  mapping = aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = node,
    label = node
  )
) +
  geom_alluvial(flow.alpha = 0.6) +
  geom_alluvial_text(size = 3, color = "black") +
  scale_fill_viridis_d() +
  theme_alluvial(base_size = 18) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(x = NULL, title = "")