I have a sankey graph that I created in ggplot, but I want the nodes on the right to relate to the left node... so if there are 4 colours from the left node entering the right node, the right node should have all 4 colours.
Here is my dataset
# Example data (dput output): a 50-row tibble with the columns x, node,
# next_x and next_node that are mapped to the ggsankey aesthetics below.
#   - x / next_x are factors with levels "Method_Group" and "Topic".
#   - Rows alternate: a "Method_Group" row (next_x = "Topic", next_node = the
#     topic it flows into) followed by a "Topic" row whose next_x / next_node
#     are NA.
# NOTE(review): "Fisheries Managemenet" is spelled this way in the data itself;
# it is left unchanged here because it is label text shown in the plot.
df2 = structure(list(x = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), levels = c("Method_Group",
"Topic"), class = "factor"), node = c("BRUV + Both", "Behavioural Ecology",
"BRUV + Both", "Conservation Methods", "BRUV + Both", "Other Drivers",
"Animal Borne + No Receiver", "Behavioural Ecology", "Controlled + Receiver",
"Behavioural Ecology", "Controlled + Receiver", "Reproductive Ecology",
"Controlled + Receiver", "Other Drivers", "Controlled + Receiver",
"Behavioural Ecology", "Controlled + Receiver", "Methodological",
"Animal Borne + No Receiver", "Behavioural Ecology", "Animal Borne + No Receiver",
"Methodological", "Stationary + No Receiver", "Reproductive Ecology",
"Stationary + No Receiver", "Landuse Management", "Stationary + No Receiver",
"Other Drivers", "Animal Borne + No Receiver", "Behavioural Ecology",
"Animal Borne + No Receiver", "Methodological", "Animal Borne + No Receiver",
"Reproductive Ecology", "Stationary + Receiver", "Behavioural Ecology",
"Stationary + Receiver", "Fisheries Managemenet", "Stationary + Receiver",
"Behavioural Ecology", "Stationary + Receiver", "Methodological",
"Stationary + Receiver", "Fisheries Managemenet", "BRUV + Both",
"Behavioural Ecology", "BRUV + Both", "Methodological", "BRUV + Both",
"Conservation Methods"), next_x = structure(c(2L, NA, 2L, NA,
2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA,
2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA,
2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA), levels = c("Method_Group",
"Topic"), class = "factor"), next_node = c("Behavioural Ecology",
NA, "Conservation Methods", NA, "Other Drivers", NA, "Behavioural Ecology",
NA, "Behavioural Ecology", NA, "Reproductive Ecology", NA, "Other Drivers",
NA, "Behavioural Ecology", NA, "Methodological", NA, "Behavioural Ecology",
NA, "Methodological", NA, "Reproductive Ecology", NA, "Landuse Management",
NA, "Other Drivers", NA, "Behavioural Ecology", NA, "Methodological",
NA, "Reproductive Ecology", NA, "Behavioural Ecology", NA, "Fisheries Managemenet",
NA, "Behavioural Ecology", NA, "Methodological", NA, "Fisheries Managemenet",
NA, "Behavioural Ecology", NA, "Methodological", NA, "Conservation Methods",
NA)), row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"
))
Here is the code that I'm using
devtools::install_github("davidsjoberg/ggsankey")
library(ggsankey)
library(dplyr)
library(ggplot2)
# Two-column sankey plot of df2: nodes and flows are filled by node name and
# every node is annotated with a white label. Axis text, titles, ticks and
# grid lines are removed so that only the diagram remains.
ggplot(
  df2,
  aes(
    x = x, next_x = next_x, node = node, next_node = next_node,
    fill = factor(node), label = node
  )
) +
  geom_sankey(
    flow.alpha = 1, node.color = "black", show.legend = FALSE, width = 0.40
  ) +
  geom_sankey_label(size = 5, color = "black", fill = "white") +
  theme_bw() +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  # Alternative palette: scale_fill_viridis_d(option = "inferno")
  # scale_fill_viridis() belongs to the viridis package, which is not attached
  # in this script; ggplot2's scale_fill_viridis_d() is the discrete viridis
  # fill scale and needs no extra package.
  scale_fill_viridis_d()
I want the right node of my graph to look like the right node in this image,
where all the colours from the left node show up in the right node if they are related. Is this possible with sankey plots? If so, how do I do that?
A bit hacky, but one option would be to overplot the boxes on the right using the computed `x` and `y` coordinates used to draw the flows via `geom_polygon`.
To this end, I first get the data using `layer_data`, filter it for the endpoints via `filter(x == 2 - width / 2)`, and keep only the data for one of the endpoints per polygon (i.e. per flow) using `distinct(fill, flow_end_ymax, .keep_all = TRUE)`.
This dataset could then be used in a `geom_rect` to extend the flows and overplot the rectangles drawn by `geom_sankey`:
library(ggsankey)
library(dplyr)
library(ggplot2)
# Width of the node boxes; also used to locate the right-hand column below.
width <- .4
# Left edge of the right-hand node boxes (second x position), i.e. the x value
# at which the flows end.
right_edge <- 2 - width / 2

# Base sankey plot: flows and node boxes filled by node name.
p <- ggplot(
  df2,
  aes(
    x = x, next_x = next_x, node = node, next_node = next_node,
    fill = factor(node), label = node
  )
) +
  geom_sankey(
    flow.alpha = 1, node.color = "black", show.legend = FALSE,
    width = width, linewidth = 2
  ) +
  theme_void() +
  theme(
    plot.margin = unit(rep(5.5, 4), "pt")
  ) +
  scale_fill_viridis_d()

# Get the data from the flows layer. The plot object `p` is queried directly
# instead of last_plot(), so the result does not depend on which plot happened
# to be created or modified last. near() replaces `==` because the computed
# coordinates are doubles and exact equality is fragile. distinct() keeps one
# endpoint row per flow.
dat <- layer_data(p, 1) |>
  filter(near(x, right_edge)) |>
  distinct(fill, flow_end_ymax, .keep_all = TRUE)

# Get the data from the boxes layer (right-hand boxes only).
dat1 <- layer_data(p, 2) |>
  filter(near(xmin, right_edge))

p +
  # Extend every flow across the right-hand box so the box shows the colours
  # of all flows entering it.
  geom_rect(
    data = dat,
    aes(
      xmin = x, xmax = x + width,
      ymin = flow_end_ymin, ymax = flow_end_ymax,
      fill = label
    ),
    inherit.aes = FALSE
  ) +
  # Draw the outline
  geom_rect(
    data = dat1,
    aes(
      xmin = xmin, xmax = xmax,
      ymin = ymin, ymax = ymax
    ),
    inherit.aes = FALSE, fill = NA, color = "black", linewidth = 2
  ) +
  geom_sankey_label(size = 5, color = "black", fill = "white") +
  guides(fill = "none")