I am working with package sfo and prepare Sankey diagram. Below you can see my code and my data.
library(sfo)
sfo_passengers %>%
filter(activity_period == max(activity_period)) %>%
group_by(activity_type_code, geo_region) %>%
summarise(total = sum(passenger_count), .groups = "drop")
sfo_passengers %>%
filter(operating_airline == "United Airlines",
activity_period >= 201901 & activity_period < 202001) %>%
mutate(terminal = ifelse(terminal == "International", "international", terminal)) %>%
group_by(operating_airline,activity_type_code, geo_summary, geo_region, terminal) %>%
summarise(total = sum(passenger_count), .groups = "drop") %>%
sankey_ly(cat_cols = c("operating_airline", "terminal","geo_summary", "geo_region", "activity_type_code"),
num_col = "total",
title = "Dist. of United Airlines Passengers at SFO During 2019")
p <- plotly::last_plot()
plotly::add_annotations(p, c("Terminals", "Domestic/Int"), x = c(0.2, 0.5), y = c(1, 1), showarrow = FALSE)
Now I want to replicate this in ggplot2. I tryed with code below but is not working.
# Load required libraries
library(ggplot2)
library(dplyr)
sfo_passengers_filtered <- sfo_passengers %>%
filter(activity_period == max(activity_period)) %>%
group_by(activity_type_code, geo_region) %>%
summarise(total = sum(passenger_count), .groups = "drop")
sfo_passengers_united <- sfo_passengers %>%
filter(operating_airline == "United Airlines",
activity_period >= 201901 & activity_period < 202001) %>%
mutate(terminal = ifelse(terminal == "International", "international", terminal)) %>%
group_by(operating_airline,activity_type_code, geo_summary, geo_region, terminal) %>%
summarise(total = sum(passenger_count), .groups = "drop")
# Create the plot
ggplot(data = sfo_passengers_united, aes(x = terminal, y = geo_region,
fill = activity_type_code, text = paste("Passengers: ", total))) +
geom_rect(aes(x = terminal, xend = after_scale(terminal), y = geo_region, yend = after_scale(geo_region),
fill = activity_type_code, alpha = total), color = "black") +
geom_text(aes(label = total), size = 3) +
scale_fill_brewer(palette = "Set1", name = "Activity Type") +
scale_alpha_continuous(range = c(0.1, 1), guide = FALSE) +
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Distribution of United Airlines Passengers at SFO During 2019",
x = "Terminals",
y = "Geo Region")
Can anybody help me how to replicate this diagram in ggplot2 ?
One possible option to recreate your plotly sankey chart using ggplot2
would be to use ggsankey
:
library(dplyr, warn = FALSE)
library(sfo)
library(ggsankey)
library(ggplot2)
dat <- sfo_passengers %>%
filter(
operating_airline == "United Airlines",
activity_period >= 201901 & activity_period < 202001
) %>%
mutate(terminal = ifelse(terminal == "International", "international", terminal)) %>%
group_by(operating_airline, activity_type_code, geo_summary, geo_region, terminal) %>%
summarise(total = sum(passenger_count), .groups = "drop")
df <- dat %>%
make_long(
operating_airline, terminal,
geo_summary, geo_region, activity_type_code,
value = total
)
ggplot(df, aes(
x = x,
next_x = next_x,
node = node,
next_node = next_node,
fill = factor(node, unique(node))
)) +
geom_sankey(flow.fill = "grey") +
geom_sankey_label(
aes(
label = node,
x = stage(
x,
after_stat = x + .075 * if_else(
x == 5, -1, 1
)
),
hjust = if_else(
x == "activity_type_code", 1, 0
)
),
size = 8 / .pt, fill = "white"
) +
ggsci::scale_fill_d3(palette = "category20") +
scale_x_discrete(
breaks = c("terminal", "geo_summary"),
labels = c("Terminals", "Domestic/Int"),
position = "top"
) +
guides(fill = "none") +
ggthemes::theme_map(base_size = 11) +
theme(
axis.text.x = element_text()
) +
labs(
title = "Dist. of United Airlines Passengers at SFO During 2019"
)
UPDATE When you are restricted to CRAN package then one option would be to use ggalluvial
but it's not that close to the plot you are trying to re-create:
library(dplyr, warn = FALSE)
library(sfo)
library(ggalluvial)
#> Loading required package: ggplot2
library(ggplot2)
dat <- sfo_passengers %>%
filter(
operating_airline == "United Airlines",
activity_period >= 201901 & activity_period < 202001
) %>%
mutate(terminal = ifelse(terminal == "International", "international", terminal)) %>%
group_by(operating_airline, activity_type_code, geo_summary, geo_region, terminal) %>%
summarise(total = sum(passenger_count), .groups = "drop")
df <- to_lodes_form(
dat,
axes = c(1, 5, 3:4, 2)
)
ggplot(
df,
aes(
x = x, stratum = stratum, alluvium = alluvium,
y = total,
fill = stratum,
label = stratum
)
) +
geom_stratum(
alpha = 1,
width = .15,
linewidth = .25,
color = "white"
) +
geom_flow(
fill = "grey",
width = .15,
linewidth = .25,
color = "white"
) +
geom_label(
data = ~subset(.x, x != "geo_region"),
aes(
x = stage(
x,
after_stat = x + .1 * if_else(
x == 5, -1, 1
)
),
hjust = if_else(
x == "activity_type_code", 1, 0
)
),
size = 8 / .pt, fill = "white",
stat = "stratum"
) +
ggrepel::geom_label_repel(
data = ~subset(.x, x == "geo_region"),
size = 8 / .pt, fill = "white",
stat = "stratum",
direction = "y",
nudge_x = .15,
hjust = 0
) +
ggsci::scale_fill_d3(palette = "category20") +
scale_x_discrete(
breaks = c("terminal", "geo_summary"),
labels = c("Terminals", "Domestic/Int"),
position = "top"
) +
guides(fill = "none") +
ggthemes::theme_map(base_size = 11) +
theme(
axis.text.x = element_text()
) +
labs(
title = "Dist. of United Airlines Passengers at SFO During 2019"
)