I have this data:
https://docs.google.com/spreadsheets/d/18sTSOzVEmSEI2KGfGSvRT_0BbzQ9n87kCGZH-cSULCs/edit?usp=sharing (the `dput()` output that reproduces it is included below the code)
I use this code:
library(ggplot2)
library(ggalluvial)
# Original attempt: two-axis alluvial diagram of mode choice, weighted by Freq.
ggplot(
  modechoice,
  aes(
    axis1 = s2_vervoermiddel_werkstudie_voor,
    axis2 = s2_vervoermiddel_werkstudie_na,
    y = Freq
  )
) +
  # "black" sits inside aes(), so it is mapped through the fill scale as a
  # constant one-level variable rather than applied as a literal colour.
  geom_alluvium(aes(fill = "black"), width = 1 / 12) +
  # Strata boxes: fixed black fill with a grey outline.
  geom_stratum(fill = "black", color = "grey", width = 1 / 12) +
  # One boxed label per stratum, showing the stratum name.
  geom_label(aes(label = after_stat(stratum)), stat = "stratum") +
  scale_x_discrete(expand = c(0.05, 0.05), limits = c("Before", "After")) +
  scale_fill_brewer(palette = "Set1", type = "qual") +
  ggtitle("Mode choice before corona and expected after")
And I get this result:
Not really what you would hope for.
First of all, I want the fill to be coloured by axis1, i.e. s2_vervoermiddel_werkstudie_voor should dictate the colour. However, when I put s2_vervoermiddel_werkstudie_voor in place of "black", I get an error:
(Error: Continuous value supplied to discrete scale)
Second, how do I restyle the labels so that they do not look like we are a decade behind?
Third, how do I change the ordering of the categories?
# dput() output that recreates the example data: a 75-row tibble with the mode
# of transport before ("voor") and after ("na"), plus the frequency (Freq) of
# each before/after combination. Missing modes are stored as NA.
# Assign the result to `modechoice` to run the plotting code.
structure(list(s2_vervoermiddel_werkstudie_voor = c("Auto (bestuurder)",
"Auto (bestuurder)", "Auto (bestuurder)", "Auto (bestuurder)",
"Auto (bestuurder)", "Auto (bestuurder)", "Auto (bestuurder)",
"Auto (bestuurder)", "Auto (bestuurder)", "Auto (bestuurder)",
"Auto (passagier)", "Auto (passagier)", "Auto (passagier)", "Auto (passagier)",
"Auto (passagier)", "Auto (passagier)", "Auto (passagier)", "Auto (passagier)",
"Trein", "Trein", "Trein", "Trein", "Trein", "Trein", "Trein",
"Trein", "Trein", "Trein", "Bus/Tran/Metro", "Bus/Tran/Metro",
"Bus/Tran/Metro", "Bus/Tran/Metro", "Bus/Tran/Metro", "Bus/Tran/Metro",
"Bus/Tran/Metro", "Bus/Tran/Metro", "Fiets", "Fiets", "Fiets",
"Fiets", "Fiets", "Fiets", "Fiets", "Fiets", "Fiets", "Fiets",
"E-bike/speed pedelec", "E-bike/speed pedelec", "E-bike/speed pedelec",
"E-bike/speed pedelec", "Scooter/brommer/motor", "Scooter/brommer/motor",
"Scooter/brommer/motor", "Scooter/brommer/motor", "Scooter/brommer/motor",
"Lopen", "Lopen", "Lopen", "Lopen", "Lopen", "Lopen", "Lopen",
"NVA", "NVA", "NVA", "NVA", "NVA", NA, NA, NA, NA, NA, NA, NA,
NA), s2_vervoermiddel_werkstudie_na = c("Auto (bestuurder)",
"Auto (passagier)", "Trein", "Bus/Tran/Metro", "Fiets", "E-bike/speed pedelec",
"Scooter/brommer/motor", "Lopen", "NVA", NA, "Auto (bestuurder)",
"Auto (passagier)", "Trein", "Bus/Tran/Metro", "Fiets", "Lopen",
"NVA", NA, "Auto (bestuurder)", "Auto (passagier)", "Trein",
"Bus/Tran/Metro", "Fiets", "E-bike/speed pedelec", "Scooter/brommer/motor",
"Lopen", "NVA", NA, "Auto (bestuurder)", "Auto (passagier)",
"Trein", "Bus/Tran/Metro", "Fiets", "E-bike/speed pedelec", "NVA",
NA, "Auto (bestuurder)", "Auto (passagier)", "Trein", "Bus/Tran/Metro",
"Fiets", "E-bike/speed pedelec", "Scooter/brommer/motor", "Lopen",
"NVA", NA, "Auto (bestuurder)", "Fiets", "E-bike/speed pedelec",
NA, "Auto (bestuurder)", "Fiets", "E-bike/speed pedelec", "Scooter/brommer/motor",
NA, "Auto (bestuurder)", "Trein", "Bus/Tran/Metro", "Fiets",
"Lopen", "NVA", NA, "Auto (bestuurder)", "Auto (passagier)",
"Fiets", "NVA", NA, "Auto (bestuurder)", "Trein", "Bus/Tran/Metro",
"Fiets", "E-bike/speed pedelec", "Lopen", "NVA", NA), Freq = c(441,
2, 11, 1, 21, 12, 3, 3, 3, 46, 4, 9, 1, 1, 2, 1, 1, 1, 25, 3,
156, 1, 22, 4, 2, 2, 6, 18, 10, 1, 7, 49, 17, 3, 2, 10, 30, 1,
28, 10, 348, 11, 4, 4, 6, 39, 2, 2, 53, 11, 4, 1, 1, 13, 2, 2,
5, 2, 8, 26, 1, 4, 1, 1, 1, 22, 1, 11, 4, 2, 15, 2, 1, 2, 379
)), row.names = c(NA, -75L), class = c("tbl_df", "tbl", "data.frame"
))
Column classes (output of `str(modechoice)`):
tibble [75 x 3] (S3: tbl_df/tbl/data.frame)
$ s2_vervoermiddel_werkstudie_voor: chr [1:75] "Auto (bestuurder)" "Auto (bestuurder)" "Auto (bestuurder)" "Auto (bestuurder)" ...
$ s2_vervoermiddel_werkstudie_na : chr [1:75] "Auto (bestuurder)" "Auto (passagier)" "Trein" "Bus/Tran/Metro" ...
$ Freq : num [1:75] 441 2 11 1 21 12 3 3 3 46 ...
example:
## Alluvial plot of mode choice before vs. after, with the alluvia coloured by
## the "before" mode and plain-text stratum labels.

## The two columns that form the axes of the plot.
axis_cols <- c("s2_vervoermiddel_werkstudie_voor",
               "s2_vervoermiddel_werkstudie_na")

## remove NAs: keep only rows where both axis variables are present.
## Base R subsetting is used so that no package beyond ggplot2 and ggalluvial
## has to be loaded.
modechoice_complete <- modechoice[complete.cases(modechoice[axis_cols]), ]

## Number of distinct strata on each axis (before, after). Used below to align
## the labels instead of hard-coding the category count.
n_strata <- vapply(modechoice_complete[axis_cols],
                   function(x) length(unique(x)),
                   integer(1))

ggplot(data = modechoice_complete,
       aes(y = Freq,
           axis1 = s2_vervoermiddel_werkstudie_voor,
           axis2 = s2_vervoermiddel_werkstudie_na)) +
  geom_alluvium(width = 1 / 12,
                ## link alluvium colors to variable:
                aes(fill = s2_vervoermiddel_werkstudie_voor)) +
  geom_stratum(width = 1 / 12,
               ## change black stratum color here:
               fill = "red",
               color = "grey") +
  ## change geom_label to geom_text for leaner appearance:
  geom_text(stat = "stratum",
            aes(label = after_stat(stratum)),
            ## rotate labels if desired:
            angle = 0,
            ## left-adjust the labels of the first axis and right-adjust those
            ## of the second; assumes the stratum labels are emitted axis by
            ## axis, as the original fixed-length vector did
            hjust = rep(c(0, 1), times = n_strata)) +
  scale_x_discrete(limits = c("Before", "After"), expand = c(.05, .05)) +
  scale_fill_brewer(type = "qual", palette = "Set1",
                    ## skip color legend:
                    guide = "none") +
  ggtitle("Mode choice before corona and expected after")
The ordering can be changed with `fct_reorder()` (from the forcats package),
e.g. by the cumulative sum of `Freq`. I am in a rush, so sorry for not working out the code.