r ggplot2 sankey-diagram ggalluvial

Making an alluvial/sankey diagram using the first axis as the fill


I have this data:

https://docs.google.com/spreadsheets/d/18sTSOzVEmSEI2KGfGSvRT_0BbzQ9n87kCGZH-cSULCs/edit?usp=sharing (the code to reproduce the data is included at the bottom of this question)

I use this code:

library(ggplot2)
library(ggalluvial)
# Two-axis alluvial plot of `modechoice`, weighted by Freq:
# axis1 is drawn at "Before", axis2 at "After" (see scale_x_discrete below).
ggplot(data = modechoice,
       aes(y = Freq, axis1 = s2_vervoermiddel_werkstudie_voor, axis2 = s2_vervoermiddel_werkstudie_na)) +
  # NOTE: "black" sits inside aes(), so it is mapped as a constant data value
  # (and handled by the fill scale below) rather than used as a literal colour.
  geom_alluvium(aes(fill = "black"), width = 1/12) +
  # Stratum boxes: fill and outline colour are set as fixed values (outside aes()).
  geom_stratum(width = 1/12, fill = "black", color = "grey") +
  # Label every stratum with the `stratum` variable computed by the stratum stat.
  geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("Before", "After"), expand = c(.05, .05)) +
  scale_fill_brewer(type = "qual", palette = "Set1") +
  ggtitle("Mode choice before corona and expected after")

And I get this result:

enter image description here

Not really what you would hope for.

First of all, I want the fill to be coloured by axis1. So s2_vervoermiddel_werkstudie_voor should dictate the colour, but I get an error when I try to do that (instead of "black", I put s2_vervoermiddel_werkstudie_voor)

(Error: Continuous value supplied to discrete scale)

Second, how do I change the labels to not look like we are a decade behind?

Third, how do I change the ordering?

# Reproducible (dput-style) definition of the example data: a tibble with 75
# rows, two character columns (s2_vervoermiddel_werkstudie_voor and
# s2_vervoermiddel_werkstudie_na, both containing NA) and a numeric Freq column.
# The plotting code refers to this object as `modechoice`; the expression below
# is not assigned to a name here.
structure(list(s2_vervoermiddel_werkstudie_voor = c("Auto (bestuurder)", 
"Auto (bestuurder)", "Auto (bestuurder)", "Auto (bestuurder)", 
"Auto (bestuurder)", "Auto (bestuurder)", "Auto (bestuurder)", 
"Auto (bestuurder)", "Auto (bestuurder)", "Auto (bestuurder)", 
"Auto (passagier)", "Auto (passagier)", "Auto (passagier)", "Auto (passagier)", 
"Auto (passagier)", "Auto (passagier)", "Auto (passagier)", "Auto (passagier)", 
"Trein", "Trein", "Trein", "Trein", "Trein", "Trein", "Trein", 
"Trein", "Trein", "Trein", "Bus/Tran/Metro", "Bus/Tran/Metro", 
"Bus/Tran/Metro", "Bus/Tran/Metro", "Bus/Tran/Metro", "Bus/Tran/Metro", 
"Bus/Tran/Metro", "Bus/Tran/Metro", "Fiets", "Fiets", "Fiets", 
"Fiets", "Fiets", "Fiets", "Fiets", "Fiets", "Fiets", "Fiets", 
"E-bike/speed pedelec", "E-bike/speed pedelec", "E-bike/speed pedelec", 
"E-bike/speed pedelec", "Scooter/brommer/motor", "Scooter/brommer/motor", 
"Scooter/brommer/motor", "Scooter/brommer/motor", "Scooter/brommer/motor", 
"Lopen", "Lopen", "Lopen", "Lopen", "Lopen", "Lopen", "Lopen", 
"NVA", "NVA", "NVA", "NVA", "NVA", NA, NA, NA, NA, NA, NA, NA, 
NA), s2_vervoermiddel_werkstudie_na = c("Auto (bestuurder)", 
"Auto (passagier)", "Trein", "Bus/Tran/Metro", "Fiets", "E-bike/speed pedelec", 
"Scooter/brommer/motor", "Lopen", "NVA", NA, "Auto (bestuurder)", 
"Auto (passagier)", "Trein", "Bus/Tran/Metro", "Fiets", "Lopen", 
"NVA", NA, "Auto (bestuurder)", "Auto (passagier)", "Trein", 
"Bus/Tran/Metro", "Fiets", "E-bike/speed pedelec", "Scooter/brommer/motor", 
"Lopen", "NVA", NA, "Auto (bestuurder)", "Auto (passagier)", 
"Trein", "Bus/Tran/Metro", "Fiets", "E-bike/speed pedelec", "NVA", 
NA, "Auto (bestuurder)", "Auto (passagier)", "Trein", "Bus/Tran/Metro", 
"Fiets", "E-bike/speed pedelec", "Scooter/brommer/motor", "Lopen", 
"NVA", NA, "Auto (bestuurder)", "Fiets", "E-bike/speed pedelec", 
NA, "Auto (bestuurder)", "Fiets", "E-bike/speed pedelec", "Scooter/brommer/motor", 
NA, "Auto (bestuurder)", "Trein", "Bus/Tran/Metro", "Fiets", 
"Lopen", "NVA", NA, "Auto (bestuurder)", "Auto (passagier)", 
"Fiets", "NVA", NA, "Auto (bestuurder)", "Trein", "Bus/Tran/Metro", 
"Fiets", "E-bike/speed pedelec", "Lopen", "NVA", NA), Freq = c(441, 
2, 11, 1, 21, 12, 3, 3, 3, 46, 4, 9, 1, 1, 2, 1, 1, 1, 25, 3, 
156, 1, 22, 4, 2, 2, 6, 18, 10, 1, 7, 49, 17, 3, 2, 10, 30, 1, 
28, 10, 348, 11, 4, 4, 6, 39, 2, 2, 53, 11, 4, 1, 1, 13, 2, 2, 
5, 2, 8, 26, 1, 4, 1, 1, 1, 22, 1, 11, 4, 2, 15, 2, 1, 2, 379
)), row.names = c(NA, -75L), class = c("tbl_df", "tbl", "data.frame"
))

classes:

tibble [75 x 3] (S3: tbl_df/tbl/data.frame)
 $ s2_vervoermiddel_werkstudie_voor: chr [1:75] "Auto (bestuurder)" "Auto (bestuurder)" "Auto (bestuurder)" "Auto (bestuurder)" ...
 $ s2_vervoermiddel_werkstudie_na  : chr [1:75] "Auto (bestuurder)" "Auto (passagier)" "Trein" "Bus/Tran/Metro" ...
 $ Freq                            : num [1:75] 441 2 11 1 21 12 3 3 3 46 ...

Solution

  • example:

    
    ## Keep only rows where both the "before" and the "after" mode are known,
    ## so that no NA stratum is drawn. Base subsetting is used because only
    ## ggplot2 and ggalluvial are loaded (no dplyr for `%>%` / `filter()`).
    modechoice_complete <- modechoice[
      !is.na(modechoice$s2_vervoermiddel_werkstudie_voor) &
        !is.na(modechoice$s2_vervoermiddel_werkstudie_na),
    ]
    ggplot(data = modechoice_complete,
           aes(y = Freq,
               axis1 = s2_vervoermiddel_werkstudie_voor,
               axis2 = s2_vervoermiddel_werkstudie_na)) +
      geom_alluvium(width = 1/12,
                    ## link alluvium colors to variable:
                    aes(fill = s2_vervoermiddel_werkstudie_voor)) +
      geom_stratum(width = 1/12,
                   ## change the stratum fill color here:
                   fill = "red",
                   color = "grey") +
      ## geom_text instead of geom_label for a leaner appearance:
      geom_text(stat = "stratum",
                aes(label = after_stat(stratum),
                    ## left-adjust labels on the first axis and right-adjust
                    ## them on the second; derived from the axis position so
                    ## it does not depend on the number of strata per axis:
                    hjust = after_stat(ifelse(x == 1, 0, 1))),
                ## rotate labels if desired:
                angle = 0) +
      scale_x_discrete(limits = c("Before", "After"), expand = c(.05, .05)) +
      scale_fill_brewer(type = "qual", palette = "Set1",
                        ## skip color legend:
                        guide = "none") +
      ggtitle("Mode choice before corona and expected after")
    
    

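    The ordering can be changed by converting the axis columns to factors and reordering their levels, e.g. with forcats::fct_reorder() using the cumulative sum (cumsum) of Freq. I was in a rush, so sorry for not working out that code.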