r ggplot2 yaxis geom-segment

Order y-axis using geom_segment()


I'm building a plot with mostly geom_segment(), and I'm having trouble ordering the y-axis as I need. I know this question is somewhat repeated, and I apologize for that, but I just can't understand what was done here and here to get to the ordered y-axis... I'm providing a simplified example of my dataset so I hope this can also help others later on.

So, here's the dataset:

# Example data (dput output): 35 rows, one row per segment to draw.
# Columns: id, country, region, first_event, last_event.
# first_event / last_event are character strings in dd/mm/yyyy form here.
df <- structure(list(id = c("ID1", "ID2", "ID3", "ID3", "ID3", 
"ID3", "ID3", "ID3", "ID3", "ID3", "ID3", "ID4", "ID4", "ID4", 
"ID4", "ID4", "ID4", "ID4", "ID4", "ID5", "ID5", "ID5", "ID5", 
"ID5", "ID6", "ID7", "ID8", "ID8", "ID8", "ID8", "ID9", "ID9", 
"ID9", "ID9", "ID9"), country = c("country_1", "country_2", "country_1", 
"country_2", "country_1", "country_2", "country_1", "country_2", 
"country_2", "country_2", "country_2", "country_1", "country_2", 
"country_1", "country_2", "country_1", "country_2", "country_1", 
"country_3", "country_4", "country_5", "country_4", "country_5", 
"country_1", "country_1", "country_2", "country_1", "country_5", 
"country_4", "country_5", "country_4", "country_5", "country_4", 
"country_5", "country_4"), region = c("region_A", "region_A", 
"region_A", "region_A", "region_A", "region_A", "region_A", "region_A", 
"region_A", "region_A", "region_A", "region_A", "region_A", "region_A", 
"region_A", "region_A", "region_A", "region_A", "region_A", "region_B", 
"region_B", "region_B", "region_B", "region_B", "region_B", "region_A", 
"region_B", "region_B", "region_B", "region_B", "region_B", "region_B", 
"region_B", "region_B", "region_B"), first_event = c("26/04/2021", 
"13/03/2020", "18/05/2022", "06/03/2023", "14/03/2023", "21/03/2023", 
"23/03/2023", "09/04/2023", "10/04/2023", "17/05/2023", "18/05/2023", 
"29/05/2020", "16/07/2020", "20/07/2020", "24/07/2020", "27/07/2020", 
"21/08/2020", "25/08/2020", "14/09/2020", "16/05/2023", "16/05/2023", 
"21/05/2023", "26/05/2023", "27/05/2023", "28/04/2021", "24/03/2023", 
"19/04/2023", "28/04/2023", "30/04/2023", "06/05/2023", "10/03/2023", 
"13/03/2023", "21/03/2023", "21/03/2023", "23/03/2023"), last_event = c("17/12/2022", 
"14/02/2023", "06/03/2023", "14/03/2023", "21/03/2023", "23/03/2023", 
"09/04/2023", "10/04/2023", "17/05/2023", "18/05/2023", "31/05/2023", 
"16/07/2020", "20/07/2020", "24/07/2020", "27/07/2020", "21/08/2020", 
"25/08/2020", "14/09/2020", "25/03/2021", "16/05/2023", "21/05/2023", 
"26/05/2023", "27/05/2023", "05/06/2023", "04/03/2023", "01/03/2024", 
"28/04/2023", "30/04/2023", "06/05/2023", "14/08/2023", "13/03/2023", 
"21/03/2023", "21/03/2023", "23/03/2023", "03/04/2023")), class = "data.frame", row.names = c(NA, 
-35L))

And here's the current code for the plot:

# Parse the dd/mm/yyyy strings into Date objects.
df$first_event <- as.Date(df$first_event, format="%d/%m/%Y")
df$last_event <- as.Date(df$last_event, format="%d/%m/%Y")


# Sort rows by region (ascending), then by first_event descending
# (the date is negated via its integer representation).
df <- with(df,df[order(region, -as.integer(first_event)),])
# Turn id into a factor whose levels follow the order in which each id first
# appears in the sorted rows above.
df$id <- with(df,factor(id,levels=unique(id)))


# Timeline plot: one horizontal segment per row, from first_event to
# last_event, placed on the y-axis by id and coloured by country.
# Each country gets its own geom_segment() layer on a subset of df so that a
# constant size can be set per country (8 for country_3 and country_5,
# 5 for the others).
# NOTE(review): no layer maps yend; geom_segment() lists yend among its
# aesthetics, so this may error depending on the ggplot2 version - confirm.
ggplot() +
  geom_segment(data = df[df$country=="country_1", ], aes(colour = country, 
                                                        x = first_event, 
                                                        xend = last_event, 
                                                        y = id), 
                                                        size = 5) +
  geom_segment(data = df[df$country=="country_2", ], aes(colour = country, 
                                                         x = first_event, 
                                                         xend = last_event, 
                                                         y = id), 
                                                         size = 5) +
  geom_segment(data = df[df$country=="country_3", ], aes(colour = country, 
                                                         x = first_event, 
                                                         xend = last_event, 
                                                         y = id), 
                                                         size = 8) +
  geom_segment(data = df[df$country=="country_4", ], aes(colour = country, 
                                                         x = first_event, 
                                                         xend = last_event, 
                                                         y = id), 
                                                         size = 5) +
  geom_segment(data = df[df$country=="country_5", ], aes(colour = country, 
                                                         x = first_event, 
                                                         xend = last_event, 
                                                         y = id), 
                                                         size = 8) +
  # Fixed colour per country, matched by name.
  scale_colour_manual(values=c("country_1"="#543005", 
                              "country_2"="#c7a148",
                              "country_3"="#f6e8c3",
                              "country_4"="#01665e",
                              "country_5"="#35978f")) +
  theme_bw() +
  # No y-axis title; legend placed below the panels.
  theme(
    axis.title.y = element_blank(),
    legend.position = "bottom",
    legend.box="vertical"
  ) +
# Single-row colour legend; one panel per region stacked in one column, each
# with its own y-axis (scales = "free_y").
guides(colour = guide_legend(nrow = 1)) + facet_wrap(~region, ncol=1, scales = "free_y")

And the plot output: enter image description here

1) My goal is to order the y axis by date (first event). So, in this case, the order would be:

region A: ID2, then ID4, ID1, ID3, ID7

region B: ID6, ID9, ID8, and then ID5

I can't make that happen, even after looking at the other questions. I think it's connected to the fact that each id sometimes has more than one segment (i.e., multiple rows per id in the dataset rather than just one). I thought this part with(df,df[order(region, -as.integer(first_event)),]) would do it, but it doesn't produce the order I need. I also thought it could be connected to the order of the geom_segment() calls in the plot code, because the segments are indeed ordered by first_event but then also split by country; I'm not sure how to fix it.

Also, 2) how to make the legend have the same sizes per country? This question mentions this but it seems to be quite simple and in this case it's not helping much.

Any help with this would be much appreciated!


Solution

  • We need to fix your order of levels=. Currently they are

    # Level order of the id factor as produced by the question's code.
    levels(df$id)
    # [1] "ID3" "ID7" "ID1" "ID4" "ID2" "ID5" "ID8" "ID9" "ID6"
    

    which does not meet your expectations. Use this to order the levels based on the order of first_event:

    # reorder() summarises first_event per id with FUN, which defaults to mean.
    # Pass FUN = min so each id is ranked by its earliest first_event; an id
    # with several rows is then not shifted by its later events.
    df$id <- reorder(df$id, df$first_event, FUN = min)
    levels(df$id)
    # [1] "ID2" "ID4" "ID1" "ID6" "ID3" "ID9" "ID7" "ID8" "ID5"
    

    I'm adding yend=id to your ggplot code, as it was throwing an error for me without it.

    Side note: you don't need five calls to geom_segment, just one. Since you want to have different sizes for each, we can control that in the same way that you control colors.

    For the legend, we can fix this by removing "size" from the legend and forcing the "key glyph" to be a "rect".

    # Per-country styling kept in one lookup table: segment size and colour.
    my_aes <- data.frame(
      country = c("country_1", "country_2", "country_3", "country_4", "country_5"),
      size = c(5, 5, 8, 5, 8),
      color = c("#543005", "#c7a148", "#f6e8c3", "#01665e", "#35978f")
    )
    # Position aesthetics are shared, so they are declared once in ggplot();
    # the single geom_segment() layer maps both colour and size to country.
    ggplot(df, aes(x = first_event, xend = last_event, y = id, yend = id)) +
      geom_segment(
        aes(colour = country, size = country),
        # Draw each legend key with the rectangle glyph.
        key_glyph = draw_key_rect
      ) +
      # Named vectors (names = country) tie each value to its country by name.
      scale_colour_manual(values = with(my_aes, setNames(color, country))) +
      # Size still varies per country, but its legend is suppressed.
      scale_size_manual(
        values = with(my_aes, setNames(size, country)),
        guide = "none"
      ) +
      facet_wrap(~region, ncol = 1, scales = "free_y") +
      guides(colour = guide_legend(nrow = 1)) +
      theme_bw() +
      theme(
        legend.box = "vertical",
        legend.position = "bottom",
        axis.title.y = element_blank()
      )
    

    ggplot with the y-axis ordered based on first event, and the legend boxes all being the same size

    If you want the y-axis to be ordered from top-to-bottom, you can add scale_y_discrete(limits = rev) to the plotting expression.