Control sorting of field in a geom_col()

I'm struggling to reproduce this graph that I built in Tableau.

I'm missing the Sub-totals, but think I can solve that problem. What's causing me the problem is sorting Transportation Mode by the SUM() of the overall measure. Feel like it should be handled dynamically, but I'm OK hard-coding to force it in the order that I want just to get over this hurdle and keep moving forward.

Here's what my R version looks like (disregard the forcats vertical thing on the left side, that's just junk created when I was trying to build the graphs for this post).

It's important that Personal Vehicles is on top and Pedestrians is second in each facet. When I add the subtotals into my R dataset, I will need it to be at the bottom of the facet. Not sure how to control subtotals if I can't even control the main field.

Here's the data behind the graph:

Border_Code	measure_summary	mode_color_code	annual_crossings	record_cnt
CAN	Personal Vehicles	#C14F22	20608230	973
CAN	Trucks	#34844A	5526056	896
CAN	Pedestrians	#4E79A7	215983	347
CAN	Buses	#8074A8	48334	398
CAN	Trains	#848E93	20732	276
MEX	Personal Vehicles	#C14F22	75891766	312
MEX	Pedestrians	#4E79A7	39421651	329
MEX	Trucks	#34844A	7356659	257
MEX	Buses	#8074A8	118719	179
MEX	Trains	#848E93	10772	84

Here's my R code to produce the dataset that does some of the early data wrangling before I aggregate to the CSV file. I don't think this is the problem, but I do create the measure_summary factor field in this section.

# Add some features to the BASE data frame.  This is the BASE that all sub-queries (data frames) will pull from.
data_00_base_mod <- data_00_base %>%
  mutate(Border_Code = ifelse(grepl("Canada", Border, fixed = TRUE), "CAN", "MEX")) %>%
  separate(Date, c("Month", "Year"), remove = FALSE)  %>%
  # mutate(Measure = factor(Measure, order = TRUE, levels = c("Bus Passengers","Buses","Pedestrians","Personal Vehicle Passengers","Personal Vehicles","Rail Containers Empty","Rail Containers Loaded","Train Passengers","Trains","Truck Containers Empty","Truck Containers Loaded","Trucks"))) 
  # mutate(Measure = factor(Measure, order = TRUE, levels = c("Personal Vehicles","Personal Vehicle Passengers","Pedestrians","Trucks","Truck Containers Loaded","Truck Containers Empty","Buses","Bus Passengers","Trains","Train Passengers","Rail Containers Loaded","Rail Containers Empty"))) 
  mutate(measure_summary = factor(Measure, order = TRUE, levels = c("Personal Vehicles","Personal Vehicle Passengers","Pedestrians","Trucks","Truck Containers Loaded","Truck Containers Empty","Buses","Bus Passengers","Trains","Train Passengers","Rail Containers Loaded","Rail Containers Empty"))) %>%
  mutate(Measure = factor(Measure, order = TRUE, levels = c("Trains","Train Passengers","Rail Containers Loaded","Rail Containers Empty","Buses","Bus Passengers","Trucks","Truck Containers Loaded","Truck Containers Empty","Pedestrians","Personal Vehicles","Personal Vehicle Passengers"))) %>%
  # mutate(Year_num) = as.integer(Year)
  mutate(state_delete_me = State)
  
data_00_base_mod$Year_num <- as.integer(data_00_base_mod$Year)

  # Augment with a dimension table to help control colors and sorting in the presentation tier
  # This is all stuff I decided on when building the Tableau dashboard.  I want to follow that lead with R to make it easier to compare/contrast output side-by-side
  dim_measure <- data.frame(
    Measure=c("Bus Passengers","Buses","Pedestrians","Personal Vehicle Passengers","Personal Vehicles","Rail Containers Empty","Rail Containers Loaded","Train Passengers","Trains","Truck Containers Empty","Truck Containers Loaded","Trucks")
    ,mode_main_sort=c(8,7,3,2,1,11,10,12,9,6,5,4)
    ,mode_color_code=c("#C799BC","#8074A8","#4E79A7","#F59C3C","#C14F22","#CDCECD","#5B6570","#89C8CC","#848E93","#F4D166","#B2C25B","#34844A")
  )
  
  # Left Join the Base table with supplemental information about the Measures to help with the presentation layer
  data_00_base_mod <- merge(x = data_00_base_mod, y = dim_measure, by = "Measure", all.x = TRUE)

And this is the block of code that builds the dataset for the graph (and the csv attachment).

  # A second pass at this using a secondary dim field, measure_summary
  data_10b_core <- data_00_base_mod %>%
    filter(measure_summary == "Personal Vehicles" | measure_summary == "Pedestrians" | measure_summary == "Trucks" | measure_summary == "Buses" | measure_summary =="Trains") %>%
    filter(Year == "2023") %>%
    select(Border_Code, measure_summary, mode_color_code, Value) %>%
    group_by(Border_Code, measure_summary, mode_color_code) %>%
    summarise(annual_crossings = sum(Value), record_cnt = n(), .groups = 'keep') %>%
    arrange(Border_Code, - annual_crossings)
    

  gg_10b_year_summary <- ggplot(data_10b_core) + 
    geom_col(aes(x=measure_summary, y=annual_crossings, fill = mode_color_code)) +
    geom_text(aes(x=measure_summary, y=annual_crossings, label = paste(round(annual_crossings / 1e6, 2), "M")), vjust = 0.5, hjust = -0.2) +
    coord_flip() +
    scale_fill_identity() +
    facet_grid(vars(Border_Code, )) + 
    theme_minimal() +
    theme(axis.text.x = element_text(angle=0),
          panel.background = element_blank(),
          strip.background = element_rect(colour="gray50", fill="gray90"),
          panel.border = element_rect(colour = "gray50", fill=NA, size=1)) +
    scale_y_continuous(name = "Inbound Border Crossings",
                       labels = unit_format(unit = "M", scale = 1e-6)
    ) + 
    labs(title = "Border Crossings by Mode and Country in 2023")
  plot(gg_10b_year_summary)

Many thanks for any help you can provide.

Cheers!

Solution

By default the categories are ordered alphabetically. If you want a specific order you have to convert to a factor with your desired order. And if you want to reorder based on the value of a numeric variable you could do so using reorder. By default reorder will reorder based on the mean but you could change that by setting FUN=sum to reorder based on the sum:

Note: I switched x and y to get rid of the coord_flip and used geom_label (with fill=NA and label.size=0) instead of geom_text to add some padding around the labels.

library(ggplot2)
library(scales)

ggplot(data_10b_core, aes(
  x = annual_crossings,
  y = reorder(measure_summary, annual_crossings, FUN = sum)
)) +
  geom_col(
    aes(fill = mode_color_code)
  ) +
  geom_label(
    aes(
      label = paste(round(annual_crossings / 1e6, 2), "M")
    ),
    hjust = 0,
    fill = NA,
    label.size = 0
  ) +
  scale_fill_identity() +
  facet_grid(vars(Border_Code)) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(angle = 0),
    panel.background = element_blank(),
    strip.background = element_rect(colour = "gray50", fill = "gray90"),
    panel.border = element_rect(colour = "gray50", fill = NA, size = 1)
  ) +
  scale_x_continuous(
    name = "Inbound Border Crossings",
    labels = unit_format(unit = "M", scale = 1e-6)
  ) +
  labs(title = "Border Crossings by Mode and Country in 2023")
#> Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
#> ℹ Please use the `linewidth` argument instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

DATA

data_10b_core <- structure(list(Border_Code = c(
  "CAN", "CAN", "CAN", "CAN", "CAN",
  "MEX", "MEX", "MEX", "MEX", "MEX"
), measure_summary = c(
  "Personal Vehicles",
  "Trucks", "Pedestrians", "Buses", "Trains", "Personal Vehicles",
  "Pedestrians", "Trucks", "Buses", "Trains"
), mode_color_code = c(
  "#C14F22",
  "#34844A", "#4E79A7", "#8074A8", "#848E93", "#C14F22", "#4E79A7",
  "#34844A", "#8074A8", "#848E93"
), annual_crossings = c(
  20608230,
  5526056, 215983, 48334, 20732, 75891766, 39421651, 7356659, 118719,
  10772
), record_cnt = c(
  973, 896, 347, 398, 276, 312, 329, 257,
  179, 84
)), class = "data.frame", row.names = c(NA, -10L))