r ggplot2 geom-col

Creating a bar plot using geom_col() and coord_flip()


Using the dataset below, I would like to create the plot shown in the image further down. I tried the following code, but it returned a plot that is not even close to the one I need.

My data

structure(list(UWLE = structure(c(0.600000023841858, 0.600000023841858, 
0.800000011920929, 0.699999988079071, 0.899999976158142, 0.300000011920929, 
0.400000005960464, 0.400000005960464, 0.400000005960464, 0.5, 
0.400000005960464, 0.400000005960464, 0.5, 0.5, 0.600000023841858, 
0.699999988079071, 0.699999988079071, 0.899999976158142, 0.800000011920929, 
1), format.stata = "%9.0g"), HWLE = structure(c(10.1999998092651, 
9.89999961853027, 9.39999961853027, 10.8000001907349, 12.3999996185303, 
5, 4.90000009536743, 4.5, 5.30000019073486, 6.5, 7.59999990463257, 
7.40000009536743, 7, 8.10000038146973, 9.5, 10.5, 10.3000001907349, 
9.69999980926514, 11.1000003814697, 12.8999996185303), format.stata = "%9.0g"), 
    OWLE = structure(c(10.1999998092651, 10, 10, 9.89999961853027, 
    10.8999996185303, 9.10000038146973, 8.89999961853027, 8.80000019073486, 
    9, 10.1999998092651, 11.5, 11.1999998092651, 11.3000001907349, 
    11.3000001907349, 12.3999996185303, 9.89999961853027, 9.80000019073486, 
    9.80000019073486, 9.69999980926514, 10.6000003814697), format.stata = "%9.0g"), 
    OBLE = structure(c(10.1999998092651, 10.8999996185303, 10.3000001907349, 
    9.89999961853027, 9.10000038146973, 14.6000003814697, 15.3000001907349, 
    14.5, 14.3000001907349, 13.8000001907349, 13.1000003814697, 
    13.8000001907349, 13.1000003814697, 12.8000001907349, 12, 
    9.69999980926514, 10.3000001907349, 9.80000019073486, 9.39999961853027, 
    8.60000038146973), format.stata = "%9.0g"), TLE = structure(c(31.1000003814697, 
    31.3999996185303, 30.3999996185303, 31.2999992370605, 33.2000007629395, 
    29, 29.3999996185303, 28.2000007629395, 29, 31.1000003814697, 
    32.5999984741211, 32.9000015258789, 32, 32.7000007629395, 
    34.5, 30.8999996185303, 31.1000003814697, 30.1000003814697, 
    31, 33.0999984741211), format.stata = "%9.0g"), birth_place = structure(c(0, 
    1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4), format.stata = "%10.0g", class = c("haven_labelled", 
    "vctrs_vctr", "double"), labels = c(Northeast = 0, Midwest = 1, 
    South = 2, West = 3, Foreign = 4)), race = structure(c(0, 
    0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3), format.stata = "%18.0g", class = c("haven_labelled", 
    "vctrs_vctr", "double"), labels = c(`non-Hispanic White` = 0, 
    `non-Hispanic Black` = 1, Hispanic = 2, `non-Hispanic Other` = 3
    ))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-20L))

Plot I want from the data above

enter image description here

Code I tried

library(ggplot2)

# Attempted code from the question; kept as posted (it does not yield the
# desired plot).

# Convert labeled variables to factors
# NOTE(review): this uses base as.factor() on haven-labelled columns;
# presumably the value labels (Northeast, Midwest, ...) are not carried over
# as factor levels this way -- confirm against haven's as_factor().
stack_data_G1$birth_place <- as.factor(stack_data_G1$birth_place)
stack_data_G1$race <- as.factor(stack_data_G1$race)

# Create the stacked horizontal bar plot
# x is the row-wise sum of the four components and fill is race, so the four
# components are never mapped to separate bar segments; y is birth_place
# reordered by the negated sum.
ggplot(stack_data_G1, aes(x = UWLE + HWLE + OWLE + OBLE, y = reorder(birth_place, -(UWLE + HWLE + OWLE + OBLE)), fill = race)) +
  geom_col() +
  # swaps the x and y axes of the mapping above
  coord_flip() +
  # NOTE(review): `yaxis` is not one of the aesthetics mapped in aes() above
  labs(x = NULL, y = NULL, title = "XXXXXX", subtitle = "XXX", yaxis = "YYYY") +
  # NOTE(review): four component names are supplied as labels for the
  # birth_place axis, while the data define five birth_place values -- the
  # label count looks mismatched.
  scale_y_discrete(labels = c("UWLE", "HWLE", "OWLE", "OBLE")) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 8),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(size = 16),
    plot.subtitle = element_text(size = 12),
    panel.grid = element_blank()
  )

Solution

  • Prepare the data

    library(haven)
    library(ggplot2)
    library(dplyr)
    library(tidyr)
    
    # Replace the haven-labelled columns with regular factors
    stack_data_G1 <- stack_data_G1 |>
      mutate(across(c(birth_place, race), as_factor))
    
    # Reshape to one row per bar segment and pre-compute the text layers
    graph_data <- stack_data_G1 |>
      pivot_longer(
        cols = c(UWLE, HWLE, OWLE, OBLE),
        names_to = "var",
        values_to = "value"
      ) |>
      # the factor levels fix the order of the segments in the graph
      mutate(
        var = factor(var, levels = rev(c("UWLE", "HWLE", "OWLE", "OBLE")))
      ) |>
      # sort the segments of each bar so the running total follows that order
      arrange(race, birth_place, desc(var)) |>
      group_by(race, birth_place) |>
      mutate(
        # centre of a segment = running total minus half of the segment
        label_pos = cumsum(value) - value / 2,
        # only segments of at least 4 are big enough to carry a label
        label_text = if_else(value >= 4, format(value, nsmall = 1), ""),
        # total label goes just past the bar end; the offset of 2 may need
        # manual adjustment
        tle_pos = sum(value) + 2,
        tle_label = format(TLE, nsmall = 1)
      ) |>
      ungroup()
    
    # The data, now well organized for the graph (one row per bar segment)
    graph_data
    #> # A tibble: 80 × 9
    #>      TLE birth_place race    var    value label_pos label_text tle_pos tle_label
    #>    <dbl> <fct>       <fct>   <fct>  <dbl>     <dbl> <chr>        <dbl> <chr>    
    #>  1  31.1 Northeast   non-Hi… UWLE   0.600     0.300 ""            33.2 31.1     
    #>  2  31.1 Northeast   non-Hi… HWLE  10.2       5.70  "10.2"        33.2 31.1     
    #>  3  31.1 Northeast   non-Hi… OWLE  10.2      15.9   "10.2"        33.2 31.1     
    #>  4  31.1 Northeast   non-Hi… OBLE  10.2      26.1   "10.2"        33.2 31.1     
    #>  5  31.4 Midwest     non-Hi… UWLE   0.600     0.300 ""            33.4 31.4     
    #>  6  31.4 Midwest     non-Hi… HWLE   9.90      5.55  " 9.9"        33.4 31.4     
    #>  7  31.4 Midwest     non-Hi… OWLE  10        15.5   "10.0"        33.4 31.4     
    #>  8  31.4 Midwest     non-Hi… OBLE  10.9      25.9   "10.9"        33.4 31.4     
    #>  9  30.4 South       non-Hi… UWLE   0.800     0.400 ""            32.5 30.4     
    #> 10  30.4 South       non-Hi… HWLE   9.40      5.50  " 9.4"        32.5 30.4     
    #> # ℹ 70 more rows
    

    Now the graph

    # Create the stacked horizontal bar plot, one facet row per race.
    # Expects graph_data with one row per race x birth_place x var and the
    # columns value, label_pos, label_text, tle_pos, tle_label and TLE.
    # The y mapping lives in ggplot() so every layer uses the same
    # TLE-ordered birth_place factor.
    ggplot(graph_data, aes(y = reorder(birth_place, TLE))) +
      geom_col(aes(x = value, fill = var)) +
      # value labels centred inside the (large enough) segments
      geom_text(aes(x = label_pos, label = label_text), color = "white") +
      # total (TLE) label after each bar; tle_pos/tle_label repeat on every
      # var row, so keep one row per bar to avoid overplotting the same text
      geom_text(
        aes(x = tle_pos, label = tle_label),
        data = function(d) {
          distinct(d, race, birth_place, TLE, tle_pos, tle_label)
        }
      ) +
      facet_grid(race ~ ., switch = "y") +
      labs(title = "XXXXXX", subtitle = "XXX", x = NULL, y = "YYYYY") +
      # bars start flush at 0; a little room on the right keeps the
      # right-most total label from being clipped at the panel edge
      scale_x_continuous(expand = expansion(mult = c(0, 0.05))) +
      theme_minimal() +
      theme(
        axis.text.y = element_text(size = 8),
        axis.title.y = element_text(size = 10),
        plot.title = element_text(size = 16),
        plot.subtitle = element_text(size = 12),
        panel.grid = element_blank(),
        strip.placement = "outside"
      )
    

    Created on 2023-06-07 with reprex v2.0.2