r ggplot2 dplyr bar-chart

Plot multiple true/false columns as bar plot in R


I have a dataframe that looks like this:

# Example data: a 10-row tibble with five logical (TRUE/FALSE) columns,
# apparently dput() output. The expression is not assigned to a name here;
# the solution code refers to its input as `dat` (presumably this object).
structure(list(chesapeakebay = c(TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE), icefishing = c(FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE), lakesimpoundments = c(FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE), 
    nontidal = c(FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, 
    FALSE, FALSE, TRUE), tidal = c(FALSE, TRUE, FALSE, TRUE, 
    TRUE, TRUE, TRUE, FALSE, TRUE, TRUE)), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"))

(Image not available: an example of the desired bar chart.)

I want to create a bar chart similar to this with the percentage of rows answering true for each column.

Is this possible?


Solution

  • Pivot to long format, then compute the count (n) and the percentage of TRUE values for each column using sum() and mean():

    library(dplyr)
    library(tidyr)
    library(ggplot2)
    library(scales)
    
    # Count the TRUE answers in every column of `dat` (a data frame of logical
    # columns) and draw them as horizontal bars, each labelled "n (pct%)".
    # NOTE: an NA in a column makes its n and label NA; add na.rm = TRUE to
    # sum()/mean() if the data can contain missing answers.
    dat |>
      # Long format: one row per cell, with columns `name` and `value`.
      pivot_longer(everything()) |>
      summarize(
        # sum() of a logical counts the TRUEs; mean() gives their share.
        n = sum(value),
        lab = paste0(n, " (", percent(mean(value)), ")"),
        .by = name
      ) |>
      ggplot(aes(n, name)) +
      geom_col(fill = "darkmagenta") +
      # A negative hjust starts the label just past the end of the bar. Unlike
      # a fixed nudge_x (in data units), this does not depend on the count scale.
      geom_text(aes(label = lab), hjust = -0.1) +
      # Pad the right-hand side relative to the data range so the labels fit,
      # rather than hard-coding limits/breaks that only suit a 10-row data set
      # (bars beyond a fixed upper limit would be dropped as out of bounds).
      scale_x_continuous(
        breaks = breaks_pretty(),
        expand = expansion(mult = c(0.05, 0.25))
      ) +
      labs(x = NULL, y = NULL) +
      theme_minimal()