I have a dataframe that looks like this:
# Example data: five logical columns, ten rows. Bound to `dat` because the
# plotting pipeline further down reads the data frame under that name.
dat <- structure(
  list(
    chesapeakebay = c(
      TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
    ),
    icefishing = c(
      FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE
    ),
    lakesimpoundments = c(
      FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE
    ),
    nontidal = c(
      FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE
    ),
    tidal = c(
      FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE
    )
  ),
  # Compact row-name form produced by dput(): 10 automatic row names.
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
I want to create a bar chart similar to this with the percentage of rows answering true for each column.
Is this possible?
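Pivot to long format, then compute the count and the percentage of `TRUE` values for each column using `sum()` and `mean()` (the data frame is assumed to be stored in `dat`):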
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
# Reshape to long format (one row per original cell, with the column name in
# `name` and the logical answer in `value`), then summarise per column:
#   n   - number of TRUE answers (sum of a logical vector)
#   lab - bar label of the form "count (percent)", where the percentage is the
#         share of TRUE answers (mean of a logical vector)
dat |>
  pivot_longer(everything()) |>
  summarize(
    n = sum(value),
    lab = paste0(n, " (", percent(mean(value)), ")"),
    .by = name
  ) |>
  ggplot(aes(n, name)) +
  geom_col(fill = "darkmagenta") +
  # Left-align each label at the end of its bar. The leading space supplies the
  # gap, so the offset no longer depends on the scale of the counts (a fixed
  # nudge in data units only looks right for about 10 rows).
  geom_text(aes(label = paste0(" ", lab)), hjust = 0) +
  # Let the axis follow the data instead of hard-coding limits and breaks for a
  # 10-row data frame: fixed limits would drop any bar longer than the upper
  # limit. The extra 20% on the right leaves room for the labels.
  scale_x_continuous(
    breaks = breaks_pretty(),
    expand = expansion(mult = c(0.05, 0.2))
  ) +
  labs(x = NULL, y = NULL) +
  theme_minimal()