With my below toy data and example code, I am trying to make two faceted plots that are identical (other than colors and their x values).
I want to make a histogram and overlay a density curve on top. When I do this, as below, it seems to replace the geom_histogram y axis (which should be counts, if I were using geom_histogram on its own) with the density range.
Is there a way to force the y axis to show the original counts from the histogram?
A second-order question: if anyone has advice on how to subsequently make the y axes identical in range across the two plots, so that I don't have to repeat the y-axis ticks/labels in the right-hand plot, that would be excellent. Thanks.
library(patchwork)
library(tidyverse)
# Hours: histogram (bins of width 10) with a density curve drawn on top, one
# panel per condition. Both layers map y to the density statistic, so the
# y axis shows density rather than counts.
p1 <- ggplot(data = d, mapping = aes(x = hours)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    color = "black", fill = "goldenrod", alpha = 0.3,
    binwidth = 10
  ) +
  geom_density(
    mapping = aes(y = ..density..),
    lwd = 1, color = "dodgerblue3"
  ) +
  facet_wrap(~condition) +
  scale_x_continuous(
    expand = c(0, 0),
    breaks = seq(10, 90, by = 10)
  ) +
  labs(title = "Hours")
# Money: same layout as the hours plot, with different colors and no y-axis
# title. Both layers map y to the density statistic.
p2 <- ggplot(data = d, mapping = aes(x = money)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    color = "black", fill = "palegreen2", alpha = 0.3,
    binwidth = 10
  ) +
  geom_density(
    mapping = aes(y = ..density..),
    lwd = 1, color = "pink2"
  ) +
  facet_wrap(~condition) +
  scale_x_continuous(
    expand = c(0, 0),
    breaks = seq(10, 90, by = 10)
  ) +
  labs(y = NULL, title = "Money")

# Place the two plots side by side (patchwork)
p1 + p2
# Toy data (dput output): a 50-row grouped tibble with numeric columns `hours`
# and `money` and a factor column `condition`. The factor declares five levels,
# but the grouping metadata lists only four groups (levels 2-5), i.e. no rows
# belong to "condition_control". The data frame is grouped by `condition`.
d<-structure(list(hours = c(70, 10, 20, 10, 50, 10,
50, 60, 50, 70, 40, 90, 40, 60, 70, 40, 50, 60, 80, 10, 90, 50,
40, 40, 70, 50, 40, 10, 80, 70, 70, 90, 20, 10, 60, 90, 10, 20,
90, 70, 30, 60, 90, 60, 90, 20, 20, 20, 10, 40), money = c(60,
70, 10, 10, 40, 30, 40, 40, 50, 80, 90, 70, 50, 60, 80, 20, 10,
90, 20, 40, 90, 40, 30, 60, 40, 60, 70, 90, 10, 20, 80, 90, 80,
60, 70, 70, 60, 50, 60, 90, 90, 80, 60, 40, 30, 80, 30, 20, 60,
20), condition = structure(c(4L, 5L, 5L, 5L, 5L, 2L, 4L, 4L,
2L, 3L, 5L, 3L, 5L, 4L, 3L, 5L, 2L, 3L, 4L, 3L, 2L, 3L, 3L, 2L,
4L, 4L, 2L, 3L, 2L, 5L, 3L, 5L, 2L, 4L, 5L, 2L, 5L, 2L, 2L, 3L,
5L, 2L, 3L, 5L, 5L, 5L, 4L, 2L, 3L, 5L), levels = c("condition_control",
"conditionA", "conditionB", "conditionC", "conditionD"
), class = "factor")), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -50L), groups = structure(list(
condition = structure(2:5, levels = c("condition_control",
"conditionA", "conditionB", "conditionC", "conditionD"
), class = "factor"), .rows = structure(list(c(6L, 9L, 17L,
21L, 24L, 27L, 29L, 33L, 36L, 38L, 39L, 42L, 48L), c(10L,
12L, 15L, 18L, 20L, 22L, 23L, 28L, 31L, 40L, 43L, 49L), c(1L,
7L, 8L, 14L, 19L, 25L, 26L, 34L, 47L), c(2L, 3L, 4L, 5L,
11L, 13L, 16L, 30L, 32L, 35L, 37L, 41L, 44L, 45L, 46L, 50L
)), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-4L), .drop = TRUE))
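Replace every occurrence of ..density.. with after_stat(density * nrow(d)) to rescale density by the total number of observations. Note that this equals the histogram's bin counts only when the bin width is 1 and all observations fall in a single panel; in general, count = density * n * binwidth, where n is the number of observations in each facet panel. Surrounding a variable name with .. is now deprecated in favor of after_stat().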
# Hours: histogram showing true bin counts with a density curve on the same
# scale, one panel per condition.
#
# The histogram maps y to the bin count computed by the binning stat.
# Rescaling density by nrow(d) does not give counts here: for a histogram,
# count = density * n * binwidth, and with facets the statistics are computed
# per panel, so n is the panel's number of observations, not the total.
p1 <- ggplot(d, aes(x = hours)) +
  geom_histogram(aes(y = after_stat(count)),
                 binwidth = 10,
                 fill = "goldenrod", alpha = 0.3, color = "black") +
  # The density stat's `count` is density * n; multiplying by the histogram
  # bin width (10, must match `binwidth` above) puts the curve on the same
  # scale as the bars.
  geom_density(aes(y = after_stat(count * 10)),
               color = "dodgerblue3", lwd = 1) +
  facet_wrap(~condition) +
  scale_x_continuous(breaks = seq(10, 90, by = 10),
                     expand = c(0, 0)) +
  labs(title = "Hours")
p1
# Money: histogram showing true bin counts with a density curve on the same
# scale, one panel per condition; the y-axis title is dropped.
#
# The histogram maps y to the bin count computed by the binning stat.
# Rescaling density by nrow(d) does not give counts here: for a histogram,
# count = density * n * binwidth, and with facets the statistics are computed
# per panel, so n is the panel's number of observations, not the total.
p2 <- ggplot(d, aes(x = money)) +
  geom_histogram(aes(y = after_stat(count)),
                 binwidth = 10,
                 fill = "palegreen2", alpha = 0.3, color = "black") +
  # The density stat's `count` is density * n; multiplying by the histogram
  # bin width (10, must match `binwidth` above) puts the curve on the same
  # scale as the bars.
  geom_density(aes(y = after_stat(count * 10)),
               color = "pink2", lwd = 1) +
  facet_wrap(~condition) +
  scale_x_continuous(breaks = seq(10, 90, by = 10),
                     expand = c(0, 0)) +
  labs(title = "Money",
       y = NULL)
# Place the two plots side by side (patchwork)
p1 + p2
To align the y-axes, you could set the y-limits of both plots to be equal using an approach modified from this question:
# Collect the trained y-scale range of each plot into one numeric vector
ranges <- unlist(
  lapply(list(p1, p2), \(plot) layer_scales(plot)$y$range$range)
)
# Shared limits: smallest lower bound to largest upper bound
lims <- c(min(ranges), max(ranges))
# Give both plots the same y-limits and place them side by side
(p1 + ylim(lims)) + (p2 + ylim(lims))
Obviously this leaves some cleaning up of the axis and tick labels to be done.