r ggplot2 geom-histogram

Y axis to show counts with geom_density and geom_histogram combined in ggplot


With my below toy data and example code, I am trying to make two faceted plots that are identical (other than colors and their x values).

I want to make a histogram and overlay a density curve on top. When I do this, as below, it seems to replace the geom_histogram y axis (which should be counts, if I were using geom_histogram on its own) with the density range.

Is there a way to force the y axis to show the original counts from the histogram?

A second-order question: if anyone has advice on how to subsequently make the y axes identical in range across the two plots, so that I don't have to repeat the y-axis ticks/labels in the right-hand plot, that would be excellent. Thanks.

library(patchwork)
library(tidyverse)

# Components common to both plots: one panel per condition and x-axis
# ticks every 10 units with no padding at either end.
x_ticks <- seq(10, 90, by = 10)
shared_parts <- list(
  facet_wrap(~condition),
  scale_x_continuous(breaks = x_ticks, expand = c(0, 0))
)

# Hours: histogram with a density curve on top. Both layers map y to the
# computed density, so the y axis shows density rather than counts.
p1 <- ggplot(d, aes(x = hours)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 10,
    fill = "goldenrod",
    alpha = 0.3,
    color = "black"
  ) +
  geom_density(
    aes(y = ..density..),
    color = "dodgerblue3",
    lwd = 1
  ) +
  shared_parts +
  labs(title = "Hours")

# Money: same construction with different colours and no y-axis title.
p2 <- ggplot(d, aes(x = money)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 10,
    fill = "palegreen2",
    alpha = 0.3,
    color = "black"
  ) +
  geom_density(
    aes(y = ..density..),
    color = "pink2",
    lwd = 1
  ) +
  shared_parts +
  labs(title = "Money", y = NULL)

# Place the two plots side by side (patchwork).
p1 + p2
# Toy data (dput() output): a grouped tibble with 50 rows and three columns,
# `hours` and `money` (numeric) and `condition` (factor). The factor has five
# levels, but only conditionA-conditionD occur in the rows; condition_control
# has no observations. The `groups` attribute records grouping by `condition`.
d<-structure(list(hours = c(70, 10, 20, 10, 50, 10, 
50, 60, 50, 70, 40, 90, 40, 60, 70, 40, 50, 60, 80, 10, 90, 50, 
40, 40, 70, 50, 40, 10, 80, 70, 70, 90, 20, 10, 60, 90, 10, 20, 
90, 70, 30, 60, 90, 60, 90, 20, 20, 20, 10, 40), money = c(60, 
70, 10, 10, 40, 30, 40, 40, 50, 80, 90, 70, 50, 60, 80, 20, 10, 
90, 20, 40, 90, 40, 30, 60, 40, 60, 70, 90, 10, 20, 80, 90, 80, 
60, 70, 70, 60, 50, 60, 90, 90, 80, 60, 40, 30, 80, 30, 20, 60, 
20), condition = structure(c(4L, 5L, 5L, 5L, 5L, 2L, 4L, 4L, 
2L, 3L, 5L, 3L, 5L, 4L, 3L, 5L, 2L, 3L, 4L, 3L, 2L, 3L, 3L, 2L, 
4L, 4L, 2L, 3L, 2L, 5L, 3L, 5L, 2L, 4L, 5L, 2L, 5L, 2L, 2L, 3L, 
5L, 2L, 3L, 5L, 5L, 5L, 4L, 2L, 3L, 5L), levels = c("condition_control", 
"conditionA", "conditionB", "conditionC", "conditionD"
), class = "factor")), class = c("grouped_df", "tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -50L), groups = structure(list(
    condition = structure(2:5, levels = c("condition_control", 
    "conditionA", "conditionB", "conditionC", "conditionD"
    ), class = "factor"), .rows = structure(list(c(6L, 9L, 17L, 
    21L, 24L, 27L, 29L, 33L, 36L, 38L, 39L, 42L, 48L), c(10L, 
    12L, 15L, 18L, 20L, 22L, 23L, 28L, 31L, 40L, 43L, 49L), c(1L, 
    7L, 8L, 14L, 19L, 25L, 26L, 34L, 47L), c(2L, 3L, 4L, 5L, 
    11L, 13L, 16L, 30L, 32L, 35L, 37L, 41L, 44L, 45L, 46L, 50L
    )), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", 
    "list"))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-4L), .drop = TRUE))

Solution

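  • Replace every occurrence of ..density.. with after_stat(density * nrow(d)) to rescale from density towards counts (i.e., multiply by the total number of observations). Note that this factor alone does not give exact counts: stat_bin() computes density as count / (n * binwidth), with n taken within each facet panel, so exact counts come from after_stat(count) for the histogram and after_stat(count * binwidth) for the density curve. The ..var.. notation is deprecated (since ggplot2 3.4.0) in favor of after_stat().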

    # Width of the histogram bins. It is also the factor that puts the density
    # curve on the count scale: expected count per bin = density * n * bin width.
    bin_width <- 10
    
    # Hours. after_stat(count) is the bin count that stat_bin() computes within
    # each facet panel, so the y axis shows true counts. stat_density() exposes
    # count = density * n (n = observations in the panel); multiplying it by the
    # bin width overlays the curve on that same count scale. Scaling density by
    # nrow(d) would not give counts, because density is normalised within each
    # panel and is also divided by the bin width.
    p1 <- ggplot(d, aes(x = hours)) +
      geom_histogram(aes(y = after_stat(count)), 
                     binwidth = bin_width, 
                     fill = "goldenrod", alpha = 0.3, color = "black") +
      geom_density(aes(y = after_stat(count * bin_width)), 
                   color = "dodgerblue3", lwd = 1) +  
      facet_wrap(~condition) +
      scale_x_continuous(breaks = seq(10, 90, by = 10),
                         expand = c(0, 0)) +
      labs(title = "Hours")
    p1
    
    # Money: same construction; the y-axis title is dropped because this plot
    # sits to the right of p1.
    p2 <- ggplot(d, aes(x = money)) +
      geom_histogram(aes(y = after_stat(count)), 
                     binwidth = bin_width, 
                     fill = "palegreen2", alpha = 0.3, color = "black") +
      geom_density(aes(y = after_stat(count * bin_width)), 
                   color = "pink2", lwd = 1) +  
      facet_wrap(~condition) +
      scale_x_continuous(breaks = seq(10, 90, by = 10),
                         expand = c(0, 0)) +
      labs(title = "Money",
           y = NULL)
    
    # Combine the two plots side by side (patchwork).
    p1 + p2
    

    enter image description here

    To align the y-axes, you could set the y-limits of both plots to be equal using an approach modified from this question:

    # pull the trained y-scale range out of each plot
    y_ranges <- map(list(p1, p2), \(p) layer_scales(p)$y$range$range)
    # overall extent covering both plots
    lims <- range(unlist(y_ranges))
    # give both plots the same y-limits, then combine them
    p1_limited <- p1 + ylim(lims)
    p2_limited <- p2 + ylim(lims)
    p1_limited + p2_limited
    

    enter image description here

    Obviously this leaves some cleaning up of the axis and tick labels to be done.