r ggplot2 ridgeline-plot

How can I make the ridge heights of a ridgeline plot reflect the frequency/count of data points?


I have made a ridgeline plot that displays the density of each subset of my data. However, I would like to extend it so that the height of each ridgeline is proportional to the number of data points that fall within that subset. As far as I can tell, this functionality is no longer present in the ggridges package: from what I have found online, people could use counts for scaling in the past, but I can't make it work now. Maybe someone can confirm this or suggest an alternative. Thank you.

library(ggplot2)
library(dplyr)
library(ggridges)

#' Plot body-mass ridgelines per weight category, faceted by sex
#'
#' Reads one Excel file of female records and one of male records, classifies
#' every row by the ratio of `avg_body_mass` to the wild reference value of its
#' sex, and draws one density ridge per category with jittered raw points,
#' "N = ..." labels and a dashed vertical line at the wild reference.
#'
#' Both files must provide the columns `avg_body_mass` and `binSpecies`.
#'
#' @param file_female Path to the Excel file holding the female records.
#' @param file_male Path to the Excel file holding the male records.
#' @param wild_female_avg Wild reference body mass for females, in the same
#'   unit as `avg_body_mass`.
#' @param wild_male_avg Wild reference body mass for males, in the same unit
#'   as `avg_body_mass`.
#' @return The ggplot object, invisibly. As side effects the table of counts
#'   per sex and category and the plot itself are printed.
plot_body_mass_ridgeline <- function(file_female, file_male,
                                     wild_female_avg, wild_male_avg) {

  # 1. Read both Excel files and tag each with its sex and wild reference.
  #    readxl is not attached by this script, so the call is namespace-qualified.
  df_f <- readxl::read_excel(file_female) %>%
    mutate(Sex = "Female", wild_ref = wild_female_avg)
  df_m <- readxl::read_excel(file_male) %>%
    mutate(Sex = "Male", wild_ref = wild_male_avg)

  # 2. Combine
  df <- bind_rows(df_f, df_m)

  # 3. Classify relative deviation from the wild reference.
  #    Rows with a missing ratio match no branch and get an NA category.
  category_levels <- c("Underweight", "Healthy BM", "Overweight",
                       "Obese", "Morbidly obese")
  df <- df %>%
    mutate(
      ratio = avg_body_mass / wild_ref,
      category = case_when(
        ratio < 0.75                     ~ "Underweight",
        ratio >= 0.75 & ratio <= 1.25    ~ "Healthy BM",
        ratio >  1.25 & ratio <= 1.50    ~ "Overweight",
        ratio >  1.50 & ratio <= 1.75    ~ "Obese",
        ratio >  1.75                    ~ "Morbidly obese"
      ),
      category = factor(category, levels = category_levels)
    )

  # 4. Species: collapse to one string so the title stays a single label even
  #    when the input holds more than one species.
  species <- unique(df$binSpecies)
  plot_title <- paste(species, collapse = ", ")

  # 5. Counts per sex & category
  counts <- df %>%
    group_by(Sex, category) %>%
    summarise(n = n(), .groups = "drop")
  print(counts)

  # 6. Ridgeline plot with facets (Male block, Female block)
  p <- ggplot(df, aes(x = avg_body_mass, y = category, fill = category)) +
    stat_density_ridges(
      alpha = 0.7, scale = 2,
      quantile_lines = TRUE, quantiles = 2,
      rel_min_height = 0.0125, color = "black"
    ) +
    # Earlier attempt, kept for reference; `counts` is a data frame, whereas
    # the working call above passes `scale` a single number.
    #geom_density_ridges(alpha = 0.7, scale = counts, quantile_lines = TRUE, quantiles = 2, rel_min_height = 0.0125, color = "black")+
    geom_point(position = position_jitter(height = 0.1), size = 1.5, alpha = 0.7) +
    facet_grid(Sex ~ ., scales = "free_y", switch = "y") +   # group by sex, labels on left
    labs(
      title = plot_title,
      x = "Body Mass (Kg)",
      y = ""
    ) +
    scale_fill_manual(values = c(
      "Underweight" = "steelblue",
      "Healthy BM" = "green2",
      "Overweight" = "gold",
      "Obese" = "darkorange",
      "Morbidly obese" = "brown"
    )) +
    theme_minimal(base_size = 14) +
    theme(
      legend.position = "bottom",
      axis.text.x = element_text(face = "bold"),
      strip.placement = "outside",
      strip.text.y.left = element_text(size = 14, face = "bold")
    )

  # 7. Add counts, anchored at the median body mass of each sex/category group
  dens_peaks <- df %>%
    group_by(Sex, category) %>%
    summarise(xpos = median(avg_body_mass), .groups = "drop") %>%
    left_join(counts, by = c("Sex", "category"))

  p <- p +
    geom_text(data = dens_peaks,
              aes(x = xpos, y = category, label = paste0("N = ", n)),
              inherit.aes = FALSE,
              vjust = 2, hjust = -0.25, size = 4, fontface = "bold")

  # 8. Add wild average reference lines per sex
  wild_df <- tibble(
    Sex = c("Female", "Male"),
    wild_ref = c(wild_female_avg, wild_male_avg)
  )

  p <- p +
    # `linewidth` replaces `size` for line widths since ggplot2 3.4.0
    geom_vline(data = wild_df, aes(xintercept = wild_ref),
               linetype = "dashed", color = "black", linewidth = 1) +
    #annotate("text", x = 70, y = "Healthy BM", label = "Wild avg 70kg", angle = 90)+
    scale_x_continuous(
      breaks = seq(0, 300, by = 20)
      #limits = c(0, 300)   # optional, if you want to fix the visible range
    )

  print(p)
  invisible(p)
}
# Sample data: 22 body-mass records of one species, the first 11 rows female
# and the last 11 male.
df_sample <- tibble::tibble(
  avg_body_mass = c(
    50, 60, 64, 70, 72, 80, 90, 120, 130, 140, 150,
    160, 170, 175, 189, 190, 193, 200, 234, 235, 290, 260
  ),
  # a length-one value is recycled to every row
  binSpecies = "T.killia",
  Sex = rep(c("Female", "Male"), each = 11)
)

# Wild reference values
wild_female_avg <- 70
wild_male_avg <- 130

Figure of my plot: enter image description here


Solution

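  • We could map the scale parameter inside aes() instead of setting it to a constant, so that it varies per group with the count and each ridge's height becomes proportional to the number of observations in that group.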

    # Sketch: attach a per-(Sex, category) total `grp_n` to every row, then map
    # `scale` inside aes() so that each ridge's height follows its group's count.
    library(dplyr)
    # NOTE(review): sum(n) needs a column `n` in df; the question's df is not
    # shown to have one (only the separate `counts` table does) — join the
    # counts in first, or use n() to count rows per group. TODO confirm.
    ggplot(df |> mutate(grp_n = sum(n), .by = c(Sex, category)), 
             aes(x = avg_body_mass, y = category, fill = category)) +
      stat_density_ridges(alpha = 0.7, 
                          # the group with the largest grp_n gets scale 2; the
                          # others shrink in proportion to their count
                          aes(scale = grp_n/max(grp_n)*2),
                          quantile_lines = TRUE, quantiles = 2, 
                          rel_min_height = 0.0125, color = "black") + ...
    

    enter image description here