I have made a ridgeline plot that displays the density of each subset of my data. However, I would like to extend it so that the height of each ridgeline is proportional to the number of data points that fall within that subset. It seems that this functionality is no longer available in the ggridges package: browsing the internet, I found that people could use counts for scaling in the past, but I cannot make it work now. Can someone tell me how to do this? Thank you.
library(ggplot2)
library(dplyr)
library(ggridges)
#' Ridgeline plot of body mass per weight category, faceted by sex
#'
#' Reads one Excel file per sex, classifies every record by the ratio of its
#' `avg_body_mass` to the wild reference mass of its sex, prints the number of
#' records per sex and category, and prints a ridgeline plot with one facet
#' per sex, jittered raw points, "N = ..." labels and a dashed vertical line
#' at the wild reference mass of each sex.
#'
#' @param file_female,file_male Paths handed to `readxl::read_excel()`. The
#'   data must contain `avg_body_mass`; `binSpecies` is used for the title.
#' @param wild_female_avg,wild_male_avg Wild reference body mass for females
#'   and males; `avg_body_mass` is divided by it to obtain the ratio that is
#'   classified.
#' @param scale_by_count If `TRUE`, the height of each ridge is proportional
#'   to the number of records in its sex/category group (the largest group
#'   gets `ridge_scale`). If `FALSE` (default) every ridge uses `ridge_scale`.
#' @param ridge_scale Height scale of the ridges (maximum scale when
#'   `scale_by_count = TRUE`).
#' @return The ggplot object, invisibly. The plot and the count table are
#'   printed as side effects.
plot_body_mass_ridgeline <- function(file_female, file_male,
                                     wild_female_avg, wild_male_avg,
                                     scale_by_count = FALSE,
                                     ridge_scale = 2) {
  # 1. Read both Excel files. read_excel() is namespaced because readxl is
  #    not attached by the library() calls of this script.
  df_f <- readxl::read_excel(file_female) %>%
    mutate(Sex = "Female", wild_ref = wild_female_avg)
  df_m <- readxl::read_excel(file_male) %>%
    mutate(Sex = "Male", wild_ref = wild_male_avg)

  # 2. Combine
  df <- bind_rows(df_f, df_m)
  if (!"avg_body_mass" %in% names(df)) {
    stop("The input files must contain a column `avg_body_mass`.",
         call. = FALSE)
  }

  # 3. Classify relative deviation. case_when() takes the first matching
  #    branch, so each branch only needs its upper bound; an NA ratio matches
  #    no branch and stays NA.
  category_levels <- c("Underweight", "Healthy BM", "Overweight",
                       "Obese", "Morbidly obese")
  df <- df %>%
    mutate(
      ratio = avg_body_mass / wild_ref,
      category = case_when(
        ratio < 0.75 ~ "Underweight",
        ratio <= 1.25 ~ "Healthy BM",
        ratio <= 1.50 ~ "Overweight",
        ratio <= 1.75 ~ "Obese",
        ratio > 1.75 ~ "Morbidly obese"
      ),
      category = factor(category, levels = category_levels)
    )

  # 4. Species: collapse to a single string so that data holding several
  #    species still produce one title.
  plot_title <- paste(unique(df[["binSpecies"]]), collapse = ", ")

  # 5. Counts per sex & category
  counts <- df %>%
    count(Sex, category, name = "n")
  print(counts)

  # 6. Ridge layer. A constant `scale` argument would override a mapped
  #    `scale` aesthetic, so exactly one of the two is supplied.
  ridge_args <- list(
    alpha = 0.7, quantile_lines = TRUE, quantiles = 2,
    rel_min_height = 0.0125, color = "black"
  )
  if (scale_by_count) {
    # The group size is constant within each ridge, so it can be mapped to
    # the `scale` aesthetic to make ridge heights proportional to N.
    df <- df %>%
      add_count(Sex, category, name = "grp_n") %>%
      mutate(ridge_height = grp_n / max(grp_n) * ridge_scale)
    ridge_args$mapping <- aes(scale = ridge_height)
  } else {
    ridge_args$scale <- ridge_scale
  }

  # 7. Label data: the count is placed at the median body mass of each group
  dens_peaks <- df %>%
    group_by(Sex, category) %>%
    summarise(xpos = median(avg_body_mass), n = n(), .groups = "drop")

  # 8. Wild average reference lines per sex
  wild_df <- tibble(
    Sex = c("Female", "Male"),
    wild_ref = c(wild_female_avg, wild_male_avg)
  )

  # 9. Ridgeline plot with one facet per sex (Female block, then Male block)
  p <- ggplot(df, aes(x = avg_body_mass, y = category, fill = category)) +
    do.call(stat_density_ridges, ridge_args) +
    geom_point(position = position_jitter(height = 0.1),
               size = 1.5, alpha = 0.7) +
    # group by sex, labels on left
    facet_grid(Sex ~ ., scales = "free_y", switch = "y") +
    labs(title = plot_title, x = "Body Mass (Kg)", y = "") +
    scale_fill_manual(values = c(
      "Underweight" = "steelblue",
      "Healthy BM" = "green2",
      "Overweight" = "gold",
      "Obese" = "darkorange",
      "Morbidly obese" = "brown"
    )) +
    theme_minimal(base_size = 14) +
    theme(
      legend.position = "bottom",
      axis.text.x = element_text(face = "bold"),
      strip.placement = "outside",
      strip.text.y.left = element_text(size = 14, face = "bold")
    ) +
    geom_text(data = dens_peaks,
              aes(x = xpos, y = category, label = paste0("N = ", n)),
              inherit.aes = FALSE,
              vjust = 2, hjust = -0.25, size = 4, fontface = "bold") +
    # `linewidth` replaces the `size` argument for lines (ggplot2 >= 3.4.0)
    geom_vline(data = wild_df, aes(xintercept = wild_ref),
               linetype = "dashed", color = "black", linewidth = 1) +
    # annotate("text", x = 70, y = "Healthy BM", label = "Wild avg 70kg",
    #          angle = 90) +
    scale_x_continuous(
      breaks = seq(0, 300, by = 20)
      # limits = c(0, 300) # optional, if you want to fix the visible range
    )

  print(p)
  invisible(p)
}
# Example data: 22 records of one species, the first 11 labelled "Female"
# and the last 11 labelled "Male".
df_sample <- tibble::tibble(
  avg_body_mass = c(
    50, 60, 64, 70, 72, 80, 90, 120, 130, 140, 150,
    160, 170, 175, 189, 190, 193, 200, 234, 235, 290, 260
  ),
  binSpecies = rep_len("T.killia", 22),
  Sex = c(rep("Female", 11), rep("Male", 11))
)

# Wild reference values, one per sex
wild_female_avg <- 70
wild_male_avg <- 130
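We could map `scale` inside `aes()` instead of passing it as a constant parameter, so that it varies with the group count. (The constant `scale = 2` argument has to be removed from the layer, because a fixed parameter overrides a mapped aesthetic.)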
library(dplyr)
# Attach the size of its Sex x category group to every row. n() counts the
# rows of the current group; sum(n) would need an `n` column, which the df
# built in the question does not have.
ggplot(df |> mutate(grp_n = n(), .by = c(Sex, category)),
       aes(x = avg_body_mass, y = category, fill = category)) +
  # `scale` is mapped inside aes(), so each ridge gets its own height factor:
  # the largest group is drawn with scale 2, smaller groups proportionally
  # lower. The trailing `...` stands for the remaining layers of the plot.
  stat_density_ridges(alpha = 0.7,
                      aes(scale = grp_n / max(grp_n) * 2),
                      quantile_lines = TRUE, quantiles = 2,
                      rel_min_height = 0.0125, color = "black") + ...