I am struggling to get the rows in my table to align correctly with their respective IRR points on the plot (e.g., the table values for A should sit on the same line as the IRR point for A).
Specifically, I am creating a forest plot that includes a graph and corresponding tables of data. Each point on the forest plot represents the incidence rate ratio (IRR) for one group, based on that group's exposure to a given recreational substance. There are two groups for each substance: the groups for cannabis exposure are A and B, the groups for nicotine are C and D, and the groups for alcohol are E and F. The two columns of data hold the adjusted and unadjusted IRR values.
Here is my current plot below:
And here is the code:
# Load necessary libraries
library(tibble)
library(ggplot2)
library(dplyr)
library(forcats)
library(stringr)
library(patchwork) # For plot layout
# Define data, one table line per row. The substance rows ("Cannabis",
# "Nicotine", "Alcohol") act as headers and hold NA in every numeric column.
res <- tribble(
  ~model, ~estimate, ~conf.low, ~conf.high, ~estimate2, ~conf.low2, ~conf.high2,
  "Cannabis", NA, NA, NA, NA, NA, NA,
  "A", 1.08, 1.02, 1.14, 1.09, 1.03, 1.15,
  "B", 1.01, 0.99, 1.05, 1.02, 0.99, 1.05,
  "Nicotine", NA, NA, NA, NA, NA, NA,
  "C", 1.06, 1.04, 1.08, 1.03, 1.01, 1.05,
  "D", 1.07, 1.03, 1.11, 1.06, 1.04, 1.08,
  "Alcohol", NA, NA, NA, NA, NA, NA,
  "E", 1.09, 1.07, 1.11, 1.07, 1.05, 1.09,
  "F", 1.22, 1.11, 1.33, 1.20, 1.10, 1.30
)
# Make 'model' a factor whose levels are the row order reversed, so that
# "Cannabis" is the last level and "F" the first.
res$model <- factor(res$model, levels = rev(res$model))
# Forest plot panel: point estimates with their confidence intervals.
# NOTE: only rows with a non-NA estimate are passed to the layers, so the
# header rows never reach this panel's y axis and it lists just the six
# groups (A-F), while the text panels use all nine rows.
p_right <- ggplot(res, aes(y = model)) +
  # Layers, in drawing order: intervals, points, reference line, captions
  geom_linerange(
    data = res[!is.na(res$estimate), ],
    mapping = aes(xmin = conf.low, xmax = conf.high)
  ) +
  geom_point(
    data = res[!is.na(res$estimate), ],
    mapping = aes(x = estimate),
    shape = 21, size = 3, stroke = 0.5,
    color = "black",
    # One fill per plotted row: two consecutive rows for each substance
    fill = rep(c("cyan2", "orange", "purple"), each = 2),
    position = position_dodge(width = 0.5)
  ) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  annotate("text", x = 1.3, y = 9, label = "Higher Risk") +
  annotate("text", x = .7, y = 9, label = "Lower Risk") +
  # Axis title and visible window
  labs(x = "Incidence Rate Ratio") +
  coord_cartesian(ylim = c(1, 9), xlim = c(.5, 1.5)) +
  # Classic theme with the y axis reduced to its text labels
  theme_classic() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )
# Format a numeric vector as text with exactly two decimals.
# sprintf() replaces str_pad(round(x, 2), width = 4, pad = "0", side = "right"),
# which right-pads with "0" and therefore turns a value that rounds to a whole
# number into the wrong text (1 -> "1000" rather than "1.00").
# NA stays a real NA (not the string "NA") so that later `is.na()` checks on the
# formatted columns still pick out the header rows.
fmt_2dp <- function(x) {
  if_else(is.na(x), NA_character_, sprintf("%.2f", x))
}

# Pre-plotting table for the first set of estimates: the three numeric columns
# become two-decimal text and `estimate_lab` holds "estimate (low-high)".
res_plot <- res %>%
  mutate(
    across(c(estimate, conf.low, conf.high), fmt_2dp),
    estimate_lab = paste0(estimate, " (", conf.low, "-", conf.high, ")")
  )

# Same for the second set of estimates; the label column is `estimate_lab2`.
res_plot2 <- res %>%
  mutate(
    across(c(estimate2, conf.low2, conf.high2), fmt_2dp),
    estimate_lab2 = paste0(estimate2, " (", conf.low2, "-", conf.high2, ")")
  )
# Middle panel: a text-only column showing `estimate_lab` for each row.
# Rows whose estimate is NA (the headers) get an empty string instead.
p_mid <- ggplot(res_plot, aes(x = 1, y = model)) +
  geom_text(
    aes(label = ifelse(is.na(estimate), "", estimate_lab)),
    hjust = 0
  ) +
  coord_cartesian(xlim = c(0, 4)) +
  theme_void()
# Left panel: bold row names at x = 0 and the `estimate_lab2` text at x = 1
p_left <- ggplot(res_plot2, aes(y = model)) +
  geom_text(aes(x = 0, label = model), fontface = "bold", hjust = 0) +
  # Header rows have an NA estimate2, so their label is left blank
  geom_text(
    aes(x = 1, label = ifelse(is.na(estimate2), "", estimate_lab2)),
    hjust = 0
  ) +
  coord_cartesian(xlim = c(0, 4)) +
  theme_void()
# Layout design: one area() per panel, given as (top, left, bottom, right)
# grid bounds and listed in the order the panels are combined below.
# The column ranges of the three areas overlap.
layout <- c(
  area(t = 0, l = 2, b = 10, r = 3), # p_left
  area(t = 0, l = 2, b = 10, r = 9), # p_mid
  area(t = 0, l = 6, b = 10, r = 11) # p_right
)
# Combine the three panels using that design and display the result
final_plot <- wrap_plots(p_left, p_mid, p_right, design = layout)
final_plot
Any advice on how to get the points on the plot to align with their rows would be greatly appreciated. Thank you so much!
The issue is that the y axis of your main plot does not include the header rows of your data, because only the rows with a non-NA estimate are passed to its layers. To fix that, add `+ scale_y_discrete(drop = FALSE)` and use its `labels =` argument so that no label is shown for the header rows. Also note that I merged the left and middle plots into one plot.
library(tidyverse)
library(patchwork)
# Named character vector of y-axis labels for scale_y_discrete(): names are the
# 'model' values, values are the text to print. Header rows (NA estimate) map
# to "" so they keep their slot on the axis without showing a label.
# 'model' is a factor, so it is converted to character before if_else():
# both branches then share one type, which does not depend on how the
# installed dplyr version combines a string with a factor.
labels_y <- res |>
  distinct(model, estimate) |>
  mutate(
    model = as.character(model),
    estimate = if_else(is.na(estimate), "", model)
  ) |>
  tibble::deframe()
# Forest plot panel: point estimates with their confidence intervals.
# The y scale keeps every factor level (drop = FALSE), so the header rows keep
# their slot on the axis; `labels_y` supplies the text shown for each level.
p_right <- ggplot(res, aes(y = model)) +
  # Layers, in drawing order: intervals, points, reference line, captions.
  # Only rows with a non-NA estimate are drawn.
  geom_linerange(
    data = res[!is.na(res$estimate), ],
    mapping = aes(xmin = conf.low, xmax = conf.high)
  ) +
  geom_point(
    data = res[!is.na(res$estimate), ],
    mapping = aes(x = estimate),
    shape = 21, size = 3, stroke = 0.5,
    color = "black",
    # One fill per plotted row: two consecutive rows for each substance
    fill = rep(c("cyan2", "orange", "purple"), each = 2),
    position = position_dodge(width = 0.5)
  ) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  annotate("text", x = 1.3, y = 9, label = "Higher Risk") +
  annotate("text", x = .7, y = 9, label = "Lower Risk") +
  # Scales, axis title and visible window
  scale_y_discrete(labels = labels_y, drop = FALSE) +
  labs(x = "Incidence Rate Ratio") +
  coord_cartesian(ylim = c(1, 9), xlim = c(.5, 1.5)) +
  # Classic theme with the y axis reduced to its text labels
  theme_classic() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )
# Left side of plot: the row names come from the y axis text (bold,
# left-aligned) and the `estimate_lab` text is drawn at x = 1.
p_left <- res_plot %>%
  ggplot(aes(y = model)) +
  # Blank out header rows. The NA test reads `estimate`, the column that
  # `estimate_lab` is built from, rather than the unrelated `estimate2`.
  geom_text(
    aes(x = 1, label = ifelse(!is.na(estimate), estimate_lab, "")),
    hjust = 0
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  theme_void() +
  # theme_void() removes the axis text, so switch the y labels back on
  theme(axis.text.y.left = element_text(face = "bold", hjust = 0)) +
  coord_cartesian(xlim = c(0, 4))
# Layout design: one area() per panel, given as (top, left, bottom, right)
# grid bounds and listed in the order the panels are combined below
layout <- c(
  area(t = 0, l = 0, b = 10, r = 4), # p_left
  area(t = 0, l = 5, b = 10, r = 10) # p_right
)
# Combine the two panels using that design and display the result
final_plot <- wrap_plots(p_left, p_right, design = layout)
final_plot