Tags: r, ggplot2, visualization, r-forestplot

Customize top-down order of odds ratios bars in forest plot using ggplot2


I have been working on a forest plot in R using ggplot2, and for the life of me, I cannot get the odds ratios in the proper order. From top to bottom, I would like them to be "Unadjusted", "Adjusted for family factors", "Adjusted for family and self-functioning factors", "Adjusted for family, self-functioning, and socio-demographic factors", which is the order they have in the dataset. However, the plot shows up like this: [image: enter image description here]

Is there a way to simply REVERSE the order? I've tried every solution ChatGPT has to offer, and this is the closest I could get to the order I want (every other attempt jumbled the order).

Here is my code:

library(devtools)
library(ggplot2)
library(stringr)
library(dplyr)
library(forcats)

# Forest plot of four odds ratios with their confidence intervals.
# This is the script as posted in the question; it draws the models in the
# reverse of the intended top-to-bottom order (see the notes further down).

# Raw inputs: one odds ratio, lower CI bound and upper CI bound per model,
# all listed in the same order as the labels in `Model`
odds_ratios <- c(1.290, 1.217, 1.231, 1.244)
ci_lower <- c(1.107, 1.038, 1.019, 1.030)
ci_upper <- c(1.504, 1.427, 1.486, 1.502)
Model <- c("Unadjusted", "Adjusted for family factors", "Adjusted for family and self-functioning factors", "Adjusted for family, self-functioning, and socio-demographic factors")

# Combine the vectors into one data frame, one row per model
data <- data.frame(
  Model = Model,
  Odds_Ratio = odds_ratios,
  CI_lower = ci_lower,
  CI_upper = ci_upper
)

# Desired top-to-bottom order of the model labels (same order as `Model`)
desired_order <- c("Unadjusted", "Adjusted for family factors", "Adjusted for family and self-functioning factors", "Adjusted for family, self-functioning, and socio-demographic factors")

# Turn Model into a factor whose levels follow desired_order, then sort the
# rows by that factor
data <- data %>%
  mutate(Model = factor(Model, levels = desired_order)) %>%
  arrange(Model)

# Row index 1..nrow(data); used below as the ordering key passed to reorder()
data$row_num <- 1:nrow(data)

# Tick positions on the x-axis and the text shown at each position
custom_breaks <- c(0.5, 1, 1.2, 1.5)
custom_labels <- c(" ", "Just as likely", "1.2x as likely", "1.5x as likely")

# Flag a row as "Significant" when its interval lies entirely above or
# entirely below 1, i.e. the interval does not contain 1
data$Significance <- ifelse(data$CI_lower > 1 | data$CI_upper < 1, "Significant", "Not Significant")

# Insert line breaks so each label is at most about 30 characters wide.
# str_wrap() returns a plain character vector, so Model is no longer a factor
# after this line and the level order set above is lost.
data$Model <- str_wrap(data$Model, width = 30)

# Draw the forest plot: a point per odds ratio, a horizontal bar per interval,
# and a dashed reference line at an odds ratio of 1.
# NOTE(review): `reverse` does not appear to be a documented argument of
# stats::reorder(); it is presumably swallowed by `...` and has no effect.
# The levels then stay in ascending row_num order, and ggplot2 draws the first
# level of a discrete y-axis at the bottom -- hence the reversed plot.
ggplot(data, aes(x = Odds_Ratio, y = reorder(Model, row_num, reverse = TRUE))) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "gray") +
  geom_point(aes(x = Odds_Ratio, color = Significance), size = 3) +
  geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper), height = 0.3) +
  scale_x_continuous(breaks = custom_breaks, labels = custom_labels) +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  theme_minimal() +
  labs(x = "Odds Ratio", y = " ") +
  ggtitle(paste("Odds Ratios for Family Affectedness of Individual Chronic Pain:", "\n", "Women vs. Men")) +
  theme(
    # y-axis title is hidden; the wrapped model labels speak for themselves
    axis.title.y = element_blank(),
    axis.text = element_text(size = 10),
    panel.grid.major.y = element_line(color = "gray", linetype = "dashed"),
    # the colour legend is suppressed
    legend.position = "none",
    # centre the two-line title
    plot.title = element_text(hjust = 0.5, lineheight = 1.2)
  )

Thank you so much in advance!


Solution

  • If you make "Model" a factor you can use fct_rev() from the forcats package (https://forcats.tidyverse.org/reference/fct_rev.html) to change the order, e.g.

    library(tidyverse)
    
    # Forest plot of four odds ratios with their confidence intervals, drawn
    # with the models in the intended top-to-bottom order.

    # Raw inputs: one odds ratio, lower CI bound and upper CI bound per model,
    # all listed in the same order as the labels in `Model`
    odds_ratios <- c(1.290, 1.217, 1.231, 1.244)
    ci_lower <- c(1.107, 1.038, 1.019, 1.030)
    ci_upper <- c(1.504, 1.427, 1.486, 1.502)
    Model <- c(
      "Unadjusted",
      "Adjusted for family factors",
      "Adjusted for family and self-functioning factors",
      "Adjusted for family, self-functioning, and socio-demographic factors"
    )

    # Desired top-to-bottom order of the labels; here it is simply the order
    # in which the models are listed above
    desired_order <- Model

    # Wrap the long labels first and only then build the factor, using the
    # wrapped strings for both values and levels. str_wrap() returns a plain
    # character vector, so a factor created before wrapping would lose its
    # level order again.
    data <- data.frame(
      Model = factor(
        str_wrap(Model, width = 30),
        levels = str_wrap(desired_order, width = 30)
      ),
      Odds_Ratio = odds_ratios,
      CI_lower = ci_lower,
      CI_upper = ci_upper
    )

    # Flag a row as "Significant" when its interval lies entirely above or
    # entirely below 1, i.e. the interval does not contain 1
    data$Significance <- ifelse(
      data$CI_lower > 1 | data$CI_upper < 1,
      "Significant",
      "Not Significant"
    )

    # Tick positions on the x-axis and the text shown at each position
    custom_breaks <- c(0.5, 1, 1.2, 1.5)
    custom_labels <- c(" ", "Just as likely", "1.2x as likely", "1.5x as likely")

    # ggplot2 draws the first level of a discrete y-axis at the bottom, so the
    # levels are reversed with fct_rev() to put the first model at the top.
    # NOTE(review): newer ggplot2 releases reportedly deprecate
    # geom_errorbarh() in favour of geom_errorbar(orientation = "y") --
    # confirm against the installed version before switching.
    ggplot(data, aes(x = Odds_Ratio, y = fct_rev(Model))) +
      geom_vline(xintercept = 1, linetype = "dashed", color = "gray") +
      geom_point(aes(color = Significance), size = 3) +
      geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper), height = 0.3) +
      scale_x_continuous(breaks = custom_breaks, labels = custom_labels) +
      scale_color_manual(
        values = c("Significant" = "red", "Not Significant" = "black")
      ) +
      theme_minimal() +
      labs(x = "Odds Ratio", y = " ") +
      ggtitle(paste(
        "Odds Ratios for Family Affectedness of Individual Chronic Pain:",
        "\n",
        "Women vs. Men"
      )) +
      theme(
        # y-axis title is hidden; the wrapped model labels speak for themselves
        axis.title.y = element_blank(),
        axis.text = element_text(size = 10),
        panel.grid.major.y = element_line(color = "gray", linetype = "dashed"),
        # the colour legend is suppressed
        legend.position = "none",
        # centre the two-line title
        plot.title = element_text(hjust = 0.5, lineheight = 1.2)
      )
    

    Created on 2024-04-04 with reprex v2.1.0