rggplot2

Add P-value Bracket to Tumor Growth ggplot


I would like to add p-value brackets to a tumor growth curve plot I have made in ggplot. This is a common situation that GraphPad Prism has built in functionality for, but I have not seen any examples of this using ggplot when I searched.

From what I can tell, existing R functions like ggpubr::stat_compare_means, ggpubr::stat_pvalue_manual, or ggprism::add_pvalue seem to rely on the groups being compared to be plotted on the x-axis. However, for tumor growth curves, time is the x-axis, and the groups being compared are specified in the legend.

I have included example figure from a publication that was made using GraphPad Prism as well as what I have generated in ggplot.

I would like to add the p-value brackets in the same manner. Preferably this could be an automatic process, so I am not specifying coordinates for every plot going forward.

Tumor growth curve made in GraphPad Prism with desired p-value brackets

library(tidyverse)
library(scales)

tumorDataExample <- data.frame(
  Mouse = rep(1:15, times = 5),
  Condition = rep(c("A", "B", "C"), each = 5, times = 5),
  Day = rep(c(0, 8, 10, 11, 14), each = 15),
  Volume = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
             55.419392, 63.819999, 9.622816, 43.9999665, 34.792254, 5.470524, 23.516325, 
             49.9453705, 9.7540625, 16.9136, 13.1383, 36.872528, 18.108342, 48.700748, 
             13.197388, 86.979584, 76.9613405, 36.452668, 20.9408, 26.978522, 24.628864, 
             6.397264, 132.420195, 18.3314305, 11.1797745, 5.0688, 29.738592, 17.87125, 
             32.8653, 19.9408, 135.7811505, 99.900178, 60.76815, 25.6632, 33.519087, 
             25.1935245, 12.581856, 294.5889, 38.69775, 15.056676, 7.7533775, 21.625065, 
             19.62, 55.118336, 16.1063615, 200.485611, 162.4776305, 104.56355, 40.106664, 
             66.2195385, 16.143108, 4.2439, 886.51492, 17.37468, 15.867072, 7.791552, 
             11.3016465, 37.626644, 74.365425, 4.080501)
)

tumorDataExampleSummary <- tumorDataExample %>%
  group_by(Day, Condition) %>%
  summarize(meanVolume = mean(Volume, na.rm = TRUE),
            se = sd(Volume) / sqrt(n()))

tumorSizeExample_Summary <- ggplot() +
  geom_line(data = tumorDataExampleSummary, aes(x = Day, y = meanVolume, color = Condition)) +
  geom_errorbar(data = tumorDataExampleSummary, aes(x = Day, ymin = meanVolume - se, ymax = meanVolume + se, color = Condition), width = 0.1) +
  geom_point(data = tumorDataExampleSummary, aes(x = Day, y = meanVolume, color = Condition)) +
  scale_y_continuous(breaks = pretty_breaks(n=8)) +
  scale_x_continuous(breaks = pretty_breaks(n=3)) +
  labs(title = "Tumor Volumes Over Time",
       x = "Days After Tumor Injection",
       y = expression("Tumor Volume (mm"^3*")")) +
  theme_minimal()

tumorSizeExample_Summary

Tumor Growth Curve made with ggplot


Solution

  • This code works as expected. It runs a Student's T test and plots it with brackets on the last day of measurements.

    library(tidyverse)
    library(scales)
    library(gtools)
    
    
    tumorDataExample <- data.frame(
      Mouse = rep(1:15, times = 5),
      Condition = rep(c("A", "B", "C"), each = 5, times = 5),
      Day = rep(c(0, 8, 10, 11, 14), each = 15),
      Volume = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                 55.419392, 63.819999, 9.622816, 43.9999665, 34.792254, 5.470524, 23.516325, 
                 49.9453705, 9.7540625, 16.9136, 13.1383, 36.872528, 18.108342, 48.700748, 
                 13.197388, 86.979584, 76.9613405, 36.452668, 20.9408, 26.978522, 24.628864, 
                 6.397264, 132.420195, 18.3314305, 11.1797745, 5.0688, 29.738592, 17.87125, 
                 32.8653, 19.9408, 135.7811505, 99.900178, 60.76815, 25.6632, 33.519087, 
                 25.1935245, 12.581856, 294.5889, 38.69775, 15.056676, 7.7533775, 21.625065, 
                 19.62, 55.118336, 16.1063615, 200.485611, 162.4776305, 104.56355, 40.106664, 
                 66.2195385, 16.143108, 4.2439, 886.51492, 17.37468, 15.867072, 7.791552, 
                 11.3016465, 37.626644, 74.365425, 4.080501)
    )
    
    #Create a summary table of the mean volume for each condition
    tumorDataExampleSummary <- tumorDataExample %>%
      group_by(Day, Condition) %>%
      summarize(meanVolume = mean(Volume, na.rm = TRUE),
                se = sd(Volume) / sqrt(n()))
    
    #Run a Student's T test on all the conditions
    stat.test <- compare_means(
      Volume ~ Condition, data = tumorDataExample %>% filter(Day == max(tumorDataExample$Day)),
      method = "t.test",
      var.equal = TRUE #Student's T test
    )
    stat.test
    
    
    #Create a list of all possible pairwise comparisons of Conditions
    paired_columns <- gtools::combinations(n = 3, r = 2, v = unique(tumorDataExample$Condition) , 
                         repeats.allowed = FALSE) 
    
    #Create a list of the p_values for each comparison in the order of the paired_columns
    p_value_labs <- lapply(seq_along(unique(tumorDataExample$Condition)), function(i){
      paste0(round(stat.test$p[i], 3))
    }) %>% unlist()
    
    #Create a table of x positions for the brackets
    #These will really be y values, but the coordinates are flipped in the plot
    #This uses the meanVolume of each Condition for the last day of measurements
    pvalue_bracket_xpos <- tumorDataExampleSummary  %>%
      filter(Day == max(tumorDataExampleSummary$Day)) %>%
      pivot_wider(id_cols = Day, names_from = Condition, 
                  values_from = meanVolume)
    
    #Create the plot
    #x and y have to be flipped in aes() for the brackets to work
    ggplot() +
      geom_path(data = tumorDataExampleSummary %>% arrange(Day), 
                aes(y = Day, x = meanVolume, color = Condition)) + #Need to use geom_path instead of geom_line
                                                                  #Otherwise the lines will be connected in tumor volume order, not day
      geom_errorbarh(data = tumorDataExampleSummary, 
                     aes(y = Day, xmin = meanVolume - se, 
                         xmax = meanVolume + se, color = Condition), 
                     height = 0.1) +
      geom_point(data = tumorDataExampleSummary, 
                 aes(y = Day, x = meanVolume, color = Condition)) +
      labs(title = "Tumor Volumes Over Time",
           y = "Days After Tumor Injection",
           x = expression("Tumor Volume (mm"^3*")")) +
      theme_minimal() +
      coord_flip() + ## x and y are flipped in aes(), so we flip them again
      ggpubr::geom_bracket(inherit.aes = FALSE, label = p_value_labs, 
                           coord.flip = TRUE, 
                           xmin = unlist(pvalue_bracket_xpos[,paired_columns[,1]]), 
                           xmax = unlist(pvalue_bracket_xpos[,paired_columns[,2]]), 
                           y.position = pvalue_bracket_xpos$Day + 0.3,
                           step.increase = 0.03,
                           tip.length = 0.02,
                           vjust = 0.25) +
      scale_x_continuous(breaks = pretty_breaks(n=8)) +
      scale_y_continuous(breaks = pretty_breaks(n=3))