r ggplot2 stat ggpubr

Error when adding p-values to a ggplot with stat_pvalue_manual


I have a problem with stat_pvalue_manual: my plot throws an error whenever I add the stat_pvalue_manual layer. I tried stat_compare_means instead, but it gives wrong p.adj values, so I need to go back to stat_pvalue_manual. Below are my script and data. I hope someone can help me with this issue. Thank you very much!

# Create vectors for each variable.
# Observations are listed group by group. Each row of `value` below holds the
# replicates of one medium/strain group, and the `times` counts mirror the
# number of readings on each row (6, 5, 6, 6, 5), so all three vectors have
# 28 entries. Previously MM/C was labelled only 5 times although its row has
# 6 readings, which made data.frame() fail with differing row counts.
medium <- rep(
  c("MM", "MM", "MM", "MMGA", "MMGA"),
  times = c(6, 5, 6, 6, 5)
)

strains <- rep(
  c("A", "B", "C", "A", "B"),
  times = c(6, 5, 6, 6, 5)
)

value <- c(
  # MM / A
  0.642396224, 0.642973791, 0.560378425, 0.844780865, 1.202418689, 2.107052006,
  # MM / B
  0.286002062, 0.207255769, 0.121489854, 0.149735456, 0.292497354,
  # MM / C
  0.188256332, 0.181433285, 0.407696852, 0.37862504, 0.891960999, 0.465748762,
  # MMGA / A
  1.003103112, 1.000750442, 1.001402757, 1.006722153, 0.996919936, 0.991101601,
  # MMGA / B
  0.997623183, 0.994042211, 0.999101239, 0.996893329, 1.000764356
)

# Create a dataframe (one row per observation)
ggdat <- data.frame(medium, strains, value)

Error:
! Problem while computing aesthetics.
ℹ Error occurred in the 4th layer.
Caused by error in FUN():
! object 'strains' not found
Run rlang::last_trace() to see where the error occurred.
Warning message:
Duplicated aesthetics after name standardisation: size

#####################
library(ggprism)
library(readxl)
library(ggplot2)
library(RColorBrewer)
library(ggpubr)
library(rstatix)

# Load the measurements; the columns used below are medium, strains and value.
ggdat <- read.csv("ggdat_demo.csv")

# Calculate p-values: t-tests of value between strains, run separately for
# each medium, with Bonferroni-adjusted p-values, significance labels and the
# bracket x/y positions needed to draw them on a dodged plot.
stat.test <- ggdat %>%
  group_by(medium) %>%
  t_test(value ~ strains) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance() %>%
  add_xy_position(x = "strains", dodge = 0.8)

# Mean and sd per strain/medium, used for the bars and error bars.
# `sd` is computed before `value` is overwritten by its mean.
# `.groups = "drop"` returns an ungrouped table, so the grouping column
# `strains` can be safely replaced by a factor below.
df.summary <- ggdat %>%
  group_by(strains, medium) %>%
  summarise(sd = sd(value), value = mean(value), .groups = "drop")

# Fix the strain order (and legend order) shared by both tables.
ggdat$strains <- factor(ggdat$strains, levels = c("A", "B", "C", "D"))
df.summary$strains <- factor(df.summary$strains, levels = c("A", "B", "C", "D"))

# Plot that errors once the p-value layer is added.
# The aes() given to ggplot() (x = strains, y = value, fill = strains) is the
# plot-wide default mapping for the layers below.
ggplot(ggdat, aes(strains, value, fill=strains)) +
  facet_grid(. ~ medium) + 
  # Layer 1: bars of the per-group means from df.summary.
  geom_col(data = df.summary, position = position_dodge(0.8), width = 0.7, alpha=1) +
  # Layer 2: mean +/- sd error bars, also from df.summary.
  geom_errorbar(data = df.summary, aes(ymin = value-sd, ymax = value+sd), 
                color="#282828",width = 0.4, position = position_dodge(0.8)) + 
  # Layer 3: the raw observations from ggdat, jittered within each dodge slot.
  # NOTE(review): both `size` and `cex` are supplied here; this pair looks like
  # the source of the "Duplicated aesthetics ... size" warning - confirm.
  geom_point(size=2, position = position_jitterdodge(jitter.width = .5, dodge.width = .8),
             cex = 3, alpha=.8,  color="#282828", shape=21, stroke = .75) +
 # add_pvalue(stat.test, label = "p.adj") +  
  # Layer 4: the layer named in the reported error. It draws from stat.test
  # rather than ggdat; presumably stat.test has no `strains` column, so the
  # plot-wide mapping above cannot be evaluated for it - verify.
  stat_pvalue_manual(stat.test,  label = "p.adj", tip.length = 0) +
  theme(legend.text = element_text(size = 12), legend.title = element_text(size = 12),
        legend.position = "top",
        axis.title.y = element_text(face = "plain", size = 12, color = "black", margin = margin(0,0,0,0,"cm")),
        axis.title.x = element_text(face = "plain", size = 12, color = "black", margin = margin(0,0,0,0,"cm")),
        axis.text.x = element_text(face = "plain", size = 12, color = "black", angle = 35, vjust = 1, hjust = 1, margin = margin(0,0,0.2,0,"cm")),#
        axis.text.y = element_text(face = "plain", size = 12, color = "black", angle = 0, margin = margin(0,0,0,0.2,"cm"))) +
  scale_y_continuous(limits = c(0,3), breaks = c(0, 0.5,1, 1.5, 2, 2.5))+
  scale_color_manual(name= "Strains",values = c("#E6A226", "#BABBBB", "#21BDC2", "#7AD1ED"),labels=c("A","B", "C", "D")) +
  scale_fill_manual(name= "Strains",values = c("#E6A226", "#BABBBB", "#21BDC2", "#7AD1ED"), labels=c("A","B", "C", "D")) +
  guides(fill=guide_legend(nrow=1,byrow=TRUE),  color=guide_legend(nrow=1, byrow=TRUE))+
  # NOTE(review): theme_bw() is added after theme(); as a complete theme it
  # likely replaces the legend/axis settings made above - confirm.
  theme_bw()

Solution

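  • There are a few issues to address, but the biggest one (as far as I can tell) is using a 'global' aes() in ggplot() while the layers draw from different data sources: the stat_pvalue_manual() layer uses stat.test, where the globally mapped 'strains' variable cannot be found. If you specify the aesthetics separately for each geometry, the plot renders, e.g.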

    library(tidyverse)
    library(RColorBrewer)
    library(ggpubr)
    library(rstatix)
    #> 
    #> Attaching package: 'rstatix'
    #> The following object is masked from 'package:stats':
    #> 
    #>     filter
    
    # Create vectors for each variable.
    # Observations are listed group by group. Each row of `value` below holds
    # the replicates of one medium/strain group, and the `times` counts mirror
    # the number of readings on each row (6, 5, 6, 6, 5). Labelling MM/C six
    # times keeps 0.465748762 with the MM/C group instead of shifting it into
    # MMGA/A, and lets the final MMGA/B reading be kept.
    medium <- rep(
      c("MM", "MM", "MM", "MMGA", "MMGA"),
      times = c(6, 5, 6, 6, 5)
    )
    
    strains <- rep(
      c("A", "B", "C", "A", "B"),
      times = c(6, 5, 6, 6, 5)
    )
    
    value <- c(
      # MM / A
      0.642396224, 0.642973791, 0.560378425, 0.844780865, 1.202418689, 2.107052006,
      # MM / B
      0.286002062, 0.207255769, 0.121489854, 0.149735456, 0.292497354,
      # MM / C
      0.188256332, 0.181433285, 0.407696852, 0.37862504, 0.891960999, 0.465748762,
      # MMGA / A
      1.003103112, 1.000750442, 1.001402757, 1.006722153, 0.996919936, 0.991101601,
      # MMGA / B
      0.997623183, 0.994042211, 0.999101239, 0.996893329, 1.000764356
    )
    
    # Create a dataframe (one row per observation)
    ggdat <- data.frame(medium, strains, value)
    
    # p-values: t-tests of value between strains within each medium,
    # Bonferroni-adjusted, with significance labels and bracket positions
    # (dodge width 0.8) attached for plotting.
    stat.test <- ggdat %>%
      group_by(medium) %>%
      t_test(value ~ strains) %>%
      adjust_pvalue(method = "bonferroni") %>%
      add_significance() %>%
      add_xy_position(x = "strains", dodge = 0.8)
    
    # Per strain/medium summary for the column plot: sd first, then the mean
    # stored back under the name `value`.
    df.summary <- ggdat %>%
      group_by(strains, medium) %>%
      summarise(
        sd = sd(value),
        value = mean(value)
      )
    #> `summarise()` has grouped output by 'strains'. You can override using the
    #> `.groups` argument.
    
    # Same strain levels in both tables; the summary is ungrouped afterwards.
    ggdat$strains <- factor(ggdat$strains, levels = LETTERS[1:4])
    df.summary$strains <- factor(df.summary$strains, levels = LETTERS[1:4])
    df.summary <- df.summary %>% ungroup()
    
    # Bar plot of the group means with sd error bars, raw points and p-value
    # brackets, one facet per medium.
    # Each layer draws from a different table (df.summary for bars and error
    # bars, ggdat for points, stat.test for brackets), so every layer declares
    # its own aes() instead of inheriting a plot-wide mapping that stat.test
    # could not satisfy.
    ggplot(ggdat) +
      facet_grid(. ~ medium) +
      geom_col(
        data = df.summary,
        aes(strains, value, fill = strains),
        position = position_dodge(0.8), width = 0.7, alpha = 1
      ) +
      geom_errorbar(
        data = df.summary,
        aes(x = strains, y = value, ymin = value - sd, ymax = value + sd),
        color = "#282828", width = 0.4, position = position_dodge(0.8)
      ) +
      # Only `size` is set: passing `cex` as well duplicated the size aesthetic
      # and triggered the "Duplicated aesthetics" warning.
      geom_point(
        aes(strains, value, fill = strains),
        size = 2, alpha = 0.8, color = "#282828", shape = 21, stroke = 0.75,
        position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8)
      ) +
      # add_pvalue(stat.test, label = "p.adj") +
      stat_pvalue_manual(stat.test, label = "p.adj", tip.length = 0) +
      scale_y_continuous(limits = c(0, 3), breaks = c(0, 0.5, 1, 1.5, 2, 2.5)) +
      scale_color_manual(
        name = "Strains",
        values = c("#E6A226", "#BABBBB", "#21BDC2", "#7AD1ED"),
        labels = c("A", "B", "C", "D")
      ) +
      scale_fill_manual(
        name = "Strains",
        values = c("#E6A226", "#BABBBB", "#21BDC2", "#7AD1ED"),
        labels = c("A", "B", "C", "D")
      ) +
      guides(
        fill = guide_legend(nrow = 1, byrow = TRUE),
        color = guide_legend(nrow = 1, byrow = TRUE)
      ) +
      # The complete theme goes first: added last, theme_bw() would replace
      # the legend and axis tweaks made in theme().
      theme_bw() +
      theme(
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        legend.position = "top",
        axis.title.y = element_text(
          face = "plain", size = 12, color = "black",
          margin = margin(0, 0, 0, 0, "cm")
        ),
        axis.title.x = element_text(
          face = "plain", size = 12, color = "black",
          margin = margin(0, 0, 0, 0, "cm")
        ),
        axis.text.x = element_text(
          face = "plain", size = 12, color = "black",
          angle = 35, vjust = 1, hjust = 1,
          margin = margin(0, 0, 0.2, 0, "cm")
        ),
        axis.text.y = element_text(
          face = "plain", size = 12, color = "black", angle = 0,
          margin = margin(0, 0, 0, 0.2, "cm")
        )
      )
    

    Created on 2023-09-21 with reprex v2.0.2

    Is this your expected output? Does this solve your problem?