r ggplot2 ggpubr

ggbarplot mean standard error bars not aligning


I am trying to plot the percentage of three cellTypes (T cells CD4, T cells CD8 & Tregs) that are assigned to A vs B, averaged across the ROI_name column and with mean_se error bars. However, the plotted bars do not always align with the error bars.

Relevant code:

# Label each row "A" when its ROI is listed in tls_pos, otherwise "B".
neighb$ab <- ifelse(neighb$ROI_name %in% tls_pos, "A", "B")

# For each ROI/treatment/ab/cellType: the share (in percent) of all cells in
# that ROI/treatment that belong to each of the three T-cell types.
imm_perc <- neighb %>%
  select(cellType, treatment, ab, ROI_name) %>%
  # Denominator: every cell in the ROI/treatment, counted before filtering.
  group_by(ROI_name, treatment) %>%
  mutate(total_count = n()) %>%
  # group_by() replaces the previous grouping, so no ungroup() is needed.
  group_by(ROI_name, treatment, ab, cellType) %>%
  filter(cellType %in% c("T cells CD4", "T cells CD8", "T reg cells")) %>%
  # Numerator: cells of this type within the ROI/treatment/ab group.
  mutate(imm_count = n()) %>%
  # Both counts are constant within a group, so unique() yields one row each.
  # summarise() drops only the last grouping level, so the result stays
  # grouped by ROI_name, treatment and ab.
  summarise(
    total_count = unique(total_count),
    imm_count = unique(imm_count),
    imm_perc = 100 * (imm_count / total_count)
  )

# Split the summary table by treatment arm.
imm_perc_v <- imm_perc %>% filter(treatment == "Vehicle")
imm_perc_m <- imm_perc %>% filter(treatment == "MRTX")

# Mean +/- SE bar plot of the Vehicle percentages, one panel per cell type.
p <- ggbarplot(
  imm_perc_v,
  x = "ab", y = "imm_perc", fill = "ab",
  add = "mean_se", width = 0.9
) +
  theme_classic() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 22),
    axis.text = element_text(size = 20),
    legend.title = element_blank(),
    legend.text = element_text(size = 20),
    strip.text = element_text(size = 18)
  ) +
  # Facets are added to the finished ggbarplot() object rather than through
  # its facet.by argument.
  facet_wrap(cellType ~ .) +
  labs(y = "Immune cell percentage")
p

data:

structure(list(ROI_name = c("HypM_602121-2_ROI_001_1_Vehicle", 
"HypM_602121-2_ROI_001_1_Vehicle", "HypM_602121-2_ROI_001_1_Vehicle", 
"HypM_602121-2_ROI_002_2_Vehicle", "HypM_602121-2_ROI_002_2_Vehicle", 
"HypM_602121-2_ROI_002_2_Vehicle", "HypM_602121-3_ROI_001_1_Vehicle", 
"HypM_602121-3_ROI_001_1_Vehicle", "HypM_602121-3_ROI_001_1_Vehicle", 
"HypM_602121-3_ROI_002_2_Vehicle", "HypM_602121-3_ROI_002_2_Vehicle", 
"HypM_602121-3_ROI_002_2_Vehicle", "HypM_602121-4_ROI_001_1_Vehicle", 
"HypM_602121-4_ROI_001_1_Vehicle", "HypM_602121-4_ROI_001_1_Vehicle", 
"HypM_602121-4_ROI_002_2_Vehicle", "HypM_602121-4_ROI_002_2_Vehicle", 
"HypM_602121-4_ROI_002_2_Vehicle", "HypM604121_13_ROI_001_1_Vehicle", 
"HypM604121_13_ROI_001_1_Vehicle", "HypM604121_13_ROI_001_1_Vehicle", 
"HypM604121_13_ROI_002_2_Vehicle", "HypM604121_13_ROI_002_2_Vehicle", 
"HypM604121_13_ROI_002_2_Vehicle"), treatment = c("Vehicle", 
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", 
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", 
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", 
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle"), ab = c("A", 
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "A", "A", "A", "B", "B", "B", "B", "B", "B"), cellType = c("T cells CD4", 
"T cells CD8", "T reg cells", "T cells CD4", "T cells CD8", "T reg cells", 
"T cells CD4", "T cells CD8", "T reg cells", "T cells CD4", "T cells CD8", 
"T reg cells", "T cells CD4", "T cells CD8", "T reg cells", "T cells CD4", 
"T cells CD8", "T reg cells", "T cells CD4", "T cells CD8", "T reg cells", 
"T cells CD4", "T cells CD8", "T reg cells"), total_count = c(9991L, 
9991L, 9991L, 9693L, 9693L, 9693L, 6250L, 6250L, 6250L, 8933L, 
8933L, 8933L, 7110L, 7110L, 7110L, 8719L, 8719L, 8719L, 8602L, 
8602L, 8602L, 9358L, 9358L, 9358L), imm_count = c(124L, 121L, 
75L, 113L, 107L, 62L, 155L, 41L, 69L, 165L, 103L, 96L, 284L, 
257L, 153L, 450L, 242L, 216L, 178L, 151L, 128L, 153L, 166L, 84L
), imm_perc = c(1.24111700530477, 1.21108998098288, 0.750675608047243, 
1.16578974517693, 1.10388940472506, 0.639636851336016, 2.48, 
0.656, 1.104, 1.84708384641218, 1.15302809806336, 1.07466696518527, 
3.9943741209564, 3.61462728551336, 2.15189873417722, 5.16114233283633, 
2.77554765454754, 2.47734831976144, 2.06928621250872, 1.75540571960009, 
1.48802604045571, 1.63496473605471, 1.77388330839923, 0.897627698226117
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-24L), groups = structure(list(ROI_name = c("HypM_602121-2_ROI_001_1_Vehicle", 
"HypM_602121-2_ROI_002_2_Vehicle", "HypM_602121-3_ROI_001_1_Vehicle", 
"HypM_602121-3_ROI_002_2_Vehicle", "HypM_602121-4_ROI_001_1_Vehicle", 
"HypM_602121-4_ROI_002_2_Vehicle", "HypM604121_13_ROI_001_1_Vehicle", 
"HypM604121_13_ROI_002_2_Vehicle"), treatment = c("Vehicle", 
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", 
"Vehicle"), ab = c("A", "B", "B", "B", "B", "A", "B", "B"), .rows = structure(list(
    1:3, 4:6, 7:9, 10:12, 13:15, 16:18, 19:21, 22:24), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -8L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE))

Resulting plot

Is there something I can do to fix this, so that the bars and error bars overlap?


Solution

  • With an MRE I can see that facet_wrap is the culprit. I believe the problem is that the bar heights are taken as the mean over all of the data for each ab (ignoring the facets), while the mean_se error bars are added "correctly" per facet; because the bars have the same heights in every facet, the plot looks off.

    library(dplyr)
    library(ggpubr)
    
    # Reproducible toy data: 24 rows with random A/B labels, the three cell
    # types cycling in order, and distinct random values in [0.898, 2.8].
    set.seed(420)
    X <- data.frame(
      ab = sample(c("A", "B"), size = 24, replace = TRUE),
      cellType = rep(
        c("T cells CD4", "T cells CD8", "T reg cells"),
        times = 8
      ),
      imm_perc = sample(898:2800, size = 24) / 1000
    )
    # Facets are added after ggbarplot() has built the plot; this is the
    # variant that shows the bar/error-bar mismatch.
    ggpubr::ggbarplot(
      X,
      x = "ab", y = "imm_perc", fill = "ab",
      add = "mean_se", width = 0.9
    ) +
      facet_wrap(~cellType)
    

    wrong

    But there is a facet.by parameter that works:

    # Same plot, but ggbarplot() does the faceting itself through facet.by.
    ggpubr::ggbarplot(
      X,
      x = "ab", y = "imm_perc", fill = "ab",
      add = "mean_se", width = 0.9,
      facet.by = "cellType"
    )
    

    correct

    It works with your data (loaded here as neighb) just the same:

    # NOTE(review): `neighb` must contain the summarised imm_perc column here,
    # presumably the dput() data from the question; confirm before running.
    ggpubr::ggbarplot(
      neighb,
      x = "ab", y = "imm_perc", fill = "ab",
      add = "mean_se", width = 0.9,
      facet.by = "cellType"
    )
    

    res


    Jon Spring's comment is very good advice: it not only makes your code cleaner, it also means you can't forget to ungroup before another group_by.