I am trying to plot the percentage of three cellTypes (T cells CD4, T cells CD8 & Tregs) that are assigned to A vs B, averaged across the ROI_name column and with mean_se error bars. However, the plotted bars do not always align with the error bars.
Relevant code:
# Tag each cell "A" if its ROI is listed in tls_pos, otherwise "B".
neighb$ab <- ifelse(neighb$ROI_name %in% tls_pos, "A", "B")

# For every ROI / treatment / ab / cell type combination, compute what
# percentage of all cells in that ROI belong to the given T-cell population.
imm_perc <- neighb %>%
  select(cellType, treatment, ab, ROI_name) %>%
  # Denominator: all cells of the ROI, counted before cell types are dropped.
  group_by(ROI_name, treatment) %>%
  mutate(total_count = n()) %>%
  ungroup() %>%
  # Keep only the three T-cell populations of interest.
  filter(cellType %in% c("T cells CD4", "T cells CD8", "T reg cells")) %>%
  group_by(ROI_name, treatment, ab, cellType) %>%
  summarise(
    total_count = unique(total_count),
    # Numerator: rows remaining in this group after the cell-type filter.
    imm_count = n(),
    imm_perc = (imm_count / total_count) * 100
  )

# One table per treatment arm.
imm_perc_v <- imm_perc %>% filter(treatment == "Vehicle")
imm_perc_m <- imm_perc %>% filter(treatment == "MRTX")
# Bar plot of imm_perc by ab for the Vehicle rows, built with ggbarplot() and
# add = "mean_se". Faceting by cell type is applied afterwards with
# facet_wrap() rather than through ggbarplot()'s own facet.by argument.
p <- ggbarplot(
  imm_perc_v,
  x = "ab", y = "imm_perc",
  fill = "ab",
  width = 0.9,
  add = "mean_se"
) +
  facet_wrap(cellType ~ .) +
  ylab("Immune cell percentage") +
  theme_classic() +
  theme(
    legend.title = element_blank(),
    legend.text = element_text(size = 20),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 22),
    axis.text = element_text(size = 20),
    strip.text = element_text(size = 18)
  )
p
data:
# Sample data in dput() form: 24 rows (8 ROIs x 3 cell types), all with
# treatment "Vehicle", and columns ROI_name, treatment, ab, cellType,
# total_count, imm_count and imm_perc. The object is a grouped_df grouped by
# ROI_name, treatment and ab.
# NOTE(review): presumably this is imm_perc_v from the code above; the
# expression is not assigned to a name here, so assign it before use.
structure(list(ROI_name = c("HypM_602121-2_ROI_001_1_Vehicle",
"HypM_602121-2_ROI_001_1_Vehicle", "HypM_602121-2_ROI_001_1_Vehicle",
"HypM_602121-2_ROI_002_2_Vehicle", "HypM_602121-2_ROI_002_2_Vehicle",
"HypM_602121-2_ROI_002_2_Vehicle", "HypM_602121-3_ROI_001_1_Vehicle",
"HypM_602121-3_ROI_001_1_Vehicle", "HypM_602121-3_ROI_001_1_Vehicle",
"HypM_602121-3_ROI_002_2_Vehicle", "HypM_602121-3_ROI_002_2_Vehicle",
"HypM_602121-3_ROI_002_2_Vehicle", "HypM_602121-4_ROI_001_1_Vehicle",
"HypM_602121-4_ROI_001_1_Vehicle", "HypM_602121-4_ROI_001_1_Vehicle",
"HypM_602121-4_ROI_002_2_Vehicle", "HypM_602121-4_ROI_002_2_Vehicle",
"HypM_602121-4_ROI_002_2_Vehicle", "HypM604121_13_ROI_001_1_Vehicle",
"HypM604121_13_ROI_001_1_Vehicle", "HypM604121_13_ROI_001_1_Vehicle",
"HypM604121_13_ROI_002_2_Vehicle", "HypM604121_13_ROI_002_2_Vehicle",
"HypM604121_13_ROI_002_2_Vehicle"), treatment = c("Vehicle",
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle",
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle",
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle",
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle"), ab = c("A",
"A", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "A", "A", "A", "B", "B", "B", "B", "B", "B"), cellType = c("T cells CD4",
"T cells CD8", "T reg cells", "T cells CD4", "T cells CD8", "T reg cells",
"T cells CD4", "T cells CD8", "T reg cells", "T cells CD4", "T cells CD8",
"T reg cells", "T cells CD4", "T cells CD8", "T reg cells", "T cells CD4",
"T cells CD8", "T reg cells", "T cells CD4", "T cells CD8", "T reg cells",
"T cells CD4", "T cells CD8", "T reg cells"), total_count = c(9991L,
9991L, 9991L, 9693L, 9693L, 9693L, 6250L, 6250L, 6250L, 8933L,
8933L, 8933L, 7110L, 7110L, 7110L, 8719L, 8719L, 8719L, 8602L,
8602L, 8602L, 9358L, 9358L, 9358L), imm_count = c(124L, 121L,
75L, 113L, 107L, 62L, 155L, 41L, 69L, 165L, 103L, 96L, 284L,
257L, 153L, 450L, 242L, 216L, 178L, 151L, 128L, 153L, 166L, 84L
), imm_perc = c(1.24111700530477, 1.21108998098288, 0.750675608047243,
1.16578974517693, 1.10388940472506, 0.639636851336016, 2.48,
0.656, 1.104, 1.84708384641218, 1.15302809806336, 1.07466696518527,
3.9943741209564, 3.61462728551336, 2.15189873417722, 5.16114233283633,
2.77554765454754, 2.47734831976144, 2.06928621250872, 1.75540571960009,
1.48802604045571, 1.63496473605471, 1.77388330839923, 0.897627698226117
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-24L), groups = structure(list(ROI_name = c("HypM_602121-2_ROI_001_1_Vehicle",
"HypM_602121-2_ROI_002_2_Vehicle", "HypM_602121-3_ROI_001_1_Vehicle",
"HypM_602121-3_ROI_002_2_Vehicle", "HypM_602121-4_ROI_001_1_Vehicle",
"HypM_602121-4_ROI_002_2_Vehicle", "HypM604121_13_ROI_001_1_Vehicle",
"HypM604121_13_ROI_002_2_Vehicle"), treatment = c("Vehicle",
"Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle", "Vehicle",
"Vehicle"), ab = c("A", "B", "B", "B", "B", "A", "B", "B"), .rows = structure(list(
1:3, 4:6, 7:9, 10:12, 13:15, 16:18, 19:21, 22:24), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -8L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))

Is there something I can do to fix this, so that the bars and error bars overlap?
With an MRE I can see that facet_wrap is the culprit. I believe the problem is that ggbarplot takes the mean over all of the ab values as the bar heights but tries to add the mean_se "correctly"; because the bars all have the same heights, the result looks off.
library(dplyr)
library(ggpubr)
# Minimal reproducible example: 24 rows (8 per cell type) with a randomly
# drawn A/B label and a randomly drawn percentage between 0.898 and 2.8.
set.seed(420)
X <- data.frame(
  ab = sample(c("A", "B"), size = 24, replace = TRUE),
  cellType = rep(c("T cells CD4", "T cells CD8", "T reg cells"), times = 8),
  imm_perc = sample(898:2800, size = 24) / 1000
)

# Variant with the facets added afterwards through facet_wrap().
ggpubr::ggbarplot(
  X,
  x = "ab", y = "imm_perc",
  fill = "ab",
  width = 0.9,
  add = "mean_se"
) +
  facet_wrap(facets = ~cellType)
But there is a facet.by parameter that works:
# Same plot on the example data, but with the faceting delegated to
# ggbarplot() itself through its facet.by argument.
ggpubr::ggbarplot(
  X,
  x = "ab", y = "imm_perc",
  fill = "ab",
  facet.by = "cellType",
  width = 0.9,
  add = "mean_se"
)
It works just the same with your neighb:
# The facet.by variant applied to the data from the question.
# NOTE(review): `neighb` must contain an imm_perc column here, so it presumably
# holds the summarised table posted in the question (imm_perc_v in the
# question's code) rather than the raw per-cell table - confirm.
ggpubr::ggbarplot(
  neighb,
  x = "ab", y = "imm_perc",
  fill = "ab",
  facet.by = "cellType",
  width = 0.9,
  add = "mean_se"
)
Jon Spring's comment is very good advice: it not only makes your code cleaner, but also means you can't forget to ungroup before another group_by.