I have a data frame, as shown below:
> dput(filtered_lymph)
structure(list(cluster = c("CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells",
"CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD4+ Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "CD8+ Effector Tcells",
"CD8+ Effector Tcells", "CD8+ Effector Tcells", "NK Cells", "NK Cells",
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells",
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells",
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells",
"NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells", "NK Cells",
"NK Cells", "Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells",
"Progenitor Tcells", "Progenitor Tcells", "Progenitor Tcells"
), condition = c("B16_NTX_1", "B16_NTX_2", "B16_NTX_3", "B16_NTX_4",
"KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", "KPC_NTX_5",
"B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", "KPC_aPD1_1",
"KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", "B16_AC484_2",
"B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", "KPC_AC484_3",
"KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6", "B16_NTX_1", "B16_NTX_2",
"B16_NTX_3", "B16_NTX_4", "KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3",
"KPC_NTX_4", "KPC_NTX_5", "B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3",
"B16_aPD1_4", "KPC_aPD1_1", "KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5",
"B16_AC484_1", "B16_AC484_2", "B16_AC484_3", "B16_AC484_4", "KPC_AC484_1",
"KPC_AC484_2", "KPC_AC484_3", "KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6",
"B16_NTX_1", "B16_NTX_2", "B16_NTX_3", "B16_NTX_4", "KPC_NTX_1",
"KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4", "KPC_NTX_5", "B16_aPD1_1",
"B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4", "KPC_aPD1_1", "KPC_aPD1_2",
"KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1", "B16_AC484_2", "B16_AC484_3",
"B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2", "KPC_AC484_3", "KPC_AC484_4",
"KPC_AC484_5", "KPC_AC484_6", "B16_NTX_1", "B16_NTX_2", "B16_NTX_3",
"B16_NTX_4", "KPC_NTX_1", "KPC_NTX_2", "KPC_NTX_3", "KPC_NTX_4",
"KPC_NTX_5", "B16_aPD1_1", "B16_aPD1_2", "B16_aPD1_3", "B16_aPD1_4",
"KPC_aPD1_1", "KPC_aPD1_2", "KPC_aPD1_4", "KPC_aPD1_5", "B16_AC484_1",
"B16_AC484_2", "B16_AC484_3", "B16_AC484_4", "KPC_AC484_1", "KPC_AC484_2",
"KPC_AC484_3", "KPC_AC484_4", "KPC_AC484_5", "KPC_AC484_6"),
`total cells` = c(1.705589084, 1.414617716, 2.009911894,
0.857632933, 1.263689975, 1.606186794, 2.006018054, 1.449275362,
1.286863271, 4.41495778, 5.954825462, 4.0544723, 4.275011018,
1.063829787, 2.304330552, 0.997592019, 1.957585644, 0.386100386,
0.500294291, 0.855880729, 0.876460768, 1.538118591, 1.736208345,
1.583467525, 1.176075269, 2.027027027, 1.645692159, 14.51062713,
4.479622769, 12.61013216, 7.261292167, 3.959561921, 12.55205235,
6.218655968, 14.29512516, 3.699731903, 4.680337756, 3.661875428,
7.644692046, 6.63287792, 1.968085106, 5.522447358, 10.49191606,
3.996737357, 8.273579702, 11.65391407, 12.64494754, 6.385642738,
20.17387428, 20.61047326, 22.49060655, 17.37231183, 13.73873874,
15.32752501, 3.332458672, 6.635230717, 2.450440529, 3.687821612,
4.085930918, 5.770374777, 3.911735206, 5.928853755, 2.252010724,
6.899879373, 7.289527721, 4.178272981, 6.170118995, 6.64893617,
3.535955503, 6.329549364, 3.752039152, 6.784335356, 5.974102413,
7.482054114, 7.429048414, 10.90057958, 9.633155979, 11.54052603,
10.68548387, 14.99356499, 16.16650532, 5.510364734, 2.72819131,
3.716960352, 5.431675243, 4.886267902, 6.543723974, 6.519558676,
5.401844532, 23.21715818, 3.908323281, 3.867214237, 2.69266481,
2.732481269, 1.861702128, 7.151370679, 7.258341933, 4.159869494,
3.281853282, 3.943496174, 4.389839867, 2.796327212, 4.257690593,
5.516661999, 4.911433172, 6.686827957, 8.719433719, 6.808647951
), New_Condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("NTX",
"Anti-PD-1", "AC484"), class = "factor"), Tissue_Expression = c("Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid", "Lymphoid",
"Lymphoid", "Lymphoid")), row.names = c(NA, -108L), class = c("tbl_df",
"tbl", "data.frame"))
> str(filtered_lymph)
tibble [108 × 5] (S3: tbl_df/tbl/data.frame)
$ cluster : chr [1:108] "CD4+ Tcells" "CD4+ Tcells" "CD4+ Tcells" "CD4+ Tcells" ...
$ condition : chr [1:108] "B16_NTX_1" "B16_NTX_2" "B16_NTX_3" "B16_NTX_4" ...
$ total cells : num [1:108] 1.706 1.415 2.01 0.858 1.264 ...
$ New_Condition : Factor w/ 3 levels "NTX","Anti-PD-1",..: 1 1 1 1 1 1 1 1 1 2 ...
$ Tissue_Expression: chr [1:108] "Lymphoid" "Lymphoid" "Lymphoid" "Lymphoid" ...
> summary(filtered_lymph)
cluster condition total cells
Length:108 Length:108 Min. : 0.3861
Class :character Class :character 1st Qu.: 2.6321
Mode :character Mode :character Median : 4.5800
Mean : 6.0848
3rd Qu.: 7.2591
Max. :23.2172
New_Condition Tissue_Expression
NTX :36 Length:108
Anti-PD-1:32 Class :character
AC484 :40 Mode :character
I am trying to find a way of conducting a two tailed unpaired t-test between the control and test 'New_Condition' variables - this would be between 'NTX' and Anti-PD-1' and another between 'NTX' and 'AC484'. I want this to be calculated for each of the different cell clusters ie. 'CD4+ Tcells', 'Progenitor Tcells' etc.
I then want to be able to plot the p-values above the respective pairs of boxplots on my ggplot - the code for which is detailed below:
p_lymph <- ggplot(filtered_lymph, aes(x = cluster, y = `total cells`, fill = New_Condition, color = New_Condition)) +
geom_boxplot(position = position_dodge(width = 0.8), size = 0.9,linetype="solid", outlier.shape=NA) +
scale_fill_manual(values=fill_colors) + #These two lines I am changing the legend label from New_Condition (as it is named in the original data column) to Treatment as stated on the original paper
scale_color_manual(name="Condition", values = outline_colors) +
theme_minimal()+
guides(fill="none",color="none")+ #removing the legend
labs(y="Total cells (%)",
x=NULL)+
scale_x_discrete(labels=x_lym_labels)+
theme(
axis.line=element_line(color="black",linewidth=0.5,linetype="solid"), #adding axis lines
axis.text.x=element_blank(),
axis.ticks.x=element_line(colour="black",linewidth=0.5),#manually adding in tick breaks
axis.ticks.y=element_line(colour="black", linewidth=0.5),
panel.grid=element_blank() #removing grid lines
)+
scale_y_continuous(breaks=seq(0,25, by=5),
limits=c(0,25), #Adjusting the y-axis breaks
labels=c("0","5","10","15","20","25") #customising the y-axis labels
)
I have previously tried the t.test function:
t.test(filtered_lymph$'total cells',filtered_lymph$cluster[filtered_lymph$New_Condition])$p.value
however got this error
Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") :
missing value where TRUE/FALSE needed
In addition: Warning messages:
1: In mean.default(y) : argument is not numeric or logical: returning NA
2: In var(y) : NAs introduced by coercion
Any help would be greatly appreciated!
The answer, as hinted in the comments is to use something like geom_signif
from ggsignif
, but you would have to change the structure of your plot to get this to work:
library(ggplot2)
library(ggsignif)
ggplot(filtered_lymph, aes(New_Condition, `total cells`,
color = New_Condition, fill = after_scale(alpha(color, 0.5)))) +
geom_boxplot() +
geom_signif(comparisons = list(c('NTX', 'Anti-PD-1'),
c('NTX', 'AC484')),
step_increase = 0.1, color = 'black',
map_signif_level = scales::pvalue_format(add_p = TRUE)) +
scale_x_discrete('Cluster', expand = c(0.25, 0.5)) +
scale_color_manual("Condition", values = c("#4d4b4c", "#4c517b", "#c63a41")) +
facet_grid(.~cluster, switch = 'x') +
theme_minimal() +
theme(panel.spacing.x = unit(0, 'mm'),
axis.text.x = element_blank(),
axis.line = element_line(),
strip.placement = 'outside')