I am trying to run an analysis in R. I need to perform an unpaired Wilcoxon test on my data frame. First of all, I wrote a script for the analysis:
# Unpaired two-sample Wilcoxon test on Obs1: among the rows with Var1 == "1",
# compare the Var2 == "1" group against the Var2 == "2" group and extract
# only the p-value from the test result.
with(
  df,
  wilcox.test(
    x = Obs1[Var1 == "1" & Var2 == "1"],
    y = Obs1[Var1 == "1" & Var2 == "2"]
  )
)$p.value
However, I want to run this analysis in a for loop, changing the column from Obs1 to Obs2 and then to Obs3, but I have no idea how to start.
Sample Var1 Var2 Obs1 Obs2 Obs3
A 1 2 3.12 4.13 5.35
A 2 2 6.54 2.67 1.11
B 1 3 4.14 1.96 8.98
B 2 3 5.47 9.42 6.72
C 1 1 8.56 4.95 2.62
C 2 1 4.92 3.21 3.97
D 1 1 3.36 9.99 2.62
D 2 1 8.57 1.25 6.74
etc.
dput(df)
structure(list(Sample = c("A", "A", "B", "B", "C", "C", "D", "D", "E", "E", "F", "F", "G", "G", "H", "H", "I", "I"), Var1 = c("1", "2", "1", "2", "1", "2", "1", "2", "1", "2", "1", "2", "1", "2", "1", "2", "1", "2"), Var2 = c("2", "2", "3", "3", "1", "1", "1", "1", "2", "2", "2", "2", "1", "1", "3", "3", "3", "3"), Obs1 = c(3.12, 6.54, 4.14, 5.40, 8.56, 4.92, 3.36, 8.57, 7.56, 7.89, 2.34, 5.43, 6.21, 7.43, 5.83, 9.28, 10.30, 3.42), Obs2 = c(4.13, 2.67, 1.96, 9.42, 4.95, 3.21, 9.99, 1.25, 9.78, 9.54, 7.23, 6.24, 1.23, 2.35, 4.73, 4.12, 5.30, 0.12), Obs3 = c(5.35, 1.11, 8.98, 6.72, 2.62, 3.97, 2.62, 6.74, 9.41, 5.37, 7.43, 9.62, 4.69, 4.27, 2.74, 3.53, 5.87, 7.98), Obs4 = c(8.52, 9.59, 3.45, 9.40, 8.77, 4.26, 9.78, 5.55, 2.31, 5.12, 2.35, 4.33, 7.61, 5.37, 6.84, 9.98, 8.65, 0.43)), row.names = c(NA, -18L), class = c("tbl_df", "tbl", "data.frame"))
Could someone help me with the loop? Thanks in advance.
You can do this with the tidyverse:
library(tidyverse)
# Example data: nine samples (A-I), each with one row per level of Var1.
# Var2 is constant within a sample, so each per-sample value is repeated
# twice. The tibble is built directly rather than by converting a bare list.
df <- tibble(
  Sample = rep(LETTERS[1:9], each = 2),
  Var1 = rep(c("1", "2"), times = 9),
  Var2 = rep(c("2", "3", "1", "1", "2", "2", "1", "3", "3"), each = 2),
  Obs1 = c(
    3.12, 6.54, 4.14, 5.40, 8.56, 4.92, 3.36, 8.57, 7.56,
    7.89, 2.34, 5.43, 6.21, 7.43, 5.83, 9.28, 10.30, 3.42
  ),
  Obs2 = c(
    4.13, 2.67, 1.96, 9.42, 4.95, 3.21, 9.99, 1.25, 9.78,
    9.54, 7.23, 6.24, 1.23, 2.35, 4.73, 4.12, 5.30, 0.12
  ),
  Obs3 = c(
    5.35, 1.11, 8.98, 6.72, 2.62, 3.97, 2.62, 6.74, 9.41,
    5.37, 7.43, 9.62, 4.69, 4.27, 2.74, 3.53, 5.87, 7.98
  ),
  Obs4 = c(
    8.52, 9.59, 3.45, 9.40, 8.77, 4.26, 9.78, 5.55, 2.31,
    5.12, 2.35, 4.33, 7.61, 5.37, 6.84, 9.98, 8.65, 0.43
  )
)
# Keep only the two groups being compared (Var1 == "1" with Var2 == "1" or
# "2"), tag each row with its combined group code ("11" or "12"), and retain
# just that code plus the observation columns.
WT_data <- df %>%
  filter(Var1 == "1", Var2 %in% c("1", "2")) %>%
  mutate(Var12 = paste0(Var1, Var2)) %>%
  select(Var12, starts_with("Obs"))

# Run the unpaired Wilcoxon test once per observation column, with Var12 as
# the grouping variable, and collect the p-values into a one-row tibble.
WT_data %>%
  select(-Var12) %>%
  map_df(function(obs) wilcox.test(obs ~ Var12, data = WT_data)$p.value)
#> Warning in wilcox.test.default(x = c(2.62, 2.62, 4.69), y = c(5.35, 9.41, :
#> cannot compute exact p-value with ties
#> # A tibble: 1 x 4
#> Obs1 Obs2 Obs3 Obs4
#> <dbl> <dbl> <dbl> <dbl>
#> 1 0.4 1 0.0765 0.2
Created on 2020-12-01 by the reprex package (v0.3.0)