I am trying to get a data frame of t.test outputs grouped by species for many variables. I have a subset of data that looks like this;
structure(list(Species = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), levels = c("Apetahia longistigmata",
"Clermontia fauriei", "Cyanea fissa", "Cyanea hardyi", "Lobelia gregoriana",
"Lobelia hypoleuca", "Lobelia lukwangulensis", "Trematolobelia kauaiensis"
), class = "factor"), Experiment = structure(c(1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L), levels = c("standard 1",
"standard 2"), class = "factor"), Alpha.peak.1b = c(-78.35, -78.34,
-78.84, -78.85, -78.01, -78.17, -79.17, -79.01, -79.01, -79.18,
-79.18, -79.72, -77.85, -77.86, -77.69, -78.01, -77.86), Alpha.recrystalization.peak = c(-70.34,
-78.34, -70.83, -70.67, -70.67, -71.17, -71, -71.17, -70.67,
-70.84, -71.01, -70.67, -70.67, -70.68, -70.84, -70.67, -70.68
), Beta.peak.1a = c(-39.37, -39.63, -39.85, -39.53, -39.69, -39.85,
-40.52, -40.52, -40.69, -40.86, -40.69, -40.68, -38.87, -38.56,
-38.72, -38.86, -38.39)), row.names = c(NA, -17L), class = c("tbl_df",
"tbl", "data.frame"))
I am trying the following;
df.ttest<-df %>%
group_by(Species) %>%
group_modify(~ broom::tidy(df[,3:ncol(df)], 2, function(x) t.test(x ~ Experiment, data = df)))
On my subset of data i get the following error;
Error: unexpected ',' in "ction(x) t.test(x ~ Experiment, data = df)))
However, on my full data, I get an output that looks like this;
structure(list(Species = structure(c(1L, 1L, 1L), levels = c("Apetahia longistigmata",
"Clermontia fauriei", "Cyanea fissa", "Cyanea hardyi", "Lobelia gregoriana",
"Lobelia hypoleuca", "Lobelia lukwangulensis", "Trematolobelia kauaiensis"
), class = "factor"), column = c("Alpha.peak.1b", "Alpha.recrystalization.peak",
"Beta.peak.1a"), n = c(Alpha.peak.1b = 47, Alpha.recrystalization.peak = 47,
Beta.peak.1a = 47), mean = c(Alpha.peak.1b = -78.5808510638298,
Alpha.recrystalization.peak = -71.0140425531915, Beta.peak.1a = -39.4
), sd = c(Alpha.peak.1b = 0.577779970860107, Alpha.recrystalization.peak = 1.18348905612851,
Beta.peak.1a = 0.869930032169005), median = c(Alpha.peak.1b = -78.68,
Alpha.recrystalization.peak = -70.84, Beta.peak.1a = -39.52),
trimmed = c(Alpha.peak.1b = -78.5807692307692, Alpha.recrystalization.peak = -70.8828205128205,
Beta.peak.1a = -39.4182051282051), mad = c(Alpha.peak.1b = 0.490000000000009,
Alpha.recrystalization.peak = 0.170000000000002, Beta.peak.1a = 0.669999999999995
), min = c(Alpha.peak.1b = -79.72, Alpha.recrystalization.peak = -78.34,
Beta.peak.1a = -40.86), max = c(Alpha.peak.1b = -77.52, Alpha.recrystalization.peak = -69.51,
Beta.peak.1a = -37.69), range = c(Alpha.peak.1b = 2.2, Alpha.recrystalization.peak = 8.83,
Beta.peak.1a = 3.17), skew = c(Alpha.peak.1b = 0.027222651634338,
Alpha.recrystalization.peak = -5.13007598432232, Beta.peak.1a = 0.112391518548281
), kurtosis = c(Alpha.peak.1b = 2.00457929376749, Alpha.recrystalization.peak = 32.7138551153747,
Beta.peak.1a = 1.9708376743257), se = c(Alpha.peak.1b = 0.0842778705371632,
Alpha.recrystalization.peak = 0.172629621110036, Beta.peak.1a = 0.126892336746095
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), groups = structure(list(Species = structure(1L, levels = c("Apetahia longistigmata",
"Clermontia fauriei", "Cyanea fissa", "Cyanea hardyi", "Lobelia gregoriana",
"Lobelia hypoleuca", "Lobelia lukwangulensis", "Trematolobelia kauaiensis"
), class = "factor"), .rows = structure(list(1:3), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
EDIT for better expected outcome:
I am expecting an output that has the t.test
output but also columns for species and variables, respectively. For example;
structure(list(Species = "Species x", Variable = "Variable x",
estimate = 0.10705965418003, estimate1 = 0.460507391694923,
estimate2 = 0.353447737514893, statistic = c(t = 1.5638634504756),
p.value = 0.198127496637623, parameter = c(df = 3.72179497232624),
conf.low = -0.0887480304981188, conf.high = 0.30286733885818,
method = "Welch Two Sample t-test", alternative = "two.sided"), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
Any help is greatly appreciated!
Here is another approach using dplyr::rowwise()
with nest_by()
and expand_grid()
. I like having a final data.frame
which contains inputs and output and we can verify each piece (the formula form
, the data
and the variables vars
).
library(tidyverse)
dv <- df %>%
select(-c(Species, Experiment)) %>%
colnames()
df |>
nest_by(Species) %>%
expand_grid(vars = dv) %>%
rowwise() %>%
mutate(form = list(reformulate("Experiment", vars)),
res = list(t.test(form, data = data) %>%
broom::tidy())
) %>%
unnest(res)
#> # A tibble: 9 x 14
#> Species data vars form estim~1 estim~2 estim~3 stati~4 p.value
#> <fct> <list<> <chr> <list> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Apetahia long~ [6 x 4] Alph~ <formula> -0.167 -78.5 -78.3 -0.545 0.619
#> 2 Apetahia long~ [6 x 4] Alph~ <formula> -2.33 -73.2 -70.8 -0.899 0.463
#> 3 Apetahia long~ [6 x 4] Beta~ <formula> 0.0733 -39.6 -39.7 0.440 0.686
#> 4 Clermontia fa~ [6 x 4] Alph~ <formula> 0.297 -79.1 -79.4 1.58 0.236
#> 5 Clermontia fa~ [6 x 4] Alph~ <formula> -0.107 -70.9 -70.8 -0.604 0.583
#> 6 Clermontia fa~ [6 x 4] Beta~ <formula> 0.167 -40.6 -40.7 2.05 0.110
#> 7 Cyanea fissa [5 x 4] Alph~ <formula> 0.135 -77.8 -77.9 1.45 0.280
#> 8 Cyanea fissa [5 x 4] Alph~ <formula> -0.0550 -70.7 -70.7 -0.995 0.423
#> 9 Cyanea fissa [5 x 4] Beta~ <formula> -0.0917 -38.7 -38.6 -0.365 0.766
#> # ... with 5 more variables: parameter <dbl>, conf.low <dbl>, conf.high <dbl>,
#> # method <chr>, alternative <chr>, and abbreviated variable names
#> # 1: estimate, 2: estimate1, 3: estimate2, 4: statistic
Data from OP
df <- structure(list(Species = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), levels = c("Apetahia longistigmata",
"Clermontia fauriei", "Cyanea fissa", "Cyanea hardyi", "Lobelia gregoriana",
"Lobelia hypoleuca", "Lobelia lukwangulensis", "Trematolobelia kauaiensis"
), class = "factor"), Experiment = structure(c(1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L), levels = c("standard 1",
"standard 2"), class = "factor"), Alpha.peak.1b = c(-78.35, -78.34,
-78.84, -78.85, -78.01, -78.17, -79.17, -79.01, -79.01, -79.18,
-79.18, -79.72, -77.85, -77.86, -77.69, -78.01, -77.86), Alpha.recrystalization.peak = c(-70.34,
-78.34, -70.83, -70.67, -70.67, -71.17, -71, -71.17, -70.67,
-70.84, -71.01, -70.67, -70.67, -70.68, -70.84, -70.67, -70.68
), Beta.peak.1a = c(-39.37, -39.63, -39.85, -39.53, -39.69, -39.85,
-40.52, -40.52, -40.69, -40.86, -40.69, -40.68, -38.87, -38.56,
-38.72, -38.86, -38.39)), row.names = c(NA, -17L), class = c("tbl_df",
"tbl", "data.frame"))
Created on 2023-03-06 by the reprex package (v2.0.1)