given the dataset df
df<-structure(list(stream = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), levels = c("blendziava", "smeltaite", "sventoji"), class = "factor"),
season = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L), levels = c("fall", "winter", "spring",
"summer"), class = c("ordered", "factor")), event = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("drought",
"flood"), class = "factor"), c = c(533, 596, 622, 590, 282,
313, 310, 293, 285, 292, 280, 262, 357, 360, 348, 337, 424,
464, 441, 427, 506, 703, 606, 579, 506, 605, 587, 526, 307,
366, 301, 330, 385, 346, 396, 210, 225, 218, 217, 228, 226,
270, 130, 136, 135, 143, 150, 153, 151, 222, 232, 234, 235,
238, 238, 240, 299, 296, 296, 297, 308, 305, 293, 360, 384,
382, 368, 367, 360, 362, 387, 417, 404, 389, 386, 382, 378,
386, 390, 397, 360, 190, 195, 191, 203, 175, 176, 173, 177,
235, 226, 220, 217, 316, 303, 291, 292, 366, 355, 343, 350,
378, 368, 341, 384), ph = c(8.31, 8.04, 8.28, 8.2, 8.35,
8.06, 8.02, 8.19, 8.14, 7.98, 7.91, 8.04, 7.91, 7.71, 7.82,
7.98, 8.3, 7.97, 8.17, 8.21, 8.05, 7.5, 7.86, 8.06, 7.86,
7.68, 7.88, 7.92, 9.34, 9.18, 9.34, 9.27, 9.16, 9.18, 8.97,
8.08, 8.05, 8, 8.03, 7, 8.07, 8.38, 7.97, 7.95, 7.97, 8.03,
8.05, 8.23, 8.65, 8.07, 8.17, 8.22, 8.21, 8.23, 8.27, 8.34,
8.3, 8.54, 8.53, 8.46, 8.49, 8.51, 8.59, 8.2, 8.18, 8.27,
8.3, 8.44, 8.46, 8.47, 8.04, 7.94, 8.15, 8.17, 8.29, 8.38,
8.39, 8.34, 8.33, 8.38, 8.26, 8.12, 8.09, 8.07, 8.05, 8.1,
8.1, 6.03, 8.01, 7.94, 7.94, 8, 8.16, 8.23, 8.08, 8.17, 8.16,
7.93, 7.87, 7.87, 7.7, 7.65, 7.71, 7.72, 7.81), t = c(12.9,
12.2, 12.1, 12.1, 8.2, 8.3, 8.2, 7.9, 2.5, 2.3, 2.3, 1.9,
4.7, 3.6, 3.3, 4, 11.5, 11.9, 10.5, 10.5, 18, 19, 17.6, 16,
17.3, 19.1, 17.9, 17, 10.7, 11.3, 11.3, 10.9, 11.3, 11.3,
11.2, 4.3, 5, 5.1, 5.2, 5.4, 5.4, 5.4, 1.6, 1.8, 1.9, 2.1,
2.1, 2.2, 2.4, 4.9, 4.7, 4.9, 4.2, 4.2, 4.3, 4.6, 10.5, 11.8,
11.5, 10.7, 12, 12.2, 11.3, 15.2, 17.5, 17.3, 16.1, 17.2,
16.8, 17.6, 17, 19.5, 19, 17.9, 18.6, 18.4, 19.4, 11.3, 11.1,
11.5, 11.5, 5.3, 5.3, 5.4, 5.6, 2.5, 2.3, 2.4, 2.4, 4.2,
3.6, 3.3, 3.5, 11, 10.6, 9.7, 9.8, 18, 18, 17, 16.6, 19.9,
20, 19, 19.6), do = c(9.2, 5.8, 7.8, 9.2, 7.9, 7.5, 8, 9.1,
11.7, 11.3, 11.7, 13.5, 13.3, 11.7, 13.2, 13.7, 14.9, 11.4,
13.8, 11.6, 7.9, 5, 5, 7.9, 8.2, 4.3, 7, 8.3, 9.9, 9.8, 10.5,
10.1, 10.8, 11, 10.6, 11.2, 11.5, 11.6, 11.4, 11.3, 11.9,
12.6, 12.4, 12.9, 12.5, 12.2, 12.3, 12.6, 14.7, 13.7, 14.9,
14.3, 14.3, 14.5, 14.5, 14.2, 14.2, 15.7, 15, 13.5, 13.4,
13.2, 13.9, 10.5, 8.6, 9.9, 10.6, 11.6, 10.6, 9.7, 10, 7.3,
9.7, 9.9, 10.6, 10.9, 10.9, 8.7, 9.8, 10.1, 9.8, 9.4, 9,
9.5, 9.8, 13, 12.5, 12.1, 12.7, 13.3, 14.4, 14.8, 15.5, 11.8,
12.1, 13, 14.4, 8, 8.8, 8.9, 7.3, 5.4, 8.4, 7.8, 6.1)), row.names = c(NA,
-105L), class = c("tbl_df", "tbl", "data.frame"))
with 4 numeric environmental variables, c,ph,t,do
and three factors stream, season, event
, I'm running different loops using this formula:
env_vars <- c("c","ph","t","do")
proc_model<-function(sub_df) {
sapply(env_vars, function(env) {
model <-tukey_hsd(reformulate("stream",env), data = sub_df)%>%
add_significance()
list(
mytukey_result = model
)
}, simplify=FALSE)
}
The model iterate the particular test on each numeric column, represented by the env_vars
vector.
Then, I run the function on my df
that I grouped in respect to the other two factors season
and event
with:
stat.test<-df%>%
group_by(season,event) %>%
proc_model()
stat.test
grouping everything in the stat_test
.
I do this procedure for different pipe friendly tests from rstatix::package
like anova_test()
, kruskal_test()
and their post hoc comparisons tukey_hsd()
and dunn_test()
, respectively.
The problem comes when I particularly do it for tukey_hsd
, giving me the error:
Error in UseMethod("tbl_vars") :
no applicable method for 'tbl_vars' applied to an object of class "NULL"
I suspect is because while the other tests are returning just a data.frame
, the tukey_hsd
returns a tibble data.frame
.
Is there a convenient method to export this output anyway, bypassing the above mentioned error?
Thanks in advance!
rstatix::tukey_hsd
is a wrapper function for stats::TukeyHSD
which according to the description needs "a fitted model object, usually an aov fit". Therefore, rstatix::tukey_hsd
works with lm()
and aov()
.
If you include lm() into your code, the error disappears and you get an output:
env_vars <- c("c","ph","t","do")
proc_model<-function(sub_df) {
sapply(env_vars, function(env) {
model <-tukey_hsd(lm(reformulate("stream",env), data = sub_df))%>%
add_significance()
list(
mytukey_result = model
)
}, simplify=FALSE)
}
However, in my opinion this approach requires a much more complex analysis including appropriate GLM or GLMM fits followed by meaningful comparisons.