rrstatix

Error running two way mixed model ANOVA in loop but not when column of data specifically identified


This is the loop that is generating the error.

Error: ! Can't subset columns with df_numeric[[column]]. ✖ Can't convert from df_numeric[[column]] to due to loss of precision.

df_numeric <- df[, sapply(df, is.numeric)]

for (column in names(df_numeric)) {
  res.aov <- anova_test(data = df, dv= df_numeric[[column]], wid = `Subject`, within = `Timepoint`, between = `Genotype`)
  get_anova_table(res.aov)
}  

But when I pull out the code for the anova and specifically input the column from my dataframe it generates the proper anova table results.

res.aov <- anova_test(data = df, dv=  `Tregs CD127lo CD25+`, wid = `Subject`, within = `Timepoint`, between = `Genotype`)
get_anova_table(res.aov)

I have tried using df_numeric$column.

Dataframe

library(rstatix)

 dput(df_numeric)
structure(list(`Tregs CD127lo CD25+` = c(2702, 2175, 2651, 1672.8, 
3762, 4264, 1975, 3208, 3285, 3457, 3383, 2619.9, 11872, 16101, 
13443, 3935, 1894, 2297, 7385, 8901, 9522, 7100, 8789, 9309, 
371, 379, 514), `Monocytes % of Live by Size` = c(1.38, 2.66, 
4.74, 5.83, 3.9, 5.06, 6.36, 3.45, 2.64, 6.33, 10.7, 9.41, 3.42, 
3.46, 2.73, 2.38, 3.12, 4.44, 5.31, 3.59, 4.91, 1.53, 6.54, 4.85, 
6.87, 3.66, 5.07), `NK cells` = c(90.62, 153.6, 159.8, 88, 118, 
159, 74, 82, 64, 30, 344, 73, 29, 198, 79, 145, 258, 307, 30, 
74.4, 0, 47.3, 32, 0, 52.6, 95.3, 51.7)), row.names = c(NA, -27L
), class = c("tbl_df", "tbl", "data.frame"))


> dput(df)
structure(list(Subject = c("ASCVD002", "ASCVD002", "ASCVD002", 
"ASCVD003", "ASCVD003", "ASCVD003", "ASCVD004", "ASCVD004", "ASCVD004", 
"ASCVD005", "ASCVD005", "ASCVD005", "ASCVD006", "ASCVD006", "ASCVD006", 
"ASCVD008", "ASCVD008", "ASCVD008", "ASCVD009", "ASCVD009", "ASCVD009", 
"ASCVD010", "ASCVD010", "ASCVD010", "ASCVD011", "ASCVD011", "ASCVD011"
), Timepoint = c("0", "0.25", "0.5", "0", "0.25", "0.5", "0", 
"0.25", "0.5", "0", "0.25", "0.5", "0", "0.25", "0.5", "0", "0.25", 
"0.5", "0", "0.25", "0.5", "0", "0.25", "0.5", "0", "0.25", "0.5"
), Genotype = c("Heterozygote", "Heterozygote", "Heterozygote", 
"Heterozygote", "Heterozygote", "Heterozygote", "Heterozygote", 
"Heterozygote", "Heterozygote", "GG", "GG", "GG", "AA", "AA", 
"AA", "GG", "GG", "GG", "AA", "AA", "AA", "AA", "AA", "AA", "GG", 
"GG", "GG"), `Tregs CD127lo CD25+` = c(2702, 2175, 2651, 1672.8, 
3762, 4264, 1975, 3208, 3285, 3457, 3383, 2619.9, 11872, 16101, 
13443, 3935, 1894, 2297, 7385, 8901, 9522, 7100, 8789, 9309, 
371, 379, 514), `Monocytes % of Live by Size` = c(1.38, 2.66, 
4.74, 5.83, 3.9, 5.06, 6.36, 3.45, 2.64, 6.33, 10.7, 9.41, 3.42, 
3.46, 2.73, 2.38, 3.12, 4.44, 5.31, 3.59, 4.91, 1.53, 6.54, 4.85, 
6.87, 3.66, 5.07), `NK cells` = c(90.62, 153.6, 159.8, 88, 118, 
159, 74, 82, 64, 30, 344, 73, 29, 198, 79, 145, 258, 307, 30, 
74.4, 0, 47.3, 32, 0, 52.6, 95.3, 51.7)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -27L))

Solution

  • Thank you for providing the dput and code - you are using the df dataset, so you don't really need the df_numeric dataset. To get the names of all your numeric columns, you can use the following code: names(df)[unlist(lapply(df, is.numeric))]. Also you assigned a vector of values to the command dv - it should only be the name of the column.

    The below should work for you, it did for me:

    for (column in names(df)[unlist(lapply(df, is.numeric))]) {
      res.aov <- rstatix::anova_test(data = df, dv = column, 
                                     wid = `Subject`, within = `Timepoint`, between = `Genotype`)
      rstatix::get_anova_table(res.aov)
    }  
    

    Note that in your loop, you are overwriting res.aov with each iteration and you are not storing the results from get_anova_table(res.aov) - I would suggest storing these data in a list:

    nnames <- names(df)[unlist(lapply(df, is.numeric))]
    res.aov <- list()
    aov_tab <- list()
    for (column in nnames) {
      res.aov[[column]] <- rstatix::anova_test(data = df, dv = column, 
                                     wid = `Subject`, within = `Timepoint`, between = `Genotype`)
      aov_tab[[column]] <- rstatix::get_anova_table(res.aov[[column]])
    }