For this question, I'm using example data from another question: (Using combn() in R to find all possible t-test relationships, how to access the variables compared?)
I am trying to run t.tests on the data in one column (vdem_media_bias
) based on the grouping and averaging according to the like rows in column group
. To do this, I made a matrix (col_vec
) where all the possible combinations of rows in column group
are given in columns. Then I looped through the combination columns and tried to use the values in col_vec
to index trust_news
and run t.tests on the values in vdem_media_bias
.
library(tidyr)
library(dplyr)
library(purrr)
library(broom)
library(data.table)
trust_news <- data.table(group = c( 1, 2, 3, 5, 1, 2, 3, 5 ),
mean = c( 2.68, 2.8, 3.22, 2.96, 2.16739457271907, 2.52420459002153, 2.41568263469884, 3.203042892 ),
polity2 = c( 11.0065624592239, 11.5173963819866, 10.9690935682988, 7.4505271137667, 12.5039982916238, 10.2183026540883, 11.5528854439886, 8.951439457 ),
web = c( 87.2661, 94.8967, 89.7391, 74.3872, 99.429653354408, 86.651554228033, 88.7724076776387, 82.71520214 ),
rsf = c( 16.4114055821914, 16.7094853023036, 29.5703741065912, 23.8887382488302, 15.6296493773739, 17.6154322886768, 28.7548757022008, 19.67113906 ),
civil_liberties = c( 0.0825935016192092, 0.775101009726579, 0.0505530810447113, 0.302537382248755, 0.4563913292031, 0.759215853597412, 0.172427084833363, 0.772689046 ),
freedom_of_expression = c( 0.828613177733605, 0.312013322438901, 0.788670591087342, 0.192519523553583, 0.599717513191781, 0.682591150447179, 0.0926750357710926, 0.777797913 ),
vdem_gov_censorship_effort = c( 0.138795469801477, 0.609063884646593, 0.726359178306941, 0.889397969637812, 0.935589374225135, 0.5665276966565, 0.939565934607974, 0.383976181 ),
vdem_self_censorship_effort = c( 0.396389956859171, 0.368909768892093, 0.795891881421097, 0.635097442795347, 0.818482469632694, 0.236225542529557, 0.615626792133531, 0.035815441 ),
vdem_freedom_of_expression = c( 0.270293421459824, 0.0390741902396886, 0.0430150919275922, 0.451150387704898, 0.477508000166212, 0.841653725881213, 0.885557800737999, 0.205165414 ),
ciri_freedom_of_speech_and_press = c( 0.316629196080121, 0.473252307510063, 0.782615827564335, 0.600531400120434, 0.710143072640082, 0.0105031020419695, 0.348075169050439, 0.282137779 ),
media_integrity = c( 0.763527559879246, 0.32401086386208, 0.72357993656577, 0.00400993472498878, 0.804790656378296, 0.752078255508056, 0.12450323855886, 0.784842369 ),
vdem_critical_press = c( 0.381934476222942, 0.34180259848916, 0.446445782446012, 0.101622162644166, 0.69273703605482, 0.674447773690032, 0.593981628888233, 0.942639773 ),
vdem_media_perspective = c( 0.0121195504624632, 0.55722585253704, 0.759939717080726, 0.422261093911245, 0.425138202329788, 0.0440513028866172, 0.128858393736017, 0.242947985 ),
vdem_media_bias = c( 0.0386091435387627, 0.149227444223962, 0.109819706451143, 0.79759885890908, 0.421084356961021, 0.269048253813396, 0.661346417250035, 0.051339441 ),
vdem_media_corruption = c( 0.188072265767419, 0.70446056263869, 0.988731124980027, 0.659578974943737, 0.924096790161402, 0.465803086735264, 0.0389306657957701, 0.865528541 ),
vdem_media_freedom = c( 0.710150348587615, 0.253187878925348, 0.436362652570953, 0.471900738683268, 0.302672853555399, 0.88559075326591, 0.786474651280836, 0.234080239 )
)
#Get the unique names of the rows in a column
condition_vec = unique(trust_news$group)
#get combinations of rows in the 'group' column
col_vec <- names(combn(condition_vec, 2, FUN = paste))
tab_ttest_list = list()
for(c in col_vec) {
tmp_ttest <- t.test(trust_news[group == combn(condition_vec, 2)[1,c], vdem_media_bias],
trust_news[group == combn(condition_vec, 2)[2,c], vdem_media_bias])
res_tab_1 = data.table(condition1=combn(condition_vec, 2)[1,c],
condition2=combn(condition_vec, 2)[2,c],
t_statistic=tmp_ttest$statistic,
df=tmp_ttest$parameter,
p_value=tmp_ttest$p.value,
mean_cond1=tmp_ttest$estimate[1],
mean_cond2=tmp_ttest$estimate[2],
conf_low=tmp_ttest$conf.low,
conf_high=tmp_ttest$conf.high,
method=tmp_ttest$method)
res_tab = rbind(res_tab_1)
tab_ttest_list[[c]] = res_tab
print(c)
}
ttest_compList = rbindlist(tab_ttest_list)
Here's the thing: When I run this and let it sit and think for a while, I get no error. It seems like it has a problem processing it. In R-studio, if I run col_vec
or ttest_compList
, it comes up completely empty.
Which programming god did I upset to make this happen and how do I appease it? Thank you kindly.
**UPDATE: ** When I run one t.test using the code below:
#Get the unique names of the rows in a column
condition_vec = unique(trust_news$group)
#get list of combinations of rows in the 'rows' column
col_vec <- combn(condition_vec, 2, FUN = paste)
tmp_ttest <- t.test(trust_news[group == combn(condition_vec, 2)[1,1], vdem_media_bias],
trust_news[group == combn(condition_vec, 2)[2,1], vdem_media_bias])
res_tab_1 = data.table(condition1=combn(condition_vec, 2)[1,1],
condition2=combn(condition_vec, 2)[2,1],
t_statistic=tmp_ttest$statistic,
df=tmp_ttest$parameter,
p_value=tmp_ttest$p.value,
mean_cond1=tmp_ttest$estimate[1],
mean_cond2=tmp_ttest$estimate[2],
conf_low=tmp_ttest$conf.low,
conf_high=tmp_ttest$conf.high,
method=tmp_ttest$method)
I get results that look like this when I run res_tab_1
:
UPDATE
Adding as.numeric()
to col_vec
seems to fix the no 'dimnames' attribute for array
issue, so it is no longer character, but there is another issue. I only get t.tests for some of my comparisons, as shown below.
col_vec <- as.numeric(combn(condition_vec, 2, FUN = paste))
condition1 condition2 t_statistic df p_value mean_cond1 mean_cond2
1: 1 2 0.1033366 1.194413 0.9322750 0.2298468 0.2091378
2: 1 3 -0.4640742 1.781169 0.6930649 0.2298468 0.3855831
3: 1 5 -0.4641799 1.491450 0.7012347 0.2298468 0.4244691
4: 2 5 -0.5697971 1.051526 0.6664883 0.2091378 0.4244691
method
1: Welch Two Sample t-test
2: Welch Two Sample t-test
3: Welch Two Sample t-test
4: Welch Two Sample t-test
col_vec
looks like this, so there should be 6 comparisons:
> col_vec
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "1" "1" "1" "2" "2" "3"
[2,] "2" "3" "5" "3" "5" "5"
UPDATE I think I found something that works! But please let me know if there is something better.
#Get the unique names of the rows in a column
condition_vec = as.numeric(unique(trust_news$group))
#get list of combinations of rows in the 'rows' column
col_vec <- combn(condition_vec, 2, FUN = paste)
tab_ttest_list = list()
for(c in 1:ncol(col_vec)) {
tmp_ttest <- t.test(trust_news[group == combn(condition_vec, 2)[1,c], vdem_media_bias],
trust_news[group == combn(condition_vec, 2)[2,c], vdem_media_bias])
res_tab_1 = data.table(condition1=combn(condition_vec, 2)[1,c],
condition2=combn(condition_vec, 2)[2,c],
t_statistic=tmp_ttest$statistic,
df=tmp_ttest$parameter,
p_value=tmp_ttest$p.value,
mean_cond1=tmp_ttest$estimate[1],
mean_cond2=tmp_ttest$estimate[2],
conf_low=tmp_ttest$conf.low,
conf_high=tmp_ttest$conf.high,
method=tmp_ttest$method)
res_tab = rbind(res_tab_1)
tab_ttest_list[[c]] = res_tab
print(c)
}
ttest_compList = rbindlist(tab_ttest_list)
print(ttest_compList)
This gives me:
condition1 condition2 t_statistic df p_value mean_cond1 mean_cond2
1: 1 2 0.10333665 1.194413 0.9322750 0.2298468 0.2091378
2: 1 3 -0.46407420 1.781169 0.6930649 0.2298468 0.3855831
3: 1 5 -0.46417992 1.491450 0.7012347 0.2298468 0.4244691
4: 2 3 -0.62525727 1.094188 0.6368321 0.2091378 0.3855831
5: 2 5 -0.56979705 1.051526 0.6664883 0.2091378 0.4244691
6: 3 5 -0.08381105 1.841388 0.9414162 0.3855831 0.4244691
method
1: Welch Two Sample t-test
2: Welch Two Sample t-test
3: Welch Two Sample t-test
4: Welch Two Sample t-test
5: Welch Two Sample t-test
6: Welch Two Sample t-test
Here is a base R that you can generalize and play with:
multiple_t_test <- function(form, data){
fn <- function(x, g){
f <- function(cmbs){
t_object <- do.call(t.test, unname(split(x, g %in% cmbs)))
nms <- paste0(cmbs, collapse = ":")
cbind(grp = nms, broom::tidy(t_object))}
levs <- levels(as.factor(g))
do.call(rbind, combn(levs, 2, f, simplify = FALSE))
}
model <- model.frame(form[c(1,3, 2)], data)
grp_fct <- model.response(model)
grp_fct_name <- deparse1(form[[3]])
Xnames <- attr(terms(model), "term.labels")
res <- Map(fn, data[Xnames], MoreArgs = list(g = grp_fct))
res <- do.call(rbind, res)
data.frame(group = sub("[.0-9]+$", "", rownames(res)),
res, row.names = NULL)
}
multiple_t_test(mean~group, trust_news)
# A tibble: 6 × 12
group grp estim…¹ estim…² estim…³ stati…⁴ p.value param…⁵ conf.…⁶ conf.…⁷ method
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 mean 1:2 0.407 2.95 2.54 1.75 0.135 5.50 -0.175 0.989 Welch…
2 mean 1:3 0.251 2.87 2.62 0.941 0.389 5.07 -0.432 0.934 Welch…
3 mean 1:5 -0.0126 2.74 2.75 -0.0442 0.966 5.74 -0.719 0.694 Welch…
4 mean 2:3 0.0126 2.75 2.74 0.0442 0.966 5.74 -0.694 0.719 Welch…
5 mean 2:5 -0.251 2.62 2.87 -0.941 0.389 5.07 -0.934 0.432 Welch…
6 mean 3:5 -0.407 2.54 2.95 -1.75 0.135 5.50 -0.989 0.175 Welch
You can then use it as:
multiple_t_test(mean+news~group, trust_news)# for only mean and news
multiple_t_test(.~group, trust_news) # for everything else
Note that the grouping factor is just 1 variable. The notion of having 2 does not work. but it can be optimized to use two