I have the following dataset
dput(dat2)
structure(list(g = c("Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Women", "Women", "Women",
"Women", "Women", "Women", "Women", "Men", "Men", "Men", "Men",
"Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men",
"Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men",
"Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men",
"Men", "Men", "Men", "Men", "Men", "Men", "Men"), e = c("E",
"M", "M", "M", "M", "M", "M", "M", "M", "E", "E", "E", "E", "M",
"M", "E", "M", "E", "M", "M", "E", "M", "M", "M", "M", "E", "M",
"E", "E", "M", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "M", "E", "E",
"E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "M", "E", "E", "E", "E", "E", "E", "E",
"E", "E", "M", "E", "E", "E", "E", "H", "M", "E", "E", "E", "E",
"E", "E", "E", "H", "M", "M", "M", "M", "M", "M", "M", "M", "H",
"M", "M", "H", "M", "M", "M", "H", "M", "H", "E", "H", "E", "M",
"M", "M", "M", "M", "E", "E", "M", "M", "M", "E", "E", "M", "E",
"M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M",
"E"), m = structure(c(109, 111, 114, 110, 111, 100, 110, 108,
104, 107, 112, 113, 108, 109, 109, 111, 108, 113, 111, 111, 113,
108, 108, 114, 114, 111, 108, 105, 113, 113, 111, 106, 108, 112,
110, 108, 110, 112, 112, 113, 113, 105, 110, 110, 106, 108, 106,
111, 109, 104, 105, 110, 106, 107, 108, 110, 106, 107, 112, 110,
102, 106, 113, 103, 108, 110, 110, 108, 109, 113, 110, 109, 109,
109, 107, 111, 106, 105, 110, 107, 105, 107, 110, 105, 108, 108,
99, 112, 108, 105, 107, 103, 101, 107, 106, 96, 113, 104, 110,
105, 100, 106, 110, 106, 104, 110, 101, 103, 101, 103, 105, 105,
108, 108, 107, 108, 107, 103, 109, 104, 109, 108, 105, 107, 103,
107, 105, 109, 107, 105, 106, 103, 101, 103, 103, 108, 101, 107,
102, 107, 107, 102, 112, 107, 108), format.spss = "F16.2")), row.names = c(NA,
-145L), class = c("tbl_df", "tbl", "data.frame"))
I would need to extract coefficients from it by revealing the variable e
. The operation would correspond to extracting the coefficient from the summary
dat2$e = relevel(dat$e, ref = 'E')
m1 = lm(m ~ g*e, data = dat2)
summary(m1)$coefficients
dat2$e = relevel(dat$e, ref = 'M')
m2 = lm(m ~ g*e, data = dat2)
summary(m2)$coefficients
dat2$e = relevel(dat2$e, ref = 'H')
m3 = lm(m ~ g*e, data = dat2)
summary(m3)$coefficients
The problem is that I have to do this in a dplyr code. I have put the code as follows:
dat2 %>%
nest(data = -c(Education)) %>%
mutate(stderr = map(data,~ summary(lm(Savoring ~ Gender, data = .x))$coefficients)) %>%
unnest(stderr)
but this code is despite the interactions and the coefficients are different from those I get from those calculated above. I think that the key might properly arrange the relevel() function within map()
, but I do not have any idea about how.
I try with a solution like this but this not work
dat2 %>%
mutate(lev = case_when(e == 'E' ~ 1,
e == 'M' ~ 2,
TRUE ~ 3)) %>%
nest(data = -c(lev)) %>%
mutate(stderr = map(dat2$e = relevel(dat2$e, ref = .x),
~ summary(lm(m ~ g*e, data = .x))$coefficients))
Can any give some suggestions?
Desired output
Let's assume I would like to get the estimate and the standard error column. The output would more or less look like this
dat = dat2 %>%
pluck('Education') %>%
levels() %>%
as.data.frame() %>%
mutate(z = (dat2 %>% pluck("e") %>% levels() %>%
map(~summary(lm(m ~ g*e,
within(dat3, e <- relevel(e, .x))))$coefficients[2,2]) %>%
unlist() %>% as.data.frame()),
est =(dat2 %>% pluck("e") %>% levels() %>%
map(~summary(lm(m ~ g*e,
within(dat2, e <- relevel(e, .x))))$coefficients[2,1]) %>%
unlist() %>% as.data.frame())
)
. . .
1 E 1.160201 1.507042
2 M 0.815786 3.339161
3 H 2.376032 -6.333333
Thanks
Using base R
lapply(unique(dat2$e), \(x) summary(lm(m ~ g * e,
transform(dat2, e = relevel(factor(e), x))))$coef)
-output
[[1]]
Estimate Std. Error t value Pr(>|t|)
(Intercept) 107.000000 1.099889 97.2825314 9.893783e-130
gWomen 1.507042 1.160201 1.2989491 1.961120e-01
eH -1.000000 1.905064 -0.5249168 6.004773e-01
eM -1.884615 1.257771 -1.4983768 1.363030e-01
gWomen:eH -7.840376 2.644162 -2.9651641 3.561488e-03
gWomen:eM 1.832119 1.418300 1.2917713 1.985801e-01
[[2]]
Estimate Std. Error t value Pr(>|t|)
(Intercept) 105.1153846 0.6101087 172.2895948 6.154064e-164
gWomen 3.3391608 0.8157860 4.0931821 7.180778e-05
eE 1.8846154 1.2577714 1.4983768 1.363030e-01
eH 0.8846154 1.6708515 0.5294398 5.973449e-01
gWomen:eE -1.8321186 1.4182995 -1.2917713 1.985801e-01
gWomen:eH -9.6724942 2.5121774 -3.8502432 1.793655e-04
[[3]]
Estimate Std. Error t value Pr(>|t|)
(Intercept) 106.0000000 1.555478 68.1462485 1.088620e-108
gWomen -6.3333333 2.376032 -2.6655083 8.597742e-03
eE 1.0000000 1.905064 0.5249168 6.004773e-01
eM -0.8846154 1.670852 -0.5294398 5.973449e-01
gWomen:eE 7.8403756 2.644162 2.9651641 3.561488e-03
gWomen:eM 9.6724942 2.512177 3.8502432 1.793655e-04
With the updated desired output
library(purrr) #version >= 1.0.1
library(dplyr)# version >= 1.1.0
dat2 %>%
nest_by(e) %>%
mutate(data = list(dat2)) %>%
rename(lvls = 1) %>%
mutate(data = list(data %>%
mutate(e = relevel(factor(e), lvls)))) %>%
mutate(lm_model = list(lm(m ~ g *e, data = data)),
z = summary(lm_model)$coefficients[2,2],
est = summary(lm_model)$coefficients[2,1]) %>%
ungroup %>%
select(lvls, z, est)
-output
# A tibble: 3 × 3
lvls z est
<chr> <dbl> <dbl>
1 E 1.16 1.51
2 H 2.38 -6.33
3 M 0.816 3.34
Or could also do
library(tidyr)
map(unique(dat2$e), ~ dat2 %>%
mutate( e = relevel(factor(e), .x)) %>%
reframe(lvls = .x, out = broom::tidy(lm(m ~ g *e,
data = pick(everything()))) %>%
slice(2))) %>%
list_rbind() %>%
unnest(where(is_tibble)) %>%
select(lvls, z = std.error, est = estimate)
# A tibble: 3 × 3
lvls z est
<chr> <dbl> <dbl>
1 E 1.16 1.51
2 M 0.816 3.34
3 H 2.38 -6.33