rdplyrmap-function

How releve model within map function in dplyr


I have the following dataset

 dput(dat2)
    structure(list(g = c("Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Women", "Women", "Women", 
    "Women", "Women", "Women", "Women", "Men", "Men", "Men", "Men", 
    "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", 
    "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", 
    "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", 
    "Men", "Men", "Men", "Men", "Men", "Men", "Men"), e = c("E", 
    "M", "M", "M", "M", "M", "M", "M", "M", "E", "E", "E", "E", "M", 
    "M", "E", "M", "E", "M", "M", "E", "M", "M", "M", "M", "E", "M", 
    "E", "E", "M", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
    "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "M", "E", "E", 
    "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", "E", 
    "E", "E", "E", "E", "E", "M", "E", "E", "E", "E", "E", "E", "E", 
    "E", "E", "M", "E", "E", "E", "E", "H", "M", "E", "E", "E", "E", 
    "E", "E", "E", "H", "M", "M", "M", "M", "M", "M", "M", "M", "H", 
    "M", "M", "H", "M", "M", "M", "H", "M", "H", "E", "H", "E", "M", 
    "M", "M", "M", "M", "E", "E", "M", "M", "M", "E", "E", "M", "E", 
    "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", 
    "E"), m = structure(c(109, 111, 114, 110, 111, 100, 110, 108, 
    104, 107, 112, 113, 108, 109, 109, 111, 108, 113, 111, 111, 113, 
    108, 108, 114, 114, 111, 108, 105, 113, 113, 111, 106, 108, 112, 
    110, 108, 110, 112, 112, 113, 113, 105, 110, 110, 106, 108, 106, 
    111, 109, 104, 105, 110, 106, 107, 108, 110, 106, 107, 112, 110, 
    102, 106, 113, 103, 108, 110, 110, 108, 109, 113, 110, 109, 109, 
    109, 107, 111, 106, 105, 110, 107, 105, 107, 110, 105, 108, 108, 
    99, 112, 108, 105, 107, 103, 101, 107, 106, 96, 113, 104, 110, 
    105, 100, 106, 110, 106, 104, 110, 101, 103, 101, 103, 105, 105, 
    108, 108, 107, 108, 107, 103, 109, 104, 109, 108, 105, 107, 103, 
    107, 105, 109, 107, 105, 106, 103, 101, 103, 103, 108, 101, 107, 
    102, 107, 107, 102, 112, 107, 108), format.spss = "F16.2")), row.names = c(NA, 
    -145L), class = c("tbl_df", "tbl", "data.frame"))

I would need to extract coefficients from it by revealing the variable e. The operation would correspond to extracting the coefficient from the summary

dat2$e =  relevel(dat$e, ref = 'E')
m1 = lm(m ~ g*e, data = dat2)
summary(m1)$coefficients


dat2$e = relevel(dat$e, ref = 'M')
m2 = lm(m ~ g*e, data = dat2)
summary(m2)$coefficients


dat2$e = relevel(dat2$e, ref = 'H')
m3 = lm(m ~ g*e, data = dat2)
summary(m3)$coefficients

The problem is that I have to do this in a dplyr code. I have put the code as follows:

dat2 %>% 
  nest(data = -c(Education)) %>% 
  mutate(stderr = map(data,~ summary(lm(Savoring ~ Gender, data = .x))$coefficients)) %>% 
  unnest(stderr)

but this code is despite the interactions and the coefficients are different from those I get from those calculated above. I think that the key might properly arrange the relevel() function within map(), but I do not have any idea about how.

I try with a solution like this but this not work

dat2 %>% 
  mutate(lev = case_when(e == 'E'  ~ 1, 
                         e == 'M' ~ 2, 
                         TRUE ~ 3)) %>% 
  nest(data = -c(lev)) %>% 
  mutate(stderr = map(dat2$e = relevel(dat2$e, ref = .x),
                      ~ summary(lm(m ~ g*e, data = .x))$coefficients))

Can any give some suggestions?

Desired output

Let's assume I would like to get the estimate and the standard error column. The output would more or less look like this

dat = dat2 %>%
  pluck('Education') %>% 
  levels() %>% 
  as.data.frame() %>% 
  mutate(z = (dat2 %>% pluck("e") %>% levels() %>%
                map(~summary(lm(m ~ g*e, 
                                within(dat3, e <- relevel(e, .x))))$coefficients[2,2]) %>%
                unlist() %>% as.data.frame()), 
         est =(dat2 %>% pluck("e") %>% levels() %>%
               map(~summary(lm(m ~ g*e, 
                               within(dat2, e <- relevel(e, .x))))$coefficients[2,1]) %>%
               unlist() %>% as.data.frame())
  )
       .        .         .
1 E 1.160201  1.507042 
2 M 0.815786  3.339161 
3 H 2.376032 -6.333333 

Thanks


Solution

  • Using base R

    lapply(unique(dat2$e), \(x) summary(lm(m ~ g * e,
        transform(dat2, e = relevel(factor(e), x))))$coef)
    

    -output

    [[1]]
                  Estimate Std. Error    t value      Pr(>|t|)
    (Intercept) 107.000000   1.099889 97.2825314 9.893783e-130
    gWomen        1.507042   1.160201  1.2989491  1.961120e-01
    eH           -1.000000   1.905064 -0.5249168  6.004773e-01
    eM           -1.884615   1.257771 -1.4983768  1.363030e-01
    gWomen:eH    -7.840376   2.644162 -2.9651641  3.561488e-03
    gWomen:eM     1.832119   1.418300  1.2917713  1.985801e-01
    
    [[2]]
                   Estimate Std. Error     t value      Pr(>|t|)
    (Intercept) 105.1153846  0.6101087 172.2895948 6.154064e-164
    gWomen        3.3391608  0.8157860   4.0931821  7.180778e-05
    eE            1.8846154  1.2577714   1.4983768  1.363030e-01
    eH            0.8846154  1.6708515   0.5294398  5.973449e-01
    gWomen:eE    -1.8321186  1.4182995  -1.2917713  1.985801e-01
    gWomen:eH    -9.6724942  2.5121774  -3.8502432  1.793655e-04
    
    [[3]]
                   Estimate Std. Error    t value      Pr(>|t|)
    (Intercept) 106.0000000   1.555478 68.1462485 1.088620e-108
    gWomen       -6.3333333   2.376032 -2.6655083  8.597742e-03
    eE            1.0000000   1.905064  0.5249168  6.004773e-01
    eM           -0.8846154   1.670852 -0.5294398  5.973449e-01
    gWomen:eE     7.8403756   2.644162  2.9651641  3.561488e-03
    gWomen:eM     9.6724942   2.512177  3.8502432  1.793655e-04
    

    With the updated desired output

    library(purrr) #version >= 1.0.1
    library(dplyr)# version >= 1.1.0
    dat2 %>% 
     nest_by(e) %>%
      mutate(data = list(dat2)) %>%
      rename(lvls = 1) %>% 
      mutate(data = list(data %>% 
           mutate(e = relevel(factor(e), lvls)))) %>% 
      mutate(lm_model = list(lm(m ~ g *e, data = data)), 
            z = summary(lm_model)$coefficients[2,2], 
      est = summary(lm_model)$coefficients[2,1]) %>%
       ungroup %>%
      select(lvls, z, est)
    

    -output

    # A tibble: 3 × 3
      lvls      z   est
      <chr> <dbl> <dbl>
    1 E     1.16   1.51
    2 H     2.38  -6.33
    3 M     0.816  3.34
    

    Or could also do

    library(tidyr)
     map(unique(dat2$e), ~ dat2 %>% 
      mutate( e = relevel(factor(e), .x)) %>% 
      reframe(lvls = .x, out = broom::tidy(lm(m ~ g *e, 
       data = pick(everything()))) %>%
        slice(2))) %>%
       list_rbind() %>% 
       unnest(where(is_tibble)) %>% 
       select(lvls, z = std.error, est = estimate)
    # A tibble: 3 × 3
      lvls      z   est
      <chr> <dbl> <dbl>
    1 E     1.16   1.51
    2 M     0.816  3.34
    3 H     2.38  -6.33