r regression

Design matrix in R after altering the data


Consider the following code, in which we fit a regression model, alter the data, and then create the design matrix for the altered data based on the fitted model:

# Convert the `vs` and `am` columns of mtcars to factors so that the models
# below treat them as categorical predictors.
mtcars_alt <- mtcars |>
  dplyr::mutate(vs = factor(vs),
                am = factor(am))

# Fit a linear model with main effects of `vs` and `am` plus their interaction.
mod_lm <- lm(mpg ~ vs*am, data = mtcars_alt)
# Overwrite `am` with the character constant "0" in every row (it is no longer
# a factor), then ask for the design matrix of the fitted model on that
# altered data. With lm() this call succeeds.
mtcars_alt |>
  dplyr::mutate(am = "0") |>
  model.matrix(mod_lm, data = _)

# Same model specification, fitted with glm() instead of lm().
mod_glm <- glm(mpg ~ vs*am, data = mtcars_alt)
# Same data alteration and model.matrix() call as for the lm() fit; with the
# glm() fit this call stops with
# "contrasts can be applied only to factors with 2 or more levels".
mtcars_alt |>
  dplyr::mutate(am = "0") |>
  model.matrix(mod_glm, data = _)

When the regression model is fitted with lm(), this works fine; however, it throws an error when the model is fitted with glm():

Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
  contrasts can be applied only to factors with 2 or more levels

Any idea how to circumvent this error?


Solution

  • Call model.matrix() on the GLM's formula rather than on the model object itself, and create the constant am as a factor with two levels even though only one of them is observed (as suggested by @Stefan Hansen in the comments). This gives the desired result:

    # Recode `vs` and `am` as factors so glm() treats them as categorical.
    mtcars_alt <- mtcars |>
      dplyr::mutate(vs = factor(vs),
                    am = factor(am))

    mod_glm <- glm(mpg ~ vs * am, data = mtcars_alt)

    # Build the design matrix for the counterfactual data "every car has
    # am = 0".
    # * `am` must stay a factor carrying all the levels seen when fitting;
    #   a one-level factor (or a bare character constant) cannot be given
    #   contrasts. The levels are taken from the fitted model
    #   (`mod_glm$xlevels$am`) rather than hard-coded, so the new data cannot
    #   drift out of sync with the fit.
    # * model.matrix() is called on the model's formula, not on the fitted
    #   object. NOTE: the formula still contains the response, so `data` must
    #   include an `mpg` column (it does here).
    mtcars_alt |>
      dplyr::mutate(am = factor("0", levels = mod_glm$xlevels$am)) |>
      model.matrix(formula(mod_glm), data = _)
    #>                     (Intercept) vs1 am1 vs1:am1
    #> Mazda RX4                     1   0   0       0
    #> Mazda RX4 Wag                 1   0   0       0
    #> Datsun 710                    1   1   0       0
    #> Hornet 4 Drive                1   1   0       0
    #> Hornet Sportabout             1   0   0       0
    #> Valiant                       1   1   0       0
    #> Duster 360                    1   0   0       0
    #> Merc 240D                     1   1   0       0
    #> Merc 230                      1   1   0       0
    #> Merc 280                      1   1   0       0
    #> Merc 280C                     1   1   0       0
    #> Merc 450SE                    1   0   0       0
    #> Merc 450SL                    1   0   0       0
    #> Merc 450SLC                   1   0   0       0
    #> Cadillac Fleetwood            1   0   0       0
    #> Lincoln Continental           1   0   0       0
    #> Chrysler Imperial             1   0   0       0
    #> Fiat 128                      1   1   0       0
    #> Honda Civic                   1   1   0       0
    #> Toyota Corolla                1   1   0       0
    #> Toyota Corona                 1   1   0       0
    #> Dodge Challenger              1   0   0       0
    #> AMC Javelin                   1   0   0       0
    #> Camaro Z28                    1   0   0       0
    #> Pontiac Firebird              1   0   0       0
    #> Fiat X1-9                     1   1   0       0
    #> Porsche 914-2                 1   0   0       0
    #> Lotus Europa                  1   1   0       0
    #> Ford Pantera L                1   0   0       0
    #> Ferrari Dino                  1   0   0       0
    #> Maserati Bora                 1   0   0       0
    #> Volvo 142E                    1   1   0       0
    #> attr(,"assign")
    #> [1] 0 1 2 3
    #> attr(,"contrasts")
    #> attr(,"contrasts")$vs
    #> [1] "contr.treatment"
    #> 
    #> attr(,"contrasts")$am
    #> [1] "contr.treatment"
    

    Created on 2024-11-14 with reprex v2.1.0