r lme4 mixed-models confidence-interval statistics-bootstrap

Calculate confidence interval of random + fixed effect coefficients in a mixed effect model in R

I use the the lme4 package in R to fit a mixed effect model:

library(lme4)

model <- lmer(y~x1*x2+(1|subect)+(0 + x1+x2+x1:x2|subject)

If I would like to have total effect per subject that accounts for both fixed and random slopes, I can use the function coef which reports coefficients per subject.

coef(model)

Now, I would like to calculate confidence intervals on those estimates per subject accounting for both fixed and random effects CI. Do you have tips on how to do that in R?

Thank you!

Solution

update 2024/25: see also this answer.

Here I show three ways to get these values:

adding the variances of the fixed and random effects components and computing Normal CIs, using a modified/hacked version of the lme4:::coef.merMod() function. This assumes (1) that the sampling distributions of these components are Normal, and (2) that the variance in the fixed and random effects are independent. (Also not making any finite-size corrections)
Parametric bootstrap with bootMer(). This assumes that the model is correct
Nonparametric bootstrapping with lmeresampler. This makes weaker assumptions, but would be difficult to implement a case with (e.g.) crossed random effects (this package offers a few alternative methods, e.g. residual or Wild bootstrapping ...)

There's a reasonable discussion of nonparametric vs parametric bootstraps here

All three methods give reasonably similar results in this (pretty well-behaved) example ...

The code below is a little ugly, could probably be cleaned up (I mostly avoided tidyverse, which might actually be useful ...)

load packages and fit basic model

library(lme4)
library(ggplot2)
library(lmeresampler)
fm1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)

parametric bootstrap

f <- function(x) unlist(coef(x)$Subject)
b <- bootMer(fm1, FUN = f, seed = 101, nsim = 1000,
             use.u = TRUE,  ## important - *don't* resample RE values
             parallel = "multicore", ncpus = 5)
## rearrange
bootmer_res <- t(apply(as.data.frame(b), 2, quantile, c(0.025, 0.975)))
bootmer_res <- setNames(as.data.frame(bootmer_res), c("lwr", "upr"))

sum of variances

cc <- coef2.merMod(fm1)  ## defined below
coef_res <- data.frame(unlist(cc$values$Subject + qnorm(0.025)*cc$sd$Subject),
                       unlist(cc$values$Subject + qnorm(0.975)*cc$sd$Subject))
coef_res <- setNames(as.data.frame(coef_res), c("lwr", "upr"))

nonparametric bootstrap

set.seed(101)
dd <- bootstrap(fm1, .f = f, type = "case", B = 1000, 
     ## resample only within Subjects
     resample = c(FALSE, TRUE))
lmeresamp_res <- t(apply(dd$replicates, 2, quantile, c(0.025, 0.975)))
lmeresamp_res <- setNames(as.data.frame(lmeresamp_res), c("lwr", "upr"))

combine estimates and plot

all_res <- rbind(transform(bootmer_res, nm = "bootmer"),
                 transform(coef_res, nm = "coef2"),
                 transform(lmeresamp_res, nm = "lmeresamp"))
## better to do this by manipulating existing metadata (e.g. rownames)
nsubj <- 18
all_res$var <- rep(rep(c("(Intercept)", "Days"), each = nsubj), 3)
all_res$subj <- rep(seq(nsubj), 6)
ggplot(all_res, aes(xmin = lwr, xmax = upr, y = subj, colour = nm)) +
    facet_wrap(~var, scale = "free") +
    geom_linerange(position = position_dodge(width = 0.5))
                  
ggsave("coef_SDs.png")

coef2.merMod() function

coef2.merMod <- function (object, ...)  {
    fef <- data.frame(rbind(fixef(object)), check.names = FALSE)
    fef_var <- data.frame(rbind(diag(vcov(object))), check.names = FALSE) ## *
    ref <- ranef(object, condVar = TRUE)  ## *
    refnames <- unlist(lapply(ref, colnames))
    nmiss <- length(missnames <- setdiff(refnames, names(fef)))
    if (nmiss > 0) {
        fillvars <- setNames(data.frame(rbind(rep(0, nmiss))), 
            missnames)
        fef <- cbind(fillvars, fef)
        fef_var <- cbind(fillvars, ref_var)
    }
    dfun <- function(x) lapply(ref, function(y) x[rep.int(1L, nrow(y)), , drop = FALSE])
    val <- dfun(fef)
    val_var <- dfun(fef_var)
    for (i in seq_along(val)) {
        refi <- ref[[i]]
        refi_var <- t(apply(attr(refi, "postVar"), 3, diag))
        colnames(refi_var) <- colnames(refi)
        row.names(val[[i]]) <- row.names(val_var[[i]]) <- row.names(refi) ## *
        nmsi <- colnames(refi)
        if (!all(nmsi %in% names(fef))) 
            stop("unable to align random and fixed effects")
        ## 
        for (nm in nmsi) {
            val[[i]][[nm]] <- val[[i]][[nm]] + refi[, nm]
            val_var[[i]][[nm]] <- val_var[[i]][[nm]] + refi_var[, nm]
        }
        val_var[[i]] <- as.data.frame(lapply(val_var[[i]], sqrt),
                                      check.names = FALSE)
    }
    list(values = val, sd = val_var)
}