rlme4mixed-modelsconfidence-intervalstatistics-bootstrap

Calculate confidence interval of random + fixed effect coefficients in a mixed effect model in R


I use the the lme4 package in R to fit a mixed effect model:

library(lme4)

model <- lmer(y~x1*x2+(1|subect)+(0 + x1+x2+x1:x2|subject)

If I would like to have total effect per subject that accounts for both fixed and random slopes, I can use the function coef which reports coefficients per subject.

coef(model)

Now, I would like to calculate confidence intervals on those estimates per subject accounting for both fixed and random effects CI. Do you have tips on how to do that in R?

Thank you!


Solution

  • update 2024/25: see also this answer.

    Here I show three ways to get these values:

    There's a reasonable discussion of nonparametric vs parametric bootstraps here

    All three methods give reasonably similar results in this (pretty well-behaved) example ...

    The code below is a little ugly, could probably be cleaned up (I mostly avoided tidyverse, which might actually be useful ...)

    load packages and fit basic model

    library(lme4)
    library(ggplot2)
    library(lmeresampler)
    fm1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
    

    parametric bootstrap

    f <- function(x) unlist(coef(x)$Subject)
    b <- bootMer(fm1, FUN = f, seed = 101, nsim = 1000,
                 use.u = TRUE,  ## important - *don't* resample RE values
                 parallel = "multicore", ncpus = 5)
    ## rearrange
    bootmer_res <- t(apply(as.data.frame(b), 2, quantile, c(0.025, 0.975)))
    bootmer_res <- setNames(as.data.frame(bootmer_res), c("lwr", "upr"))
    

    sum of variances

    cc <- coef2.merMod(fm1)  ## defined below
    coef_res <- data.frame(unlist(cc$values$Subject + qnorm(0.025)*cc$sd$Subject),
                           unlist(cc$values$Subject + qnorm(0.975)*cc$sd$Subject))
    coef_res <- setNames(as.data.frame(coef_res), c("lwr", "upr"))
    

    nonparametric bootstrap

    set.seed(101)
    dd <- bootstrap(fm1, .f = f, type = "case", B = 1000, 
         ## resample only within Subjects
         resample = c(FALSE, TRUE))
    lmeresamp_res <- t(apply(dd$replicates, 2, quantile, c(0.025, 0.975)))
    lmeresamp_res <- setNames(as.data.frame(lmeresamp_res), c("lwr", "upr"))
    

    combine estimates and plot

    all_res <- rbind(transform(bootmer_res, nm = "bootmer"),
                     transform(coef_res, nm = "coef2"),
                     transform(lmeresamp_res, nm = "lmeresamp"))
    ## better to do this by manipulating existing metadata (e.g. rownames)
    nsubj <- 18
    all_res$var <- rep(rep(c("(Intercept)", "Days"), each = nsubj), 3)
    all_res$subj <- rep(seq(nsubj), 6)
    ggplot(all_res, aes(xmin = lwr, xmax = upr, y = subj, colour = nm)) +
        facet_wrap(~var, scale = "free") +
        geom_linerange(position = position_dodge(width = 0.5))
                      
    ggsave("coef_SDs.png")
    

    enter image description here


    coef2.merMod() function

    coef2.merMod <- function (object, ...)  {
        fef <- data.frame(rbind(fixef(object)), check.names = FALSE)
        fef_var <- data.frame(rbind(diag(vcov(object))), check.names = FALSE) ## *
        ref <- ranef(object, condVar = TRUE)  ## *
        refnames <- unlist(lapply(ref, colnames))
        nmiss <- length(missnames <- setdiff(refnames, names(fef)))
        if (nmiss > 0) {
            fillvars <- setNames(data.frame(rbind(rep(0, nmiss))), 
                missnames)
            fef <- cbind(fillvars, fef)
            fef_var <- cbind(fillvars, ref_var)
        }
        dfun <- function(x) lapply(ref, function(y) x[rep.int(1L, nrow(y)), , drop = FALSE])
        val <- dfun(fef)
        val_var <- dfun(fef_var)
        for (i in seq_along(val)) {
            refi <- ref[[i]]
            refi_var <- t(apply(attr(refi, "postVar"), 3, diag))
            colnames(refi_var) <- colnames(refi)
            row.names(val[[i]]) <- row.names(val_var[[i]]) <- row.names(refi) ## *
            nmsi <- colnames(refi)
            if (!all(nmsi %in% names(fef))) 
                stop("unable to align random and fixed effects")
            ## 
            for (nm in nmsi) {
                val[[i]][[nm]] <- val[[i]][[nm]] + refi[, nm]
                val_var[[i]][[nm]] <- val_var[[i]][[nm]] + refi_var[, nm]
            }
            val_var[[i]] <- as.data.frame(lapply(val_var[[i]], sqrt),
                                          check.names = FALSE)
        }
        list(values = val, sd = val_var)
    }