I'm trying to loop over multiple multilevel ordered logistic regressions with random intercepts on the country
, dropping one observation at a time from the main dataset, while producing one massive, augmented tidy data frame with the results. Given that I am just having trouble with the looping, and I can produce everything one data frame at a time, I will first show what I am trying to do with two data frames. Then, I will (poorly) attempt to solve the problem with a loop, which is the part for which I would greatly appreciate your assistance.
First, let's load some libraries:
# load libraries
library(dplyr)
library(purrr)
library(tibble)
library(ordinal)
library(reprex)
library(magrittr)
Next, let's create some sample data:
# create original df in the right format
data0 = data.frame(country = rep(c("Algeria","Belgium","Canada","Denmark","England", "France"),times=10),
x1 = rep(0:1),times=30,
x2 = rnorm(n = 60, mean = 100, sd = 5),
y = rep(1:6),times=10)
data0$country = factor(data0$country)
data0$y = factor(data0$y)
data0 <- data0 %>% dplyr::select(country,x1,x2,y)
Building on this post, I'll create a custom tidier for clmm2
models:
tidy.clmm2 <-
function(fit){
results = as.data.frame(coefficients(summary(fit)))
colnames(results) = c("estimate","std.error","statistic","p.value")
results %>% tibble::rownames_to_column("term")
}
Estimate the first model, tidy it, and augment it with confidence intervals, odds ratios, and model observation number:
# model 1: get the dataset with one less observation, removing the top row
data1 <- data0 %>% dplyr::slice(-1)
# model 1: estimate
m1 <- clmm2(y ~ x1 + x2, random=country, data=data1, Hess = TRUE)
summary(m1)
# model 1: get odds ratio and renames columns for model 1
or1 <- as.data.frame(exp(coef(m1)))
or1 <- or1 %>% rename(estimate.odds =1)
or1$term <- row.names(or1)
# model 1: tidy the model, get the CIs, and store the observation/model number
tidy_m1 <- tidy.clmm2(m1)
tidy_m1$obs <- m1$nobs[[1]]
tidy_m1$conf.low <- tidy_m1$estimate - (1.96 * tidy_m1$std.error)
tidy_m1$conf.high <- tidy_m1$estimate + (1.96 * tidy_m1$std.error)
# model 1: merge over the odds ratios and make final data
tidy_m1 <- left_join(tidy_m1, or1, by=c("term"))
Do the same for model 2, and bind the final output with model 1:
# model 2: get the dataset with one less observation, removing the top row
data2 <- data1 %>% dplyr::slice(-1)
# model 2: estimate
m2 <- clmm2(y ~ x1 + x2, random=country, data=data2, Hess = TRUE)
summary(m2)
# model 2: get odds ratio and renames columns for model 1
or2 <- as.data.frame(exp(coef(m2)))
or2 <- or2 %>% rename(estimate.odds =1)
or2$term <- row.names(or2)
# model 2: tidy the model, get the CIs, and store the observation/model number
tidy_m2 <- tidy.clmm2(m2)
tidy_m2$obs <- m2$nobs[[1]]
tidy_m2$conf.low <- tidy_m2$estimate - (1.96 * tidy_m2$std.error)
tidy_m2$conf.high <- tidy_m2$estimate + (1.96 * tidy_m2$std.error)
# model 2: merge over the odds ratios and make final data
tidy_m2 <- left_join(tidy_m2, or2, by=c("term"))
## Bind everything together to get a final data frame
tidy_final <- dplyr::bind_rows(tidy_m1,tidy_m2)
Here is my (poor) attempt at a loop, building on this excellent post:
# create vector to store observation/data frame numbers
vector <- 1:59
# start of model
df_final <- purrr::map_dfr(1:59,
function(i) data.frame(model = i,
tidy.clmm2(glm(as.formula(paste0('y ~ ', i)),
random=country,
Hess = TRUE,
data = data[vector]))))
How do I fix the above in line with what I was doing one model at a time? Don't worry about the NaN
s, as I have only provided a highly-stylized example to facilitate responses.
First I would improve your tidier function to do all of the extra tidying you were doing for each model (I'm assuming a tidier for this model class doesn't already exist in some package or other):
tidy.clmm2 <-
function(fit){
results = as.data.frame(coefficients(summary(fit)))
colnames(results) = c("estimate","std.error","statistic","p.value")
tidy_fit <- results %>% tibble::rownames_to_column("term")
or1 <- as.data.frame(exp(coef(fit)))
or1 <- or1 %>% rename(estimate.odds =1)
or1$term <- row.names(or1)
#tidy the model, get the CIs, and store the observation/model number
tidy_fit$obs <- fit$nobs[[1]]
tidy_fit$conf.low <- tidy_fit$estimate - (1.96 * tidy_fit$std.error)
tidy_fit$conf.high <- tidy_fit$estimate + (1.96 * tidy_fit$std.error)
#merge over the odds ratios and make final data
left_join(tidy_fit, or1, by=c("term"))
}
I don't know purrr
, but you can then use the base function lapply
(which I guess is similar) to estimate the model and the get the tidy parameters on successively smaller datasets, befor passing the results to bind_rows
. Here I only do it for the first 10:
df_final <- lapply(1:10, function(i) {
mod <- clmm2(y ~ x1 + x2, random=country, data=data0[(i+1):60,], Hess = TRUE)
tidy.clmm2(mod)
}) %>% bind_rows(, .id="Dataset")
> df_final
Dataset term estimate std.error statistic p.value obs conf.low conf.high estimate.odds
1 1 1|2 -6.438544e+01 NaN NaN NaN 59 NaN NaN 1.090838e-28
2 1 2|3 -3.518858e+01 NaN NaN NaN 59 NaN NaN 5.221475e-16
3 1 3|4 2.961620e-01 NaN NaN NaN 59 NaN NaN 1.344688e+00
4 1 4|5 2.971124e+01 2.207366e+01 1.346004e+00 1.783011e-01 59 -13.55313129 72.97561932 8.006253e+12
5 1 5|6 7.273474e+01 NaN NaN NaN 59 NaN NaN 3.875214e+31
6 1 x1 2.054609e+01 NaN NaN NaN 59 NaN NaN 8.376306e+08
7 1 x2 -8.899366e-02 3.978817e-03 -2.236686e+01 8.275733e-111 59 -0.09679214 -0.08119518 9.148514e-01
8 2 1|2 -7.093800e+01 NaN NaN NaN 58 NaN NaN 1.556034e-31
9 2 2|3 -2.937359e+01 7.293763e+00 -4.027221e+00 5.644000e-05 58 -43.66936969 -15.07781910 1.750693e-13
10 2 3|4 -4.496101e+00 5.083061e+00 -8.845263e-01 3.764122e-01 58 -14.45889969 5.46669816 1.115240e-02
11 2 4|5 2.637413e+01 1.028467e+00 2.564412e+01 4.917528e-145 58 24.35833179 28.38992198 2.845364e+11
12 2 5|6 8.163547e+01 5.497370e-02 1.484991e+03 0.000000e+00 58 81.52771782 81.74321472 2.843364e+35
13 2 x1 1.821630e+01 5.424801e+00 3.357967e+00 7.851802e-04 58 7.58369175 28.84891081 8.151530e+07
14 2 x2 -7.476304e-02 NaN NaN NaN 58 NaN NaN 9.279633e-01
15 3 1|2 -1.105259e+02 NaN NaN NaN 57 NaN NaN 9.981611e-49
16 3 2|3 -4.660975e+01 NaN NaN NaN 57 NaN NaN 5.723279e-21
17 3 3|4 -1.713909e+00 5.880686e+00 -2.914471e-01 7.707094e-01 57 -13.24005308 9.81223521 1.801602e-01
18 3 4|5 4.382707e+01 NaN NaN NaN 57 NaN NaN 1.081073e+19
19 3 5|6 1.265335e+02 3.425069e-03 3.694335e+04 0.000000e+00 57 126.52681215 126.54023843 8.970400e+54
20 3 x1 3.988252e+01 5.999608e-04 6.647520e+04 0.000000e+00 57 39.88133918 39.88369103 2.092937e+17
21 3 x2 -2.175413e-01 2.615869e-04 -8.316215e+02 0.000000e+00 57 -0.21805405 -0.21702863 8.044943e-01
22 4 1|2 -6.646351e+01 NaN NaN NaN 56 NaN NaN 1.365411e-29
23 4 2|3 -3.064726e+01 1.722949e+01 -1.778768e+00 7.527783e-02 56 -64.41706724 3.12253810 4.898489e-14
24 4 3|4 -3.327152e+00 NaN NaN NaN 56 NaN NaN 3.589518e-02
25 4 4|5 3.188512e+01 NaN NaN NaN 56 NaN NaN 7.039323e+13
26 4 5|6 7.214246e+01 NaN NaN NaN 56 NaN NaN 2.143249e+31
27 4 x1 2.524262e+01 1.420819e+01 1.776624e+00 7.563011e-02 56 -2.60544100 53.09068131 9.177632e+10
28 4 x2 -1.139253e-01 5.874164e-04 -1.939430e+02 0.000000e+00 56 -0.11507663 -0.11277396 8.923246e-01
29 5 1|2 -5.520472e+01 1.024174e-03 -5.390171e+04 0.000000e+00 55 -55.20673057 -55.20271580 1.058994e-24
30 5 2|3 -3.324923e+01 NaN NaN NaN 55 NaN NaN 3.631150e-15
31 5 3|4 7.955460e-01 5.817109e+00 1.367597e-01 8.912208e-01 55 -10.60598823 12.19708026 2.215650e+00
32 5 4|5 2.816002e+01 5.061437e+00 5.563641e+00 2.642027e-08 55 18.23960201 38.08043320 1.697228e+12
33 5 5|6 6.669283e+01 NaN NaN NaN 55 NaN NaN 9.211492e+28
34 5 x1 3.351064e+01 1.024452e-03 3.271080e+04 0.000000e+00 55 33.50863540 33.51265125 3.576741e+14
35 5 x2 -1.850497e-01 1.173298e-03 -1.577175e+02 0.000000e+00 55 -0.18734938 -0.18275005 8.310630e-01
36 6 1|2 -3.286871e+01 NaN NaN NaN 54 NaN NaN 5.312531e-15
37 6 2|3 -1.394418e+01 NaN NaN NaN 54 NaN NaN 8.792619e-07
38 6 3|4 1.402239e+01 NaN NaN NaN 54 NaN NaN 1.229831e+06
39 6 4|5 4.198923e+01 NaN NaN NaN 54 NaN NaN 1.720640e+18
40 6 5|6 6.091421e+01 NaN NaN NaN 54 NaN NaN 2.849095e+26
41 6 x1 2.791695e+01 9.151861e+00 3.050412e+00 2.285273e-03 54 9.97930271 45.85459762 1.330998e+12
42 6 x2 6.368881e-04 NaN NaN NaN 54 NaN NaN 1.000637e+00
43 7 1|2 -8.540551e+01 NaN NaN NaN 53 NaN NaN 8.106962e-38
44 7 2|3 -4.981897e+01 NaN NaN NaN 53 NaN NaN 2.311503e-22
45 7 3|4 1.323464e-02 NaN NaN NaN 53 NaN NaN 1.013323e+00
46 7 4|5 4.199212e+01 3.930682e+00 1.068317e+01 1.220384e-26 53 34.28798714 49.69626009 1.725630e+18
47 7 5|6 9.757102e+01 NaN NaN NaN 53 NaN NaN 2.368942e+42
48 7 x1 2.742401e+01 NaN NaN NaN 53 NaN NaN 8.130075e+11
49 7 x2 -1.149872e-01 NaN NaN NaN 53 NaN NaN 8.913776e-01
50 8 1|2 -3.428518e+01 2.418647e+01 -1.417536e+00 1.563264e-01 52 -81.69065068 13.12029594 1.288655e-15
51 8 2|3 -1.468742e+01 2.158542e+01 -6.804326e-01 4.962306e-01 52 -56.99484187 27.61999711 4.181514e-07
52 8 3|4 -2.123523e+00 2.585943e+01 -8.211793e-02 9.345529e-01 52 -52.80800668 48.56096089 1.196095e-01
53 8 4|5 1.428943e+01 2.595126e+01 5.506258e-01 5.818902e-01 52 -36.57503632 65.15390302 1.606283e+06
54 8 5|6 3.811502e+01 1.071360e+00 3.557631e+01 3.257489e-277 52 36.01515820 40.21488826 3.573915e+16
55 8 x1 8.597913e+00 2.399747e+01 3.582841e-01 7.201307e-01 52 -38.43712837 55.63295352 5.420333e+03
56 8 x2 -3.759098e-02 NaN NaN NaN 52 NaN NaN 9.631068e-01
57 9 1|2 -1.381079e+02 NaN NaN NaN 51 NaN NaN 1.048377e-60
58 9 2|3 -5.910938e+01 NaN NaN NaN 51 NaN NaN 2.133649e-26
59 9 3|4 -9.084277e+00 NaN NaN NaN 51 NaN NaN 1.134354e-04
60 9 4|5 5.875811e+01 NaN NaN NaN 51 NaN NaN 3.298542e+25
61 9 5|6 1.564752e+02 NaN NaN NaN 51 NaN NaN 9.043358e+67
62 9 x1 4.528686e+01 NaN NaN NaN 51 NaN NaN 4.654054e+19
63 9 x2 -2.132578e-01 2.314244e-05 -9.215010e+03 0.000000e+00 51 -0.21330318 -0.21321246 8.079478e-01
64 10 1|2 -5.537366e+01 2.398432e+01 -2.308744e+00 2.095778e-02 50 -102.38292260 -8.36439034 8.943892e-25
65 10 2|3 -2.567911e+01 1.644416e+01 -1.561594e+00 1.183836e-01 50 -57.90967366 6.55145288 7.042130e-12
66 10 3|4 -3.561160e+00 1.421535e+01 -2.505152e-01 8.021890e-01 50 -31.42323652 24.30091746 2.840587e-02
67 10 4|5 2.622061e+01 1.739894e+01 1.507024e+00 1.318045e-01 50 -7.88130243 60.32253224 2.440441e+11
68 10 5|6 6.147091e+01 NaN NaN NaN 50 NaN NaN 4.971402e+26
69 10 x1 1.720323e+01 NaN NaN NaN 50 NaN NaN 2.959845e+07
70 10 x2 -6.799849e-02 NaN NaN NaN 50 NaN NaN 9.342619e-01