Inspired by SO this answer I'm using the do
dplyr to perform several regressions at once, I would however like to display my output using texreg and do()
produces a rowwise_df
object, but if I extract the list of regression some information seem to have been lost. Is there a simple way to solve this? The minimal example below.
First some required packages
# install.packages(c("tidyverse", "broom", "texreg"), dependencies = TRUE)
library(tidyverse)
Second, some dummy data
df.h = data.frame(
hour = factor(rep(1:6, each = 21)),
price = runif(504, min = -10, max = 125),
wind = runif(504, min = 0, max = 2500),
temp = runif(504, min = - 10, max = 25)
)
Third the do()
dfHour = df.h %>% group_by(hour) %>%
do(fitHour = lm(price ~ wind + temp, data = .))
Forth, get the coefficients by group in a tidy data_frame
library(broom)
dfHourCoef = tidy(dfHour, fitHour)
dfHourCoef
#> # A tibble: 72 x 6
#> # Groups: hour [6]
#> hour term estimate std.error statistic p.value
#> <fct> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 1 (Intercept) 78.2 17.6 4.44 0.000316
#> 2 1 wind 0.000145 0.0107 0.0135 0.989
#> 3 1 temp - 1.27 0.834 -1.52 0.145
#> 4 2 (Intercept) 69.7 18.9 3.68 0.00171
#> 5 2 wind - 0.0150 0.0121 -1.24 0.232
#> 6 2 temp - 0.00355 0.989 -0.00359 0.997
#> 7 3 (Intercept) 61.0 14.1 4.32 0.000413
#> 8 3 wind - 0.00599 0.00987 -0.607 0.552
#> 9 3 temp 0.603 0.704 0.858 0.402
#> 10 4 (Intercept) 57.9 19.1 3.02 0.00729
#> # ... with 8 more rows
I would however like to use texreg, I've tried something like this, but the output gets scrambled up somehow.
library(texreg)
class(dfHour[[2]])
#> [1] "list"
screenreg(dfHour[[2]]) # Not working
Doing it manually would look something like this:
fit1 <- lm(price ~ wind + temp, data = subset(df.h, hour == 1))
fit2 <- lm(price ~ wind + temp, data = subset(df.h, hour == 2))
fit3 <- lm(price ~ wind + temp, data = subset(df.h, hour == 3))
fit4 <- lm(price ~ wind + temp, data = subset(df.h, hour == 4))
fit5 <- lm(price ~ wind + temp, data = subset(df.h, hour == 5))
fit6 <- lm(price ~ wind + temp, data = subset(df.h, hour == 6))
fits <- list(fit1, fit2, fit3, fit4, fit5, fit6)
texreg::screenreg(fits)
#> =================================================================================
#> Model 1 Model 2 Model 3 Model 4 Model 5 Model 6
#> ---------------------------------------------------------------------------------
#> (Intercept) 78.23 *** 69.73 ** 60.96 *** 57.87 ** 89.18 *** 64.29 ***
#> (17.62) (18.94) (14.11) (19.14) (19.08) (15.62)
#> wind 0.00 -0.01 -0.01 -0.01 -0.01 0.00
#> (0.01) (0.01) (0.01) (0.01) (0.01) (0.01)
#> temp -1.27 -0.00 0.60 1.39 -0.48 -2.17 *
#> (0.83) (0.99) (0.70) (0.94) (0.98) (0.93)
#> ---------------------------------------------------------------------------------
#> R^2 0.11 0.08 0.05 0.11 0.06 0.23
#> Adj. R^2 0.02 -0.02 -0.05 0.01 -0.05 0.15
#> Num. obs. 21 21 21 21 21 21
#> RMSE 35.24 41.60 32.59 41.44 39.87 38.39
#> =================================================================================
#> *** p < 0.001, ** p < 0.01, * p < 0.05#>
We can pull
the 'fitHour' and apply screenreg
library(texreg)
out <- dfHour %>%
pull(fitHour) %>%
screenreg
-output
out
#================================================================================================================================================================================================================================================================================
# Model 1 Model 2 Model 3 Model 4 Model 5 Model 6 Model 7 Model 8 Model 9 Model 10 Model 11 Model 12 Model 13 Model 14 Model 15 Model 16 Model 17 Model 18 Model 19 Model 20 Model 21 Model 22 Model 23 Model 24
#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#(Intercept) 32.42 26.59 43.68 * 52.69 ** 48.22 ** 70.75 *** 48.65 51.23 ** 63.47 ** 68.99 *** 102.76 *** 64.77 *** 77.99 ** 82.14 *** 50.16 ** 50.87 * 69.29 ** 64.07 ** 31.96 66.61 ** 44.88 * 88.75 *** 47.27 ** 83.94 ***
# (19.57) (19.59) (15.44) (16.96) (16.05) (15.81) (25.35) (15.93) (20.87) (15.09) (15.62) (16.50) (20.63) (12.96) (16.84) (20.74) (19.68) (22.24) (19.97) (19.48) (21.17) (17.92) (14.79) (19.90)
#wind 0.02 0.02 0.00 0.00 0.01 -0.00 -0.01 0.00 -0.01 -0.00 -0.04 *** 0.01 -0.01 -0.01 -0.00 -0.00 -0.00 0.00 0.02 0.00 -0.00 -0.03 * 0.01 -0.01
# (0.01) (0.01) (0.01) (0.02) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01) (0.01)
#temp 0.33 0.57 -0.97 -0.09 -0.65 -0.95 0.99 -0.56 -0.27 1.21 -0.84 -0.82 -0.76 -0.67 0.66 -0.02 -0.50 0.62 0.21 -0.75 1.29 0.60 1.04 0.40
# (0.90) (0.85) (0.76) (0.93) (0.67) (0.90) (1.09) (1.05) (0.91) (0.91) (0.72) (0.90) (0.90) (0.56) (1.06) (1.05) (1.06) (0.77) (0.94) (0.98) (1.00) (0.87) (0.73) (1.05)
#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#R^2 0.10 0.11 0.10 0.00 0.07 0.06 0.07 0.02 0.01 0.10 0.52 0.05 0.06 0.16 0.02 0.00 0.02 0.04 0.10 0.03 0.09 0.20 0.18 0.08
#Adj. R^2 -0.00 0.01 -0.00 -0.11 -0.04 -0.04 -0.03 -0.09 -0.10 -0.01 0.47 -0.06 -0.05 0.06 -0.09 -0.11 -0.09 -0.07 0.00 -0.07 -0.01 0.12 0.08 -0.03
#Num. obs. 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21
#RMSE 36.96 37.34 32.60 45.40 35.19 41.34 42.79 39.51 38.83 35.61 28.44 39.39 38.70 30.11 40.61 40.08 40.88 40.13 41.68 43.08 42.78 39.12 30.62 40.79
#================================================================================================================================================================================================================================================================================
If we need to apply on individual models of 'dfHour'
dfHour2 <- dfHour %>%
ungroup %>%
mutate(Texreg = map(fitHour, screenreg))
The whole exercise can be done without do
as well
df.h %>%
group_by(hour) %>%
nest(-hour) %>%
mutate(model = map(data, ~ {
mod <- lm(price ~ wind + temp, data = .x)
tibble(list(mod), Texreg = list(screenreg(mod)))})) %>%
select(-data) %>%
unnest