I am attempting to fit a longitudinal multilevel model using ESS survey data from 2002-2018. I want to look at the effects of macroeconomic performance on political trust. To do this, I am using a 'within-between' approach, which models a cross-sectional and a longitudinal component simultaneously.
Right now I have a dataset to which I have added the actual inflation, GDP growth and unemployment rates for each country-year. To create the time-invariant component, I have to create, for each of these variables (GDP growth, inflation, unemployment), a variable holding its mean for each country over the entire time series (e.g., mean GDP growth for Austria 2002-2018). However, when I run the code below, it creates a variable containing the mean over the whole dataset for each contextual variable, as opposed to the mean by country.
# Within-between decomposition of the macro indicators, per country:
#   *_mean is the mean of the indicator within each `country` group,
#   *_diff is each row's deviation from that group mean.
#
# The dplyr:: prefixes are deliberate. If plyr is attached after dplyr, an
# unqualified mutate() resolves to plyr::mutate(), which ignores group_by()
# and therefore returns the overall mean instead of the per-country mean.
#
# NOTE(review): the sample table spells the columns `GDPGrowth` and
# `Unemployment`; R names are case-sensitive, so confirm the spelling used
# here matches the real data.
# NOTE(review): the means are taken over every row in a country; if rows are
# respondents, country-years with more respondents weigh more -- confirm that
# this is intended.
data <- data %>%
  dplyr::group_by(country) %>%
  dplyr::mutate(
    inflation_mean = mean(inflation, na.rm = TRUE),
    inflation_diff = inflation - inflation_mean,
    growth_mean = mean(GDPgrowth, na.rm = TRUE),
    growth_diff = GDPgrowth - growth_mean,
    unemployment_mean = mean(unemployment, na.rm = TRUE),
    unemployment_diff = unemployment - unemployment_mean
  ) %>%
  # Drop the grouping so later steps do not silently operate per country.
  dplyr::ungroup()
This is an example of what my data frame looks like:
# country year country_year trust inflation GDPGrowth Unemployment
# 1 Austria 2002 AT2002 4 2.2 4.2 4
# 2 Austria 2002 AT2002 9 2.2 4.2 4
# 55 Belgium 2002 BE2002 7 1.7 2.5 6
# 56 Belgium 2002 BE2002 3 1.7 2.5 6
# 91 Austria 2005 AT2005 2 3.4 2.9 3
# 91 Austria 2005 AT2005 6 3.4 2.9 3
# 141 Belgium 2005 BE2005 5 0.5 1.6 5
# 142 Belgium 2005 BE2005 9 0.5 1.6 5
# Sample of the data as an R literal (appears to be dput() output).
# Points visible in the literal itself:
#   - six rows, all with cntry "AT";
#   - `unemployment` is stored as character strings, not as numbers;
#   - the object has class grouped_df and its grouping column is named
#     `fdata1$cntry` (not `cntry` or `country`).
structure(list(idno = structure(c(1, 2, 3, 4, 6, 7), format.stata = "%12.0g"),
cntry = structure(c("AT", "AT", "AT", "AT", "AT", "AT"), format.stata = "%2s"),
essround = structure(c(1, 1, 1, 1, 1, 1), format.stata = "%12.0g"),
pspwght = structure(c(0.9409328155361, 0.47046640776805,
1.39215496052674, 1.38216297191755, 1.43776626993043, 1.39215496052674
), format.stata = "%12.0g"), agea = structure(c(54, 50, 63,
44, 41, 63), format.stata = "%13.0g"), gndr = structure(c(1,
1, 2, 1, 2, 2), format.stata = "%12.0g"), eduyrs = structure(c(11,
14, 9, 18, 15, 11), format.stata = "%12.0g"), mnactic = structure(c(1,
1, 6, 1, 1, 6), format.stata = "%41.0g"), lrscale = structure(c(6,
6, 5, 5, 5, NA), format.stata = "%12.0g"), rlgdgr = structure(c(8,
5, 7, 7, 10, 3), format.stata = "%20.0g"), dscrgrp = structure(c(2,
1, 2, 2, 2, 1), format.stata = "%12.0g"), dscretn = structure(c(0,
0, 0, 0, 0, 0), format.stata = "%12.0g"), ctzcntr = structure(c(1,
1, 1, 1, 1, 1), format.stata = "%12.0g"), blgetmg = structure(c(2,
2, NA, 2, 2, 2), format.stata = "%12.0g"), hincfel = structure(c(1,
3, 2, 1, 1, 3), format.stata = "%36.0g"), trstprl = structure(c(9,
0, 6, 8, 6, 0), format.stata = "%15.0g"), inwyr = structure(c(2003,
2003, 2003, 2003, 2003, 2003), format.stata = "%13.0g"),
inwyys = structure(c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), format.stata = "%13.0g"), inwyye = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), format.stata = "%13.0g"),
education = structure(c(11, 14, 9, 18, 15, 11), format.stata = "%12.0g"),
female = structure(c(0, 0, 1, 0, 1, 1), format.stata = "%12.0g"),
employement = structure(c(1, 1, 6, 1, 1, 6), format.stata = "%41.0g"),
age = structure(c(54, 50, 63, 44, 41, 63), format.stata = "%13.0g"),
year = c(2003, 2003, 2003, 2003, 2003, 2003), year1 = c(2010,
2010, 2010, 2010, 2010, 2010), year0 = c(1, 1, 1, 1, 1, 1
), cntry_year = c("AT 2003", "AT 2003", "AT 2003", "AT 2003",
"AT 2003", "AT 2003"), n = c(2257L, 2257L, 2257L, 2257L,
2257L, 2257L), year_lag = c(2002, 2002, 2002, 2002, 2002,
2002), gini_disp = c(26.7, 26.7, 26.7, 26.7, 26.7, 26.7),
unemployment = c("4.849999905", "4.849999905", "4.849999905",
"4.849999905", "4.849999905", "4.849999905"), corruption = c(1.966434,
1.966434, 1.966434, 1.966434, 1.966434, 1.966434), wb_growth = structure(c(1.65155392186669,
1.65155392186669, 1.65155392186669, 1.65155392186669, 1.65155392186669,
1.65155392186669), label = "GDP growth (annual %)"), wb_inflation = structure(c(1.81035787764132,
1.81035787764132, 1.81035787764132, 1.81035787764132, 1.81035787764132,
1.81035787764132), label = "Inflation, consumer prices (annual %)"),
old_demo = c(1, 1, 1, 1, 1, 1), gini_mean = c(28.9443587367257,
28.9443587367257, 28.9443587367257, 28.9443587367257, 28.9443587367257,
28.9443587367257), gini_diff = c(-2.24435873672569, -2.24435873672569,
-2.24435873672569, -2.24435873672569, -2.24435873672569,
-2.24435873672569), inflation_mean = c(2.36193292302435,
2.36193292302435, 2.36193292302435, 2.36193292302435, 2.36193292302435, 2.36193292302435), inflation_diff = structure(c(-0.551575045383031,
-0.551575045383031, -0.551575045383031, -0.551575045383031,
-0.551575045383031, -0.551575045383031), label = "Inflation, consumer prices (annual %)"),
growth_mean = c(2.11454728111128, 2.11454728111128, 2.11454728111128,
2.11454728111128, 2.11454728111128, 2.11454728111128), growth_diff = structure(c(-0.462993359244594,
-0.462993359244594, -0.462993359244594, -0.462993359244594,
-0.462993359244594, -0.462993359244594), label = "GDP growth (annual %)"),
corruption_mean = c(1.26648259354364, 1.26648259354364, 1.26648259354364,
1.26648259354364, 1.26648259354364, 1.26648259354364), corruption_diff = c(0.699951406456357,
0.699951406456357, 0.699951406456357, 0.699951406456357,
0.699951406456357, 0.699951406456357), `fdata1$cntry` = structure(c("AT",
"AT", "AT", "AT", "AT", "AT"), format.stata = "%2s"), country = structure(c("AT",
"AT", "AT", "AT", "AT", "AT"), format.stata = "%2s")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
`fdata1$cntry` = structure("AT", format.stata = "%2s"), .rows = structure(list(
1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
# Attach the tidyverse. library() stops with an error when the package is
# missing, whereas require() only returns FALSE and lets the script fail
# later with a less obvious message.
library(tidyverse)

# Build `data` from `mydatacomp`, replacing the two-letter code in `cntry`
# with the English country name from the countrycode::codelist lookup table.
data <- mydatacomp %>%
  merge(
    .,
    # Lookup table reduced to the three columns needed; `appr` holds the
    # iso2c code that is matched against `cntry`.
    countrycode::codelist %>%
      janitor::clean_names() %>%
      select(
        country_name = country_name_en,
        appr = iso2c,
        cont = continent
      ),
    # merge() with its default arguments keeps only rows that have a match
    # on both sides.
    by.x = "cntry",
    by.y = "appr"
  ) %>%
  as_tibble() %>%
  # Overwrite the code with the full country name.
  mutate(cntry = country_name)
# Country-year table of the macro indicators with their within-between
# decomposition: *_mean is the per-country mean and *_diff is the deviation
# of each country-year from that mean. The result is printed, not assigned.
data %>%
  # The sample data shows `unemployment` stored as character strings, so it
  # is converted before averaging.
  mutate(unemployment = as.numeric(unemployment)) %>%
  select(cntry, year, wb_inflation, wb_growth, unemployment) %>%
  # Collapse to one row per distinct combination BEFORE averaging, so that
  # each country-year counts once in the country mean rather than once per
  # respondent row (which would weight years by their sample size).
  distinct() %>%
  group_by(cntry) %>%
  mutate(
    inflation_mean = mean(wb_inflation, na.rm = TRUE),
    inflation_diff = wb_inflation - inflation_mean,
    growth_mean = mean(wb_growth, na.rm = TRUE),
    growth_diff = wb_growth - growth_mean,
    unemploy_mean = mean(unemployment, na.rm = TRUE),
    unemploy_diff = unemployment - unemploy_mean
  ) %>%
  # Drop the grouping so the remaining verbs act on the whole table.
  ungroup() %>%
  filter(!is.na(year)) %>%
  # Countries alphabetically, most recent year first within each country.
  arrange(cntry, desc(year))