r dplyr multilevel-analysis

Create time-varying and time-invariant contextual variables in R


I am attempting to fit a longitudinal multilevel model using ESS survey data from 2002-2018. I want to look at the effects of macroeconomic performance on political trust. To do this I am using a 'within-between' approach, which models a cross-sectional and a longitudinal component simultaneously.

Right now I have a dataset to which I have added the actual inflation, GDP growth and unemployment rates for each country-year. In order to create the time-invariant component, I have to create, for each of these variables (GDP growth, inflation, unemployment), a variable holding its mean for each country over the entire time series (e.g., mean GDP growth for Austria 2002-2018). However, when I run the code below, it creates a variable with the mean of the overall dataset for the contextual variables as opposed to the mean by country.

# Within-between decomposition of the macro indicators. For each indicator this
# adds a `*_mean` column (mean over the rows of each `country` group, ignoring
# NAs) and a `*_diff` column (the row's value minus that mean).
# The result is left grouped by `country`.
data <- mutate(
  group_by(data, country),
  inflation_mean = mean(inflation, na.rm = TRUE),
  inflation_diff = inflation - inflation_mean,
  growth_mean = mean(GDPgrowth, na.rm = TRUE),
  growth_diff = GDPgrowth - growth_mean,
  unemployment_mean = mean(unemployment, na.rm = TRUE),
  unemployment_diff = unemployment - unemployment_mean
)


This is an example of what my data frame looks like

#     country  year country_year  trust  inflation GDPGrowth Unemployment
# 1   Austria  2002  AT2002       4      2.2       4.2          4
# 2   Austria  2002  AT2002       9      2.2       4.2          4
# 55  Belgium  2002  BE2002       7      1.7       2.5          6
# 56  Belgium  2002  BE2002       3      1.7       2.5          6
# 91  Austria  2005  AT2005       2      3.4       2.9          3
# 92  Austria  2005  AT2005       6      3.4       2.9          3
# 141 Belgium  2005  BE2005       5      0.5       1.6          5
# 142 Belgium  2005  BE2005       9      0.5       1.6          5

# dput() output for a six-row sample of the data (every row is cntry "AT",
# year 2003). Things visible in this dump:
#   - `unemployment` is stored as character strings, not as numbers.
#   - The object is a grouped_df whose grouping column is `fdata1$cntry`.
#   - GDP growth and inflation are held in `wb_growth` and `wb_inflation`.
structure(list(idno = structure(c(1, 2, 3, 4, 6, 7), format.stata = "%12.0g"), 
    cntry = structure(c("AT", "AT", "AT", "AT", "AT", "AT"), format.stata = "%2s"), 
    essround = structure(c(1, 1, 1, 1, 1, 1), format.stata = "%12.0g"), 
    pspwght = structure(c(0.9409328155361, 0.47046640776805, 
    1.39215496052674, 1.38216297191755, 1.43776626993043, 1.39215496052674
    ), format.stata = "%12.0g"), agea = structure(c(54, 50, 63, 
    44, 41, 63), format.stata = "%13.0g"), gndr = structure(c(1, 
    1, 2, 1, 2, 2), format.stata = "%12.0g"), eduyrs = structure(c(11, 
    14, 9, 18, 15, 11), format.stata = "%12.0g"), mnactic = structure(c(1, 
    1, 6, 1, 1, 6), format.stata = "%41.0g"), lrscale = structure(c(6, 
    6, 5, 5, 5, NA), format.stata = "%12.0g"), rlgdgr = structure(c(8, 
    5, 7, 7, 10, 3), format.stata = "%20.0g"), dscrgrp = structure(c(2, 
    1, 2, 2, 2, 1), format.stata = "%12.0g"), dscretn = structure(c(0, 
    0, 0, 0, 0, 0), format.stata = "%12.0g"), ctzcntr = structure(c(1, 
    1, 1, 1, 1, 1), format.stata = "%12.0g"), blgetmg = structure(c(2, 
    2, NA, 2, 2, 2), format.stata = "%12.0g"), hincfel = structure(c(1, 
    3, 2, 1, 1, 3), format.stata = "%36.0g"), trstprl = structure(c(9, 
    0, 6, 8, 6, 0), format.stata = "%15.0g"), inwyr = structure(c(2003, 
    2003, 2003, 2003, 2003, 2003), format.stata = "%13.0g"), 
    inwyys = structure(c(NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_), format.stata = "%13.0g"), inwyye = structure(c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), format.stata = "%13.0g"), 
    education = structure(c(11, 14, 9, 18, 15, 11), format.stata = "%12.0g"), 
    female = structure(c(0, 0, 1, 0, 1, 1), format.stata = "%12.0g"), 
    employement = structure(c(1, 1, 6, 1, 1, 6), format.stata = "%41.0g"), 
    age = structure(c(54, 50, 63, 44, 41, 63), format.stata = "%13.0g"), 
    year = c(2003, 2003, 2003, 2003, 2003, 2003), year1 = c(2010, 
    2010, 2010, 2010, 2010, 2010), year0 = c(1, 1, 1, 1, 1, 1
    ), cntry_year = c("AT 2003", "AT 2003", "AT 2003", "AT 2003", 
    "AT 2003", "AT 2003"), n = c(2257L, 2257L, 2257L, 2257L, 
    2257L, 2257L), year_lag = c(2002, 2002, 2002, 2002, 2002, 
    2002), gini_disp = c(26.7, 26.7, 26.7, 26.7, 26.7, 26.7), 
    unemployment = c("4.849999905", "4.849999905", "4.849999905", 
    "4.849999905", "4.849999905", "4.849999905"), corruption = c(1.966434, 
    1.966434, 1.966434, 1.966434, 1.966434, 1.966434), wb_growth = structure(c(1.65155392186669, 
    1.65155392186669, 1.65155392186669, 1.65155392186669, 1.65155392186669, 
    1.65155392186669), label = "GDP growth (annual %)"), wb_inflation = structure(c(1.81035787764132, 
    1.81035787764132, 1.81035787764132, 1.81035787764132, 1.81035787764132, 
    1.81035787764132), label = "Inflation, consumer prices (annual %)"), 
    old_demo = c(1, 1, 1, 1, 1, 1), gini_mean = c(28.9443587367257, 
    28.9443587367257, 28.9443587367257, 28.9443587367257, 28.9443587367257, 
    28.9443587367257), gini_diff = c(-2.24435873672569, -2.24435873672569, 
    -2.24435873672569, -2.24435873672569, -2.24435873672569, 
    -2.24435873672569), inflation_mean = c(2.36193292302435, 
    2.36193292302435, 2.36193292302435, 2.36193292302435, 2.36193292302435,   2.36193292302435), inflation_diff = structure(c(-0.551575045383031, 
    -0.551575045383031, -0.551575045383031, -0.551575045383031, 
    -0.551575045383031, -0.551575045383031), label = "Inflation, consumer prices (annual %)"), 
    growth_mean = c(2.11454728111128, 2.11454728111128, 2.11454728111128, 
    2.11454728111128, 2.11454728111128, 2.11454728111128), growth_diff = structure(c(-0.462993359244594, 
    -0.462993359244594, -0.462993359244594, -0.462993359244594, 
    -0.462993359244594, -0.462993359244594), label = "GDP growth (annual %)"), 
    corruption_mean = c(1.26648259354364, 1.26648259354364, 1.26648259354364, 
    1.26648259354364, 1.26648259354364, 1.26648259354364), corruption_diff = c(0.699951406456357, 
    0.699951406456357, 0.699951406456357, 0.699951406456357, 
    0.699951406456357, 0.699951406456357), `fdata1$cntry` = structure(c("AT", 
    "AT", "AT", "AT", "AT", "AT"), format.stata = "%2s"), country = structure(c("AT", 
    "AT", "AT", "AT", "AT", "AT"), format.stata = "%2s")), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
    `fdata1$cntry` = structure("AT", format.stata = "%2s"), .rows = structure(list(
        1:6), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))







Solution

  • require(tidyverse)
    
    # Lookup table built from countrycode's code list: two-letter code
    # (`appr`, from `iso2c`), English country name and continent.
    country_lookup <- select(
      janitor::clean_names(countrycode::codelist),
      country_name = country_name_en,
      appr = iso2c,
      cont = continent
    )

    # Match `cntry` against the two-letter code (merge()'s default keeps only
    # codes present in both tables), convert the result to a tibble, and
    # overwrite `cntry` with the full country name.
    data <- merge(mydatacomp, country_lookup, by.x = "cntry", by.y = "appr") %>%
      tibble() %>%
      mutate(cntry = country_name)
    
    
    # Country-year table of the macro indicators with their within-between
    # decomposition: `*_mean` is the mean over a country's rows (NAs ignored)
    # and `*_diff` is each row's deviation from that mean.
    # NOTE(review): the means are taken over all rows present before
    # `distinct()`, so country-years with more rows weigh more - confirm this
    # is the intended country mean.
    data %>%
      # `unemployment` is stored as character in the sample data, so it has to
      # be converted before it can be averaged.
      mutate(unemployment = as.numeric(unemployment)) %>%
      select(cntry, year, wb_inflation, wb_growth, unemployment) %>%
      group_by(cntry) %>%
      # Qualified on purpose: if plyr is attached after dplyr, plyr::mutate()
      # masks this verb and ignores the grouping, which yields overall means
      # instead of per-country means.
      dplyr::mutate(
        inflation_mean = mean(wb_inflation, na.rm = TRUE),
        inflation_diff = wb_inflation - inflation_mean,
        growth_mean = mean(wb_growth, na.rm = TRUE),
        growth_diff = wb_growth - growth_mean,
        unemploy_mean = mean(unemployment, na.rm = TRUE),
        unemploy_diff = unemployment - unemploy_mean
      ) %>%
      # Drop the grouping so later steps do not silently operate per country.
      ungroup() %>%
      # Collapse repeated rows to one row per distinct combination of values.
      distinct() %>%
      filter(!is.na(year)) %>%
      arrange(cntry, desc(year))