r dplyr stata

How to use the Stata equivalent of a foreach loop in R with mutate()


I have 4 different variables in my raw data, all starting with the prefix raw_ and followed by a combination of hours/money and school/social. In my real data, each variable has many more response options, so the recoding ends up being quite a long chunk of code that I have to repeat 4 times.

There must be a simple way to do the equivalent of a foreach loop in Stata (which I use more frequently for data cleaning) in R to condense the 4 iterations to just 1.


In Stata, the loop would look something like:

* Recode each raw_* string answer into a numeric olderch_* variable.
local suffixes hours_school hours_social money_school money_social
foreach suffix of local suffixes {
   * Rows matching neither response stay missing (.)
   gen olderch_`suffix' = 90 if raw_`suffix' == "90% or more 10% or less"
   replace olderch_`suffix' = 10 if raw_`suffix' == "10% or less 90% or more"
}

And here's my current version of R code that I'd love to condense similarly:

# Recode every raw_* answer into a numeric olderch_* column in one mutate().
# Each case_when() has no default, so responses matching neither string
# become NA.
d <- d %>%
  mutate(
    olderch_hours_school = case_when(
      raw_hours_school == "90% or more 10% or less" ~ 90,
      raw_hours_school == "10% or less 90% or more" ~ 10
    ),
    olderch_hours_social = case_when(
      raw_hours_social == "90% or more 10% or less" ~ 90,
      raw_hours_social == "10% or less 90% or more" ~ 10
    ),
    olderch_money_school = case_when(
      raw_money_school == "90% or more 10% or less" ~ 90,
      raw_money_school == "10% or less 90% or more" ~ 10
    ),
    olderch_money_social = case_when(
      raw_money_social == "90% or more 10% or less" ~ 90,
      raw_money_social == "10% or less 90% or more" ~ 10
    )
  )


Solution

  • Edit: As requested in a comment, answer examples create new columns based on the originals.

    dplyr::across() (docs) allows you to apply a function across multiple columns in your data frame. You can adapt your case_when statements by adding ~ to make a lambda function and using . to represent each column.

    You can provide a vector of all columns being transformed, or use a selection helper, such as everything() or starts_with().

    Test data follows the examples.

    library(dplyr)
    
    # Preview the example data frame (constructed at the end of this answer).
    glimpse(df)
    #> Rows: 10
    #> Columns: 4
    #> $ raw_hours_school <chr> "90% or more 10% or less", "90% or more 10% or less",…
    #> $ raw_hours_social <chr> "10% or less 90% or more", "10% or less 90% or more",…
    #> $ raw_money_school <chr> "90% or more 10% or less", "10% or less 90% or more",…
    #> $ raw_money_social <chr> "90% or more 10% or less", "10% or less 90% or more",…
    
    # Recode the four explicitly listed raw_* columns into numeric new_*
    # columns; the original columns are kept alongside the new ones.
    df |>
      mutate(across(
        c(raw_hours_school, raw_hours_social,
          raw_money_school, raw_money_social),
        # Responses matching neither string fall through and become NA.
        ~ case_when(
          . == "90% or more 10% or less" ~ 90,
          . == "10% or less 90% or more" ~ 10
        ),
        # .names is a glue spec, so the "raw_" prefix is dropped right here.
        # This avoids a second pipe and a gsub() over every column name,
        # which would also alter unrelated columns containing "_raw".
        .names = "new_{sub('^raw_', '', .col)}"
      ))
    #>           raw_hours_school        raw_hours_social        raw_money_school
    #> 1  90% or more 10% or less 10% or less 90% or more 90% or more 10% or less
    #> 2  90% or more 10% or less 10% or less 90% or more 10% or less 90% or more
    #> 3  90% or more 10% or less 10% or less 90% or more 90% or more 10% or less
    #> 4  10% or less 90% or more 90% or more 10% or less 90% or more 10% or less
    #> 5  90% or more 10% or less 10% or less 90% or more 90% or more 10% or less
    #> 6  10% or less 90% or more 90% or more 10% or less 90% or more 10% or less
    #> 7  10% or less 90% or more 10% or less 90% or more 10% or less 90% or more
    #> 8  10% or less 90% or more 90% or more 10% or less 10% or less 90% or more
    #> 9  90% or more 10% or less 90% or more 10% or less 90% or more 10% or less
    #> 10 90% or more 10% or less 90% or more 10% or less 10% or less 90% or more
    #>           raw_money_social new_hours_school new_hours_social new_money_school
    #> 1  90% or more 10% or less               90               10               90
    #> 2  10% or less 90% or more               90               10               10
    #> 3  90% or more 10% or less               90               10               90
    #> 4  10% or less 90% or more               10               90               90
    #> 5  10% or less 90% or more               90               10               90
    #> 6  90% or more 10% or less               10               90               90
    #> 7  90% or more 10% or less               10               10               10
    #> 8  90% or more 10% or less               10               90               10
    #> 9  90% or more 10% or less               90               90               90
    #> 10 10% or less 90% or more               90               90               10
    #>    new_money_social
    #> 1                90
    #> 2                10
    #> 3                90
    #> 4                10
    #> 5                10
    #> 6                90
    #> 7                90
    #> 8                90
    #> 9                90
    #> 10               10
    
    # Same recode, but the input columns are picked with a selection helper:
    # every column whose name starts with "raw" gets a numeric new_* twin.
    df |>
      mutate(across(
        starts_with("raw"),
        # Responses matching neither string fall through and become NA.
        ~ case_when(
          . == "90% or more 10% or less" ~ 90,
          . == "10% or less 90% or more" ~ 10
        ),
        # .names is a glue spec, so the "raw_" prefix is dropped right here.
        # This avoids a second pipe and a gsub() over every column name,
        # which would also alter unrelated columns containing "_raw".
        .names = "new_{sub('^raw_', '', .col)}"
      ))
    #>           raw_hours_school        raw_hours_social        raw_money_school
    #> 1  90% or more 10% or less 10% or less 90% or more 90% or more 10% or less
    #> 2  90% or more 10% or less 10% or less 90% or more 10% or less 90% or more
    #> 3  90% or more 10% or less 10% or less 90% or more 90% or more 10% or less
    #> 4  10% or less 90% or more 90% or more 10% or less 90% or more 10% or less
    #> 5  90% or more 10% or less 10% or less 90% or more 90% or more 10% or less
    #> 6  10% or less 90% or more 90% or more 10% or less 90% or more 10% or less
    #> 7  10% or less 90% or more 10% or less 90% or more 10% or less 90% or more
    #> 8  10% or less 90% or more 90% or more 10% or less 10% or less 90% or more
    #> 9  90% or more 10% or less 90% or more 10% or less 90% or more 10% or less
    #> 10 90% or more 10% or less 90% or more 10% or less 10% or less 90% or more
    #>           raw_money_social new_hours_school new_hours_social new_money_school
    #> 1  90% or more 10% or less               90               10               90
    #> 2  10% or less 90% or more               90               10               10
    #> 3  90% or more 10% or less               90               10               90
    #> 4  10% or less 90% or more               10               90               90
    #> 5  10% or less 90% or more               90               10               90
    #> 6  90% or more 10% or less               10               90               90
    #> 7  90% or more 10% or less               10               10               10
    #> 8  90% or more 10% or less               10               90               10
    #> 9  90% or more 10% or less               90               90               90
    #> 10 10% or less 90% or more               90               90               10
    #>    new_money_social
    #> 1                90
    #> 2                10
    #> 3                90
    #> 4                10
    #> 5                10
    #> 6                90
    #> 7                90
    #> 8                90
    #> 9                90
    #> 10               10
    
    # Reproducible example data: four character columns, ten rows each,
    # every cell drawn at random from the two possible survey responses.
    set.seed(123)

    responses <- c("90% or more 10% or less", "10% or less 90% or more")
    raw_cols <- c("raw_hours_school", "raw_hours_social",
                  "raw_money_school", "raw_money_social")

    # lapply() visits the columns in order, so the random draws are consumed
    # in the same sequence as four consecutive sample() calls would be.
    df <- as.data.frame(
      lapply(
        setNames(raw_cols, raw_cols),
        function(col) sample(responses, 10, replace = TRUE)
      )
    )