rdplyrjanitor

How to provide multiple pairs of columns to str_glue?


I I can't figure out a way to make str_glue work across multiple pairs of columns without just typing out each pair (var1_n with var1_rate, var2_n with var2_rate, etc.).

I attempted to use across but I couldn't figure out the right syntax. I know there's an option in janitor/tabyl but I need to export this as a csv and I always have trouble with tabyl format playing nicely.

I tried to mimic the solution from Create multiple columns using pairs of different columns with map2 as well but I am having trouble with how to call columns and rename them in the same expression I think.

Example dataframe:

library(tidyverse)

example_data <-  structure(list(state = c("AK", "AL", "AR", "AZ", "CA", 
                                          "CO","FL", "GA", "HI", "IA", 
                                          "ID", "IL", "IN", "KS", "KY", 
                                          "LA", "MA","ME", "MI", "MN", 
                                          "MO", "MS", "MT", "NC", "ND", 
                                          "NE", "NH", "NM","NV", "NY", 
                                          "OH", "OK", "OR", "PA", "SC",
                                          "SD", "TN", "TX", "UT","VA", 
                                          "VT", "WA", "WI", "WV", "WY"), 
                                
                                n = c(13L, 5L, 28L, 15L, 35L, 32L, 10L, 30L, 9L, 
                                      82L, 27L, 52L, 34L, 82L, 27L, 25L, 3L, 16L, 
                                      36L, 77L, 33L, 30L, 49L, 20L, 36L, 63L, 13L, 
                                      11L,13L, 18L, 33L, 40L, 25L, 16L, 3L, 39L, 
                                      15L, 83L, 13L, 8L, 8L,39L, 58L, 21L, 16L), 
                                
                                var1_n = c(4L, 2L, 15L, 6L,14L, 14L, 6L, 22L,4L, 
                                           43L, 14L, 31L, 10L, 42L, 16L, 11L, 2L,
                                           8L, 22L, 23L, 22L, 16L, 23L, 4L, 5L, 
                                           34L, 5L, 4L, 5L, 8L, 18L,24L, 7L, 5L, NA, 
                                           5L, 10L, 40L, 7L, 4L, 2L, 17L, 36L, 8L, 6L), 
                                
                                var2_n = c(4L, 2L, 15L, 6L, 14L, 14L, 6L, 22L, 4L,
                                           43L, 14L, 31L, 10L, 42L, 16L, 11L, 2L, 
                                           8L, 22L, 23L, 22L, 16L,23L, 4L, 5L, 34L, 
                                           5L, 4L, 5L, 8L, 18L, 24L, 7L, 5L, NA, 5L, 
                                           10L, 40L, 7L, 4L, 2L, 17L, 36L, 8L, 6L), 
                                
                                var3_n = c(4L,2L, 5L, 5L, 8L, 7L, 4L, 8L, 2L, 
                                           15L, 9L, 11L, 2L, 12L, 4L, 6L,1L, 
                                           3L, 8L, 18L, 7L, 5L, 10L, 1L, 4L, 
                                           21L, 1L, 6L, 2L, 5L, 3L,6L, 3L, 1L, NA, 
                                           3L, NA, 16L, 1L, 1L, 2L, 8L, 8L, 2L, 6L), 
                                
                                var4_n = c(12L, 5L, 24L, 10L, 25L, 28L,9L, 26L, 7L, 
                                           73L, 20L, 50L, 33L, 66L, 25L, 21L, 3L,14L, 
                                           31L,70L, 31L, 25L, 36L, 15L, 23L, 48L, 9L, 
                                           10L, 8L, 16L, 30L, 28L,24L, 13L, 1L, 38L, 
                                           12L, 52L, 9L, 8L, 8L, 27L, 54L, 21L, 13L), 
                                
                                var1_rate = c("31%","40%", "54%", "40%", "40%", "44%", "60%", "73%", "44%", 
                                              "52%","52%", "60%", "29%", "51%", "59%", "44%", "67%", "50%", 
                                              "61%","30%", "67%", "53%", "47%", "20%", "14%", "54%", "38%", 
                                              "36%","38%", "44%", "55%", "60%", "28%", "31%", "NA%", "13%", 
                                              "67%","48%", "54%", "50%", "25%", "44%", "62%", "38%", "38%"), 
                                
                                var2_rate = c("31%", "40%", "54%", "40%", "40%", "44%","60%", "73%", "44%", 
                                              "52%", "52%", "60%", "29%", "51%", "59%","44%", "67%", "50%", 
                                              "61%", "30%", "67%", "53%", "47%", "20%","14%", "54%", "38%", 
                                              "36%", "38%", "44%", "55%", "60%", "28%","31%", "NA%", "13%", 
                                              "67%", "48%", "54%", "50%", "25%", "44%","62%", "38%", "38%"), 
                                
                                var3_rate = c("31%", "40%","18%", "33%", "23%", "22%", "40%", "27%", "22%", 
                                              "18%", "33%","21%", "6%", "15%", "15%", "24%", "33%", "19%", 
                                              "22%", "23%","21%", "17%", "20%", "5%", "11%", "33%", "8%", 
                                              "55%", "15%","28%", "9%", "15%", "12%", "6%", "NA%", "8%", 
                                              "NA%", "19%","8%", "13%", "25%", "21%", "14%", "10%", "38%"), 
                                
                                
                                var4_rate = c("92%","100%", "86%", "67%", "71%", "88%", "90%", "87%", "78%",
                                              "89%", "74%", "96%", "97%", "80%", "93%", "84%", "100%","88%", 
                                              "86%", "91%", "94%", "83%", "73%", "75%", "64%", "76%","69%", 
                                              "91%", "62%", "89%", "91%", "70%", "96%", "81%", "33%","97%", 
                                              "80%", "63%", "69%", "100%", "100%", "69%", "93%","100%", "81%")), 
                           
                           row.names = c(NA, -45L), class = "data.frame")

and then the code I used that worked:

pct_table <-  example_data %>%
  mutate(both_var1 = str_glue("{var1_n} ({var1_rate})"))

I am hoping to avoid typing the columns names manually. I am open to other easy ways if they will still play nicely with excel, besides str_glue.


Solution

  • Perhaps something like this?

    Here, I reshape the data long so that the various variable pairs are stacked instead of side by side. Then we can do the glue function once and reshape wide.

    library(dplyr); library(tidyr)
    example_data %>%
      rename("total_n" = n) %>%  # to avoid duplicate "n" columns
      pivot_longer(-c(1:2), names_sep = "_", names_to = c("Var", ".value")) %>%
      mutate(both = stringr::str_glue("{n} ({rate})")) %>%
      select(state, total_n, Var, both) |>
      pivot_wider(names_from = Var, values_from = both)
    
    
    # A tibble: 45 × 6
       state total_n var1     var2     var3     var4    
       <chr>   <int> <glue>   <glue>   <glue>   <glue>  
     1 AK         13 4 (31%)  4 (31%)  4 (31%)  12 (92%)
     2 AL          5 2 (40%)  2 (40%)  2 (40%)  5 (100%)
     3 AR         28 15 (54%) 15 (54%) 5 (18%)  24 (86%)
     4 AZ         15 6 (40%)  6 (40%)  5 (33%)  10 (67%)
     5 CA         35 14 (40%) 14 (40%) 8 (23%)  25 (71%)
     6 CO         32 14 (44%) 14 (44%) 7 (22%)  28 (88%)
     7 FL         10 6 (60%)  6 (60%)  4 (40%)  9 (90%) 
     8 GA         30 22 (73%) 22 (73%) 8 (27%)  26 (87%)
     9 HI          9 4 (44%)  4 (44%)  2 (22%)  7 (78%) 
    10 IA         82 43 (52%) 43 (52%) 15 (18%) 73 (89%)
    # ℹ 35 more rows
    # ℹ Use `print(n = ...)` to see more rows