I I can't figure out a way to make str_glue
work across multiple pairs of columns without just typing out each pair (var1_n with var1_rate, var2_n with var2_rate, etc.).
I attempted to use across
but I couldn't figure out the right syntax. I know there's an option in janitor/tabyl but I need to export this as a csv and I always have trouble with tabyl format playing nicely.
I tried to mimic the solution from Create multiple columns using pairs of different columns with map2 as well but I am having trouble with how to call columns and rename them in the same expression I think.
Example dataframe:
library(tidyverse)
example_data <- structure(list(state = c("AK", "AL", "AR", "AZ", "CA",
"CO","FL", "GA", "HI", "IA",
"ID", "IL", "IN", "KS", "KY",
"LA", "MA","ME", "MI", "MN",
"MO", "MS", "MT", "NC", "ND",
"NE", "NH", "NM","NV", "NY",
"OH", "OK", "OR", "PA", "SC",
"SD", "TN", "TX", "UT","VA",
"VT", "WA", "WI", "WV", "WY"),
n = c(13L, 5L, 28L, 15L, 35L, 32L, 10L, 30L, 9L,
82L, 27L, 52L, 34L, 82L, 27L, 25L, 3L, 16L,
36L, 77L, 33L, 30L, 49L, 20L, 36L, 63L, 13L,
11L,13L, 18L, 33L, 40L, 25L, 16L, 3L, 39L,
15L, 83L, 13L, 8L, 8L,39L, 58L, 21L, 16L),
var1_n = c(4L, 2L, 15L, 6L,14L, 14L, 6L, 22L,4L,
43L, 14L, 31L, 10L, 42L, 16L, 11L, 2L,
8L, 22L, 23L, 22L, 16L, 23L, 4L, 5L,
34L, 5L, 4L, 5L, 8L, 18L,24L, 7L, 5L, NA,
5L, 10L, 40L, 7L, 4L, 2L, 17L, 36L, 8L, 6L),
var2_n = c(4L, 2L, 15L, 6L, 14L, 14L, 6L, 22L, 4L,
43L, 14L, 31L, 10L, 42L, 16L, 11L, 2L,
8L, 22L, 23L, 22L, 16L,23L, 4L, 5L, 34L,
5L, 4L, 5L, 8L, 18L, 24L, 7L, 5L, NA, 5L,
10L, 40L, 7L, 4L, 2L, 17L, 36L, 8L, 6L),
var3_n = c(4L,2L, 5L, 5L, 8L, 7L, 4L, 8L, 2L,
15L, 9L, 11L, 2L, 12L, 4L, 6L,1L,
3L, 8L, 18L, 7L, 5L, 10L, 1L, 4L,
21L, 1L, 6L, 2L, 5L, 3L,6L, 3L, 1L, NA,
3L, NA, 16L, 1L, 1L, 2L, 8L, 8L, 2L, 6L),
var4_n = c(12L, 5L, 24L, 10L, 25L, 28L,9L, 26L, 7L,
73L, 20L, 50L, 33L, 66L, 25L, 21L, 3L,14L,
31L,70L, 31L, 25L, 36L, 15L, 23L, 48L, 9L,
10L, 8L, 16L, 30L, 28L,24L, 13L, 1L, 38L,
12L, 52L, 9L, 8L, 8L, 27L, 54L, 21L, 13L),
var1_rate = c("31%","40%", "54%", "40%", "40%", "44%", "60%", "73%", "44%",
"52%","52%", "60%", "29%", "51%", "59%", "44%", "67%", "50%",
"61%","30%", "67%", "53%", "47%", "20%", "14%", "54%", "38%",
"36%","38%", "44%", "55%", "60%", "28%", "31%", "NA%", "13%",
"67%","48%", "54%", "50%", "25%", "44%", "62%", "38%", "38%"),
var2_rate = c("31%", "40%", "54%", "40%", "40%", "44%","60%", "73%", "44%",
"52%", "52%", "60%", "29%", "51%", "59%","44%", "67%", "50%",
"61%", "30%", "67%", "53%", "47%", "20%","14%", "54%", "38%",
"36%", "38%", "44%", "55%", "60%", "28%","31%", "NA%", "13%",
"67%", "48%", "54%", "50%", "25%", "44%","62%", "38%", "38%"),
var3_rate = c("31%", "40%","18%", "33%", "23%", "22%", "40%", "27%", "22%",
"18%", "33%","21%", "6%", "15%", "15%", "24%", "33%", "19%",
"22%", "23%","21%", "17%", "20%", "5%", "11%", "33%", "8%",
"55%", "15%","28%", "9%", "15%", "12%", "6%", "NA%", "8%",
"NA%", "19%","8%", "13%", "25%", "21%", "14%", "10%", "38%"),
var4_rate = c("92%","100%", "86%", "67%", "71%", "88%", "90%", "87%", "78%",
"89%", "74%", "96%", "97%", "80%", "93%", "84%", "100%","88%",
"86%", "91%", "94%", "83%", "73%", "75%", "64%", "76%","69%",
"91%", "62%", "89%", "91%", "70%", "96%", "81%", "33%","97%",
"80%", "63%", "69%", "100%", "100%", "69%", "93%","100%", "81%")),
row.names = c(NA, -45L), class = "data.frame")
and then the code I used that worked:
pct_table <- example_data %>%
mutate(both_var1 = str_glue("{var1_n} ({var1_rate})"))
I am hoping to avoid typing the columns names manually. I am open to other easy ways if they will still play nicely with excel, besides str_glue
.
Perhaps something like this?
Here, I reshape the data long so that the various variable pairs are stacked instead of side by side. Then we can do the glue function once and reshape wide.
library(dplyr); library(tidyr)
example_data %>%
rename("total_n" = n) %>% # to avoid duplicate "n" columns
pivot_longer(-c(1:2), names_sep = "_", names_to = c("Var", ".value")) %>%
mutate(both = stringr::str_glue("{n} ({rate})")) %>%
select(state, total_n, Var, both) |>
pivot_wider(names_from = Var, values_from = both)
# A tibble: 45 × 6
state total_n var1 var2 var3 var4
<chr> <int> <glue> <glue> <glue> <glue>
1 AK 13 4 (31%) 4 (31%) 4 (31%) 12 (92%)
2 AL 5 2 (40%) 2 (40%) 2 (40%) 5 (100%)
3 AR 28 15 (54%) 15 (54%) 5 (18%) 24 (86%)
4 AZ 15 6 (40%) 6 (40%) 5 (33%) 10 (67%)
5 CA 35 14 (40%) 14 (40%) 8 (23%) 25 (71%)
6 CO 32 14 (44%) 14 (44%) 7 (22%) 28 (88%)
7 FL 10 6 (60%) 6 (60%) 4 (40%) 9 (90%)
8 GA 30 22 (73%) 22 (73%) 8 (27%) 26 (87%)
9 HI 9 4 (44%) 4 (44%) 2 (22%) 7 (78%)
10 IA 82 43 (52%) 43 (52%) 15 (18%) 73 (89%)
# ℹ 35 more rows
# ℹ Use `print(n = ...)` to see more rows