I have tried the following to turn my DATA
into my Desired_output
(reproducible code below). But as you can see, I get a bunch of NA
in my output.
Is there a way to obtain my Desired_output
?
Tried:
library(tidyverse)
DATA %>%
mutate(
category = case_when(
str_detect(Discription, "CTE") ~ "CTE",
str_detect(Discription, "AP/IB") ~ "AP_IB",
TRUE ~ "Other"
),
group = case_when(
str_detect(Discription, "ELs") ~ "ELs",
str_detect(Discription, "Former ELs") ~ "Former ELs",
str_detect(Discription, "Monitor ELs") ~ "Monitor ELs",
str_detect(Discription, "Never ELs") ~ "Never ELs", # Explicitly include Never ELs
TRUE ~ "Other"
),
value_type = case_when(
str_detect(Discription, "^Total") ~ "Total", # Identify rows with Total values
str_detect(Discription, "Percent") ~ "Percent", # Identify rows with Percent values
TRUE ~ "Value" # Identify rows with regular values
)
) %>%
# Separate the Total rows and use them in the "Total" column for each group
filter(value_type != "Percent" & value_type != "Other") %>%
pivot_wider(
names_from = value_type,
values_from = value,
values_fn = list(value = sum),
values_fill = list(value = NA)
) %>%
# Pivot the Total values separately
mutate(
Total = case_when(
str_detect(Discription, "Total") ~ Value, # Take "Total" values for each group
TRUE ~ NA_real_
)
) %>%
# Now calculate Percent based on (Value / Total) * 100 for each group
group_by(group) %>%
mutate(
Percent = ifelse(!is.na(Value) & !is.na(Total), (Value / Total) * 100, NA)
) %>%
ungroup() %>%
# Remove rows where Description contains 'Percent' because we already have a Percent column
filter(!str_detect(Discription, "Percent")) %>%
# Select the columns for the final output
select(ResdDistInstID, InstNm, category, group, Discription, Value, Total, Percent) %>%
arrange(InstNm, category, group)
DATA <- structure(list(
ResdDistInstID = c(1894, 1894, 1894, 1894, 1894,
1894, 1894, 1894, 1894, 1894, 1894, 1894, 1894, 1894, 1894, 1894,
1894, 1894, 1894, 1894),
InstNm = c("Baker SD 5J", "Baker SD 5J",
"Baker SD 5J", "Baker SD 5J", "Baker SD 5J", "Baker SD 5J", "Baker SD 5J",
"Baker SD 5J", "Baker SD 5J", "Baker SD 5J", "Baker SD 5J", "Baker SD 5J",
"Baker SD 5J", "Baker SD 5J", "Baker SD 5J", "Baker SD 5J", "Baker SD 5J",
"Baker SD 5J", "Baker SD 5J", "Baker SD 5J"),
Discription = c("ELs in an AP/IB Class",
"ELs CTE Class", "Total ELs", "Percent ELs in AP/IB Class", "Percent ELs in CTE Class",
"Never ELs in an AP/IB Class", "Never ELs in a CTE Class", "Total Never ELs",
"Percent Never ELs in AP/IB", "Percent Never ELs in CTE", "Former ELs CTE Class",
"Former ELs in an AP/IB Class", "Total Former ELs", "Percent Former ELs in AP/IB",
"Percent Former ELs in CTE", "Monitor ELs CTE Class", "Monitor ELs in an AP/IB Class",
"Total Monitor ELs", "Percent Monitor ELs in AP/IB", "Percent Monitor ELs in CTE"
),
value = c(1, 6, 83, 1.2, 7.2, 95, 329, 4845, 1.9, 6.7, 12,
5, 129, 3.8, 9.3, 2, 0, 29, 0, 6.8)),
row.names = c(NA, -20L),
class = "data.frame"
)
Desired_Output <- structure(list(
ResdDistInstID = c(1894, 1894, 1894, 1894, 1894, 1894, 1894, 1894),
InstNm = c("Baker SD 5J", "Baker SD 5J", "Baker SD 5J", "Baker SD 5J",
"Baker SD 5J", "Baker SD 5J", "Baker SD 5J", "Baker SD 5J"),
category = c("AP_IB", "CTE", "AP_IB", "CTE", "AP_IB", "CTE", "AP_IB", "CTE"),
group = c("ELs", "ELs", "Former ELs", "Former ELs", "Never ELs", "Never ELs",
"Monitor ELs", "Monitor ELs"),
Discription = c("ELs in an AP/IB Class", "ELs CTE Class", "Former ELs in an AP/IB Class",
"Former ELs CTE Class", "Never ELs in an AP/IB Class", "Never ELs in a CTE Class",
"Monitor ELs in an AP/IB Class", "Monitor ELs CTE Class"),
Value = c(1, 6, 5, 12, 95, 329, 2, 0),
Total = c(83, 83, 129, 129, 4845, 4845, 29, 29),
Percent = c(1.2, 7.2, 3.9, 9.3, 1.9, 6.8, 6.9, 0)
), row.names = c(NA, -8L), class = "data.frame")
This should do what you want. I used
Gregor Thomas' cool str_extract(Discription, pattern = "((Former )|(Monitor )|(Never ))?ELs"))
and some left_joins
.
DO <- DATA %>%
mutate(
category = if_else(grepl("AP/IB", Discription), "AP_IB", "CTE"),
group = str_extract(Discription, pattern = "((Former )|(Monitor )|(Never ))?ELs")
) %>%
# Get totals
left_join(DATA %>% filter(grepl("Total", Discription)) %>%
mutate(
group = gsub("Total ", "", Discription),
Total = value
) %>% select(group, Total), by = "group") %>%
# Get percentages
left_join(DATA %>% filter(grepl("Percent", Discription)) %>%
mutate(
group = str_extract(Discription, pattern = "((Former )|(Monitor )|(Never ))?ELs"),
category = if_else(grepl("AP/IB", Discription), "AP_IB", "CTE")
) %>% select(group, category, Percent = value), by = c("group", "category")
) %>%
filter(!grepl("Total|Percent", Discription)) %>%
select(ResdDistInstID,InstNm, category, group,Discription,Value = value,Total,Percent) %>% arrange(InstNm, category, group)