This is a follow-up question: Adjusting values in a table based on another table
I have two tables:
myt_repeating <- data.frame(
name = c("a", "a", "a", "b", "b", "c", "c", "c", "d", "e", "e"),
var1 = c(120, 80, 100, 20, 30, 15, 25, 35, 40, 50, 60),
var2 = c(90, 60, 70, 25, 35, 20, 30, 40, 45, 55, 65),
var3 = c(110, 70, 90, 30, 40, 25, 35, 45, 50, 60, 70)
)
rel_table <- data.frame(
name = c("a", "b", "c"),
proportion = c(0.5, 0.4, 0.1)
)
My Question: For var1 and var2
I could not think of any way to do this without a if loop:
redistribute <- function(x, val.cols, name.col, prop, from) {
val.col <- match(val.cols, names(x))
x.names <- x[[name.col]]
prop.names <- prop[[1]]
rows <- match(prop.names, x.names)
from_totals <- colSums(x[x.names == from, val.col, drop=FALSE])
for(i in seq_along(prop.names)) {
group_name <- prop.names[i]
group_prop <- prop[[2]][i]
group_rows <- x.names == group_name
n_rows <- sum(group_rows)
if(group_name == from) {
x[group_rows, val.col] <- x[group_rows, val.col] * group_prop
} else {
add_values <- (from_totals * group_prop) / n_rows
x[group_rows, val.col] <- x[group_rows, val.col] +
matrix(add_values, nrow=n_rows, ncol=length(val.col), byrow=TRUE)
}
}
x
}
result <- redistribute(myt_repeating, val.cols=c("var1", "var2"),
name.col="name", prop=rel_table, from="a")
Is there a more concise way to do this?
Same as previous question, here we just need to consider/divide by number of rows in each group.
library(dplyr)
library(rlang)
distribute_fn_multi <- function(dat, idCol, fromRow, toRows, colVars, props) {
all_targets <- c(fromRow, toRows)
prop_vec <- setNames(props, all_targets)
source_totals <- dat %>%
filter({{ idCol }} == fromRow) %>%
summarise(across({{ colVars }}, sum)) %>%
as.list()
group_counts <- dat %>%
filter({{ idCol }} %in% all_targets) %>%
count({{ idCol }}, name = "n_rows")
dat %>%
left_join(group_counts, by = as_label(enquo(idCol))) %>%
mutate(
across({{ colVars }}, ~ {
current_id <- {{ idCol }}
case_when(
current_id == fromRow ~ source_totals[[cur_column()]] *
prop_vec[as.character(fromRow)] / n_rows,
current_id %in% toRows ~ .x +
(source_totals[[cur_column()]] *
prop_vec[as.character(current_id)] / n_rows),
TRUE ~ .x
)
})
) %>%
select(-n_rows)
}
distribute_fn_multi(
dat = myt_repeating,
idCol = name,
fromRow = "a",
toRows = c("b", "c"),
colVars = c(var1, var2),
props = c(0.5, 0.4, 0.1)
)
# > name var1 var2 var3
# > 1 a 50 36.66667 110
# > 2 a 50 36.66667 70
# > 3 a 50 36.66667 90
# > 4 b 80 69.00000 30
# > 5 b 90 79.00000 40
# > 6 c 25 27.33333 25
# > 7 c 35 37.33333 35
# > 8 c 45 47.33333 45
# > 9 d 40 45.00000 50
# > 10 e 50 55.00000 60
# > 11 e 60 65.00000 70