(The last) follow up question: Adjusting values in a table based on another table, Splitting multiple values among multiple values
I have these tables in R:
myt_with_counts <- data.frame(
name = c("a", "a", "a", "b", "b", "c", "c", "c", "d", "e", "e"),
var1 = c(120, 80, 100, 20, 30, 15, 25, 35, 40, 50, 60),
var2 = c(90, 60, 70, 25, 35, 20, 30, 40, 45, 55, 65),
count = c(10, 5, 15, 8, 12, 6, 9, 3, 7, 11, 4)
)
rel_table <- data.frame(
name = c("a", "b", "c"),
proportion = c(0.5, 0.4, 0.1)
)
My Question: I want to redistribute var1 and var2 values from group "a" to other groups based on specified proportions (50% stays, 40% to "b", 10% to "C"), where the amount removed from each "a" row and added to each receiving row is weighted by a count variable. The total sum of each value column remains unchanged after redistribution.
I really struggled to do this:
redistribute <- function(x, val.cols, name.col, count.col, prop, from) {
val.col <- match(val.cols, names(x))
x.names <- x[[name.col]]
prop.names <- prop[[1]]
rows <- match(prop.names, x.names)
from_totals <- colSums(x[x.names == from, val.col, drop=FALSE])
from_counts <- x[x.names == from, count.col]
total_from_counts <- sum(from_counts)
for(i in seq_along(prop.names)) {
group_name <- prop.names[i]
group_prop <- prop[[2]][i]
group_rows <- x.names == group_name
if(group_name == from) {
proportion_to_remove <- 1 - group_prop
amount_to_remove <- from_totals * proportion_to_remove
for(j in seq_along(val.col)) {
removal_per_count <- amount_to_remove[j] / total_from_counts
x[group_rows, val.col[j]] <- x[group_rows, val.col[j]] - from_counts * removal_per_count
}
} else {
group_counts <- x[group_rows, count.col]
total_group_counts <- sum(group_counts)
amount_to_add <- from_totals * group_prop
for(j in seq_along(val.col)) {
addition_per_count <- amount_to_add[j] / total_group_counts
x[group_rows, val.col[j]] <- x[group_rows, val.col[j]] + group_counts * addition_per_count
}
}
}
x
}
result <- redistribute(myt_with_counts, val.cols=c("var1", "var2"), name.col="name", count.col="count", prop=rel_table, from="a")
Again, same concept as the previous question. Only here, instead of dividing by number of rows, we multiply by current count divided by total count.
library(dplyr)
library(rlang)
distribute_fn_weighted <- \(dat, idCol, fromRow, toRows,
colVars, props, countCol) {
all_targets <- c(fromRow, toRows)
prop_vec <- setNames(props, all_targets)
source_totals <- dat %>%
filter({{ idCol }} == fromRow) %>%
summarise(across({{ colVars }}, sum)) %>%
as.list()
group_count_totals <- dat %>%
filter({{ idCol }} %in% all_targets) %>%
group_by({{ idCol }}) %>%
summarise(total_count = sum({{ countCol }}), .groups = "drop")
dat %>%
left_join(group_count_totals, by = as_label(enquo(idCol))) %>%
mutate(
across({{ colVars }}, ~ {
current_id <- {{ idCol }}
current_count <- {{ countCol }}
case_when(
current_id == fromRow ~
.x - (source_totals[[cur_column()]] *
(1 - prop_vec[as.character(fromRow)]) *
current_count / total_count),
current_id %in% toRows ~ .x +
(source_totals[[cur_column()]] *
prop_vec[as.character(current_id)] *
current_count / total_count),
TRUE ~ .x
)
})
) %>%
select(-total_count)
}
distribute_fn_weighted(
dat = myt_with_counts,
idCol = name,
fromRow = "a",
toRows = c("b", "c"),
colVars = c(var1, var2),
props = c(0.5, 0.4, 0.1),
countCol = count
)
#> name var1 var2 count
#> 1 a 70 53.33333 10
#> 2 a 55 41.66667 5
#> 3 a 25 15.00000 15
#> 4 b 68 60.20000 8
#> 5 b 102 87.80000 12
#> 6 c 25 27.33333 6
#> 7 c 40 41.00000 9
#> 8 c 40 43.66667 3
#> 9 d 40 45.00000 7
#> 10 e 50 55.00000 11
#> 11 e 60 65.00000 4
Created on 2025-07-25 with reprex v2.1.1