I have these 2 tables:
myt <- data.frame(
name = c("a", "b", "c", "d", "e", "f"),
var1 = c(100, 20, 30, 40, 50, 60),
var2 = c(80, 25, 35, 45, 55, 65),
var3 = c(90, 30, 40, 50, 60, 70)
)
rel_table <- data.frame(
name = c("a", "b", "c"),
proportion = c(0.5, 0.4, 0.1)
)
I want to redistribute values from row "a" (myt) by keeping 50% for itself, giving 40% to row "b", and 10% to row "C" across variables var1 and var2 based on proportions defined in a relationship table (rel_table).
I tried seeing if there is some easy way to do this. I could not find one. I also tried to write a function:
redistribute_from_a <- function(df, rel_df) {
a_var1 <- df[df$name == "a", "var1"]
a_var2 <- df[df$name == "a", "var2"]
df[df$name == "a", "var1"] <- a_var1 * rel_df[rel_df$name == "a", "proportion"]
df[df$name == "a", "var2"] <- a_var2 * rel_df[rel_df$name == "a", "proportion"]
for(target in c("b", "c")) {
if(target %in% df$name) {
prop <- rel_df[rel_df$name == target, "proportion"]
df[df$name == target, "var1"] <- df[df$name == target, "var1"] + (a_var1 * prop)
df[df$name == target, "var2"] <- df[df$name == target, "var2"] + (a_var2 * prop)
}
}
return(df)
}
myt_redistributed <- redistribute_from_a(myt, rel_table)
Are there any standard ways to do this in R?
a_var1_orig <- myt$var1[myt$name == "a"]
a_var2_orig <- myt$var2[myt$name == "a"]
myt_redistributed <- myt %>%
mutate(
var1 = case_when(
name == "a" ~ var1 * 0.5,
name == "b" ~ var1 + (a_var1_orig * 0.4),
name == "c" ~ var1 + (a_var1_orig * 0.1),
TRUE ~ var1
),
var2 = case_when(
name == "a" ~ var2 * 0.5,
name == "b" ~ var2 + (a_var2_orig * 0.4),
name == "c" ~ var2 + (a_var2_orig * 0.1),
TRUE ~ var2
)
)
print(myt_redistributed)
library(dplyr)
myt %>%
left_join(rel_table, by = "name") %>%
mutate(
across(c(var1, var2), ~ ifelse(
name == "a",
.x * proportion,
.x + (myt[myt$name == "a",
c("var1", "var2")][[cur_column()]] *
coalesce(proportion, 0))
))
) %>%
select(-proportion)
#> name var1 var2 var3
#> 1 a 50 40 90
#> 2 b 60 57 30
#> 3 c 40 43 40
#> 4 d 40 45 50
#> 5 e 50 55 60
#> 6 f 60 65 70
Created on 2025-07-22 with reprex v2.1.1
Inspired by this comment, I wrote a function to make this a little more versatile;
library(rlang)
library(dplyr)
distribute_fn <- \(dat, idCol, fromRow, toRows, colVars, props) {
all_targets <- c(fromRow, toRows)
prop_vec <- setNames(props, all_targets)
source_values <- dat %>%
filter({{ idCol }} == fromRow) %>%
select({{ colVars }})
dat %>%
mutate(
across({{ colVars }}, ~ {
current_id <- {{ idCol }}
case_when(
current_id == fromRow ~ .x * prop_vec[as.character(fromRow)],
current_id %in% toRows ~ .x + (source_values[[cur_column()]] *
prop_vec[as.character(current_id)]),
TRUE ~ .x)})
)
}
distribute_fn(dat = myt, idCol = name, fromRow = "a",
toRows = c("b", "c"), colVars = c(var1, var2),
props = c(0.5, 0.4, 0.1))
#> name var1 var2 var3
#> 1 a 50 40 90
#> 2 b 60 57 30
#> 3 c 40 43 40
#> 4 d 40 45 50
#> 5 e 50 55 60
#> 6 f 60 65 70
Created on 2025-07-22 with reprex v2.1.1