rdataframedplyr

Adjusting values in a table based on another table


I have these 2 tables:

    myt <- data.frame(
      name = c("a", "b", "c", "d", "e", "f"),
      var1 = c(100, 20, 30, 40, 50, 60), 
      var2 = c(80, 25, 35, 45, 55, 65),   
      var3 = c(90, 30, 40, 50, 60, 70)    
    )


rel_table <- data.frame(
  name = c("a", "b", "c"),
  proportion = c(0.5, 0.4, 0.1)
)

I want to redistribute values from row "a" (myt) by keeping 50% for itself, giving 40% to row "b", and 10% to row "C" across variables var1 and var2 based on proportions defined in a relationship table (rel_table).

I tried seeing if there is some easy way to do this. I could not find one. I also tried to write a function:

redistribute_from_a <- function(df, rel_df) {
  a_var1 <- df[df$name == "a", "var1"]
  a_var2 <- df[df$name == "a", "var2"]
  
  df[df$name == "a", "var1"] <- a_var1 * rel_df[rel_df$name == "a", "proportion"]
  df[df$name == "a", "var2"] <- a_var2 * rel_df[rel_df$name == "a", "proportion"]
  
  for(target in c("b", "c")) {
    if(target %in% df$name) {
      prop <- rel_df[rel_df$name == target, "proportion"]
      df[df$name == target, "var1"] <- df[df$name == target, "var1"] + (a_var1 * prop)
      df[df$name == target, "var2"] <- df[df$name == target, "var2"] + (a_var2 * prop)
    }
  }
  
  return(df)
}

myt_redistributed <- redistribute_from_a(myt, rel_table)

Are there any standard ways to do this in R?


a_var1_orig <- myt$var1[myt$name == "a"]
a_var2_orig <- myt$var2[myt$name == "a"]

myt_redistributed <- myt %>%
 mutate(
   var1 = case_when(
     name == "a" ~ var1 * 0.5,
     name == "b" ~ var1 + (a_var1_orig * 0.4),
     name == "c" ~ var1 + (a_var1_orig * 0.1),
     TRUE ~ var1
   ),
   var2 = case_when(
     name == "a" ~ var2 * 0.5,
     name == "b" ~ var2 + (a_var2_orig * 0.4),
     name == "c" ~ var2 + (a_var2_orig * 0.1),
     TRUE ~ var2
   )
 )

print(myt_redistributed)

Solution

  • library(dplyr)
    
    myt %>%
      left_join(rel_table, by = "name") %>%
      mutate(
        across(c(var1, var2), ~ ifelse(
          name == "a",
          .x * proportion,  
          .x + (myt[myt$name == "a", 
                    c("var1", "var2")][[cur_column()]] * 
                  coalesce(proportion, 0))  
        ))
      ) %>%
      select(-proportion)
    
    #>   name var1 var2 var3
    #> 1    a   50   40   90
    #> 2    b   60   57   30
    #> 3    c   40   43   40
    #> 4    d   40   45   50
    #> 5    e   50   55   60
    #> 6    f   60   65   70
    

    Created on 2025-07-22 with reprex v2.1.1


    Inspired by this comment, I wrote a function to make this a little more versatile;

    library(rlang)
    library(dplyr)
    
    distribute_fn <- \(dat, idCol, fromRow, toRows, colVars, props) {
      all_targets <- c(fromRow, toRows)
      prop_vec <- setNames(props, all_targets)
      source_values <- dat %>% 
        filter({{ idCol }} == fromRow) %>% 
        select({{ colVars }})
    
      dat %>%
        mutate(
          across({{ colVars }}, ~ {
            current_id <- {{ idCol }}
            case_when(
              current_id == fromRow ~ .x * prop_vec[as.character(fromRow)],
              current_id %in% toRows ~ .x + (source_values[[cur_column()]] * 
                                               prop_vec[as.character(current_id)]),
              TRUE ~ .x)})
        )
    }
    
    distribute_fn(dat = myt, idCol = name, fromRow = "a", 
                  toRows = c("b", "c"), colVars = c(var1, var2), 
                  props = c(0.5, 0.4, 0.1))
    
    #>   name var1 var2 var3
    #> 1    a   50   40   90
    #> 2    b   60   57   30
    #> 3    c   40   43   40
    #> 4    d   40   45   50
    #> 5    e   50   55   60
    #> 6    f   60   65   70
    

    Created on 2025-07-22 with reprex v2.1.1