rdataframerecodemutate

Assignment in mutate a value from dynamic column name in R


Here is my dataframe:

dt <- data.frame(
  session_id = c("17472631", "17472632", "17472633", "17472635", "17472636", "17472638"),
  Attractiveness_Real = c(-2, -2, 1, -1, 3, 1),
  Calmness_Real = c(-3, 2, 3, 2, 3, -2),
  Generosity_Real = rep(c(1, 3, 2), each = 2L),
  Arrogance_Real = c(-2, 1, -2, -3, 3, 2),
  Ambition_Real = c(-1, 1, -2, 2, 3, 1),
  Charisma_Real = c(-1, -3, -3, 2, 2, 2),
  Intelligence_Real = c(3, 2, 3, 2, 2, 2),
  Joyfulness_Real = c(3, -3, 1, 3, 2, -2),
  Friendliness_Real = c(-3, -1, -3, -2, -1, -3),
  Laziness_Real = c(-1, -3, NA, -2, NA, -3),
  Intelligence_Retreived = c(2, 1, NA, NA, NA, 3),
  Laziness_Retreived = rep(c(-3, NA), c(1L, 5L)),
  Attractiveness_Retreived = c(-2, NA, NA, NA, NA, 3),
  Generosity_Retreived = c(-2, 2, 3, 2, NA, 3),
  Friendliness_Retreived = c(NA, 2, -2, NA, -2, 2),
  Arrogance_Retreived = c(NA, NA, NA, -1, NA, 1),
  Calmness_Retreived = c(-2, NA, -1, NA, 1, 3),
  Charisma_Retreived = c(1, NA, NA, 3, -1, 3),
  Joyfulness_Retreived = c(NA, NA, NA, 3, NA, 3),
  Ambition_Retreived = c(NA, 1, 2, NA, 2, 2),
  RealAttribute_1 = c(
    "Attractiveness", "Charisma", "Friendliness", "Joyfulness", "Joyfulness",
    "Friendliness"
  ),
  RealAttribute_2 = c("Calmness", "Joyfulness", "Generosity", "Charisma", "Laziness", "Arrogance"),
  RealAttribute_3 = c("Generosity", "Attractiveness", "Arrogance", "Calmness", "Charisma", "Charisma"),
  RealAttribute_4 = c("Arrogance", "Generosity", "Charisma", "Friendliness", "Friendliness", "Ambition"),
  RealAttribute_5 = c("Ambition", "Calmness", "Ambition", "Laziness", "Arrogance", "Intelligence"),
  RealAttribute_6 = c("Charisma", "Ambition", "Calmness", "Ambition", "Generosity", "Generosity"),
  RealAttribute_7 = c(
    "Intelligence", "Friendliness", "Intelligence", "Attractiveness",
    "Attractiveness", "Laziness"
  ),
  RealAttribute_8 = c("Joyfulness", "Arrogance", "Joyfulness", "Arrogance", "Ambition", "Attractiveness"),
  RealAttribute_9 = c(
    "Friendliness", "Laziness", "Attractiveness", "Generosity", "Intelligence",
    "Joyfulness"
  ),
  RealAttribute_10 = c("Laziness", "Intelligence", "Laziness", "Intelligence", "Calmness", "Calmness"),
  RetrievedAttribute_1 = c(
    "Intelligence", "Intelligence", "Generosity", "Joyfulness", "Laziness",
    "Intelligence"
  ),
  RetrievedAttribute_2 = c("Laziness", "Joyfulness", "Ambition", "Arrogance", "Intelligence", "Ambition"),
  RetrievedAttribute_3 = c("Attractiveness", "Laziness", "Arrogance", "Charisma", "Calmness", "Generosity"),
  RetrievedAttribute_4 = rep(c("Generosity", "Arrogance", "Friendliness"), 2),
  RetrievedAttribute_5 = c("Friendliness", "Calmness", "Arrogance", NA, "Laziness", "Laziness"),
  RetrievedAttribute_6 = c("Arrogance", "Arrogance", "Calmness", NA, "Arrogance", "Charisma"),
  RetrievedAttribute_7 = c("Calmness", "Calmness", NA, NA, "Intelligence", "Attractiveness"),
  RetrievedAttribute_8 = c("Charisma", "Ambition", NA, NA, "Ambition", "Arrogance"),
  RetrievedAttribute_9 = c("Arrogance", "Friendliness", NA, NA, "Friendliness", "Joyfulness"),
  RetrievedAttribute_10 = c("Friendliness", "Generosity", NA, NA, "Charisma", "Calmness"),
  RankedAttribute_1 = c(
    "Intelligence", "Intelligence", "Arrogance", "Generosity", "Laziness",
    "Intelligence"
  ),
  RankedAttribute_2 = c("Laziness", "Generosity", "Friendliness", "Intelligence", "Charisma", "Calmness"),
  RankedAttribute_3 = c("Charisma", "Laziness", NA, "Gloomy/Joyful", "Intelligence", "Laziness"),
  RankedAttribute_4 = c("Friendliness", "Calmness", NA, "Charisma", "Attractiveness", "Charisma"),
  RankedAttribute_5 = c("Ambition", "Friendliness", NA, NA, "Friendliness", "Attractiveness"),
  RankedAttribute_6 = c("Arrogance", "Attractiveness", NA, NA, NA, "Generosity"),
  RankedAttribute_7 = c(NA, "Arrogance", NA, NA, NA, NA),
  RankedAttribute_8 = rep(NA_character_, 6L),
  RankedAttribute_9 = rep(NA_character_, 6L),
  RankedAttribute_10 = rep(NA_character_, 6L),
  target = c("Andy", "Daniel", "Brandon", "Jack", "Daniel", "Tyler"),
  rating_target = c("8", "5", "5", "10", "10", "11 - <i>extremely like</i>")
)
str(finalDataNumeric)
'data.frame':   6 obs. of  53 variables: 
 session_id              : chr  "17472631" "17472632" "17472633" "17472635" ...
 Attractiveness_Real     : num  -2 -2 1 -1 3 1
 Calmness_Real           : num  -3 2 3 2 3 -2
 Generosity_Real         : num  1 1 3 3 2 2
 Arrogance_Real          : num  -2 1 -2 -3 3 2
 Ambition_Real           : num  -1 1 -2 2 3 1
 Charisma_Real           : num  -1 -3 -3 2 2 2
 Intelligence_Real       : num  3 2 3 2 2 2
 Joyfulness_Real         : num  3 -3 1 3 2 -2
 Friendliness_Real       : num  -3 -1 -3 -2 -1 -3
 Laziness_Real           : num  -1 -3 NA -2 NA -3
 Intelligence_Retreived  : num  2 1 NA NA NA 3
 Laziness_Retreived      : num  -3 NA NA NA NA NA
 Attractiveness_Retreived: num  -2 NA NA NA NA 3
 Generosity_Retreived    : num  -2 2 3 2 NA 3
 Friendliness_Retreived  : num  NA 2 -2 NA -2 2
 Arrogance_Retreived     : num  NA NA NA -1 NA 1
 Calmness_Retreived      : num  -2 NA -1 NA 1 3
 Charisma_Retreived      : num  1 NA NA 3 -1 3
 Joyfulness_Retreived    : num  NA NA NA 3 NA 3
 Ambition_Retreived      : num  NA 1 2 NA 2 2
 RealAttribute_1         : chr  "Attractiveness" "Charisma" "Friendliness" "Joyfulness" ...
 RealAttribute_2         : chr  "Calmness" "Joyfulness" "Generosity" "Charisma" ...
 RealAttribute_3         : chr  "Generosity" "Attractiveness" "Arrogance" "Calmness" ...
 RealAttribute_4         : chr  "Arrogance" "Generosity" "Charisma" "Friendliness" ...
 RealAttribute_5         : chr  "Ambition" "Calmness" "Ambition" "Laziness" ...
 RealAttribute_6         : chr  "Charisma" "Ambition" "Calmness" "Ambition" ...
 RealAttribute_7         : chr  "Intelligence" "Friendliness" "Intelligence" "Attractiveness" ...
 RealAttribute_8         : chr  "Joyfulness" "Arrogance" "Joyfulness" "Arrogance" ...
 RealAttribute_9         : chr  "Friendliness" "Laziness" "Attractiveness" "Generosity" ...
 RealAttribute_10        : chr  "Laziness" "Intelligence" "Laziness" "Intelligence" ...
 RetrievedAttribute_1    : chr  "Intelligence" "Intelligence" "Generosity" "Joyfulness" ...
 RetrievedAttribute_2    : chr  "Laziness" "Joyfulness" "Ambition" "Arrogance" ...
 RetrievedAttribute_3    : chr  "Attractiveness" "Laziness" "Arrogance" "Charisma" ...
 RetrievedAttribute_4    : chr  "Generosity" "Arrogance" "Friendliness" "Generosity" ...
 RetrievedAttribute_5    : chr  "Friendliness" "Calmness" "Arrogance" NA ...
 RetrievedAttribute_6    : chr  "Arrogance" "Arrogance" "Calmness" NA ...
 RetrievedAttribute_7    : chr  "Calmness" "Calmness" NA NA ...
 RetrievedAttribute_8    : chr  "Charisma" "Ambition" NA NA ...
 RetrievedAttribute_9    : chr  "Arrogance" "Friendliness" NA NA ...
 RetrievedAttribute_10   : chr  "Friendliness" "Generosity" NA NA ...
 RankedAttribute_1       : chr  "Intelligence" "Intelligence" "Arrogance" "Generosity" ...
 RankedAttribute_2       : chr  "Laziness" "Generosity" "Friendliness" "Intelligence" ...
 RankedAttribute_3       : chr  "Charisma" "Laziness" NA "Joyfulness" ...
 RankedAttribute_4       : chr  "Friendliness" "Calmness" NA "Charisma" ...
 RankedAttribute_5       : chr  "Ambition" "Friendliness" NA NA ...
 RankedAttribute_6       : chr  "Arrogance" "Attractiveness" NA NA ...
 RankedAttribute_7       : chr  NA "Arrogance" NA NA ...
 RankedAttribute_8       : chr  NA NA NA NA ...
 RankedAttribute_9       : chr  NA NA NA NA ...
 RankedAttribute_10      : chr  NA NA NA NA ...
 target                  : chr  "Andy" "Daniel" "Brandon" "Jack" ...
 rating_target           : chr  "8" "5" "5" "10" ...

I want to recode each of the columns that contain "Attribute*" ("RealAttribute_*", "*RetrievedAttribute_", "RankedAttribute_") so they would be assigned with the value from the column that starts with the string they contain and ends with "_Real".

So for example if "RetrievedAttribute_1" contain "Intelligence" in a certain row, they would get the value under "Intelligence_Real" in that row.

Another example, taking only "RealAttribute_" columns, if the first four rows of the data looks like this:

 Attractiveness_Real     : num  -2 -2 1 -1 3 1
 Calmness_Real           : num  -3 2 3 2 3 -2
 Generosity_Real         : num  1 1 3 3 2 2
 Arrogance_Real          : num  -2 1 -2 -3 3 2
 Ambition_Real           : num  -1 1 -2 2 3 1
 Charisma_Real           : num  -1 -3 -3 2 2 2
 Intelligence_Real       : num  3 2 3 2 2 2
 Joyfulness_Real         : num  3 -3 1 3 2 -2
 Friendliness_Real       : num  -3 -1 -3 -2 -1 -3
 Laziness_Real           : num  -1 -3 NA -2 NA -3
 RealAttribute_1         : chr  "Attractiveness" "Charisma" "Friendliness" "Joyfulness" 
 RealAttribute_2         : chr  "Calmness" "Joyfulness" "Generosity" "Charisma" 
 RealAttribute_3         : chr  "Generosity" "Attractiveness" "Arrogance" "Calmness" 
 RealAttribute_4         : chr  "Arrogance" "Generosity" "Charisma" "Friendliness" ...
 RealAttribute_5         : chr  "Ambition" "Calmness" "Ambition" "Laziness" 
 RealAttribute_6         : chr  "Charisma" "Ambition" "Calmness" "Ambition" 
 RealAttribute_7         : chr  "Intelligence" "Friendliness" "Intelligence" "Attractiveness" 
 RealAttribute_8         : chr  "Joyfulness" "Arrogance" "Joyfulness" "Arrogance" 
 RealAttribute_9         : chr  "Friendliness" "Laziness" "Attractiveness" "Generosity"
 RealAttribute_10        : chr  "Laziness" "Intelligence" "Laziness" "Intelligence" 

The end result should look like this:

 Attractiveness_Real     : num  -2 -2 1 -1 3 1
 Calmness_Real           : num  -3 2 3 2 3 -2
 Generosity_Real         : num  1 1 3 3 2 2
 Arrogance_Real          : num  -2 1 -2 -3 3 2
 Ambition_Real           : num  -1 1 -2 2 3 1
 Charisma_Real           : num  -1 -3 -3 2 2 2
 Intelligence_Real       : num  3 2 3 2 2 2
 Joyfulness_Real         : num  3 -3 1 3 2 -2
 Friendliness_Real       : num  -3 -1 -3 -2 -1 -3
 Laziness_Real           : num  -1 -3 NA -2 NA -3
 RealAttribute_1         : num  -2 -3 -3 3 
 RealAttribute_2         : num  -3 -3 3 2 
 RealAttribute_3         : num  1 -2 -2 2 
 RealAttribute_4         : num  -2 1 -3 -2 
 RealAttribute_5         : num  -1 2 -2 -2 
 RealAttribute_6         : num  -1 1 3 2 
 RealAttribute_7         : num 3 -1 3 -1 
 RealAttribute_8         : num  3 1 1 -3 
 RealAttribute_9         : num  -3 -3 1 3
 RealAttribute_10        : num  -1 2 NA 2

I tryed first:

dt <- dt %>% mutate_at(vars(contains("Attribute_")), funs(. = !!paste0(., "_Real")))

And got:

Error in `call2()`:

! `.fn` must be a string, a symbol, a call, or a function

Run `rlang::last_trace()` to see where the error occurred.

Also tried

dt <- dt %>% mutate_at(vars(contains("Attribute_")), function(){. = !!paste0(., "_Real")})
dt <- dt %>% mutate_at(vars(contains("Attribute_")), funs(. = value[paste0(., "_Real")]))
dt <- dt %>% mutate_at(vars(contains("Attribute_")), funs(. = get(paste0(., "_Real"))))

All returned errors

And even:

columns_to_recode <- grep("Attribute_", colnames(dt), value = TRUE)
for (col in columns_to_recode) {
  att<-dt[[col]]
  col_name <- paste0(att, "_Real")
  dt[[col]] <- dt[[col_name]]
}

And got

Error in .subset2(x, i, exact = exact) : 
  recursive indexing failed at level 2

Thanks in advance!


Solution

  • The data is quite untidy at the moment- what I mean by that is, Attribute, Ordering, and Ranking should each be their own column, rather than being encoded in column names. Pulling them out into their own columns will make the data easier to use, and ultimately make any analysis you do down the road easier also.

    You have two main data types in the dataset: the objective values ("X_Real", as you said), and then the ordering data ("RankedAttribute_X"). One way of tidying the data would be to convert "X_Real"-type column names into having "RankedAttribute_X"-style column names too, and then doing a pivot_longer with pattern matching. The other way (the one I went with) is to create two separate dataframes for the two different data types, and then join them.

    # first, we create the dataframe with the real observations
    real_df <- dt |> 
      select(session_id, matches("Real$")) |> # session_id and columns which end in Real
      pivot_longer(-session_id, names_to = c("Attribute", ".value"), names_sep = "_") # this uses regex. It's kinda hard to explain, but if you look at the output hopefully it will give you an idea of what is going on. Feel free to ask me any questions you have!
    
    # next, the ranking dataframe
    rank_df <- dt |> 
      select(session_id, matches("^Real|^Retreived|^Ranked")) |> # ^ means the start of a line in regex, so ^Real matches columns which start with Real
      pivot_longer(-session_id, names_to = c("type", ".value", "num"), names_pattern = "(Real|Retreived|Ranked)(.*)_([0-9]+)")
    
    # finally, we join the real observations to the rankings
    left_join(rank_df, real_df, by = join_by(session_id, Attribute))
    

    Output:

    # A tibble: 120 × 5
       session_id type  num   Attribute       Real
       <chr>      <chr> <chr> <chr>          <dbl>
     1 17472631   Real  1     Attractiveness    -2
     2 17472631   Real  2     Calmness          -3
     3 17472631   Real  3     Generosity         1
     4 17472631   Real  4     Arrogance         -2
     5 17472631   Real  5     Ambition          -1
     6 17472631   Real  6     Charisma          -1
     7 17472631   Real  7     Intelligence       3
     8 17472631   Real  8     Joyfulness         3
     9 17472631   Real  9     Friendliness      -3
    10 17472631   Real  10    Laziness          -1
    # ℹ 110 more rows
    

    You could pivot the data wider again, but I'd say it's debatable if this is better or worse:

    # add the line below onto the end, after the join
    pivot_wider(id_cols = c(session_id, type), names_from = num, values_from = Real)