Here is my dataframe:
dt <- data.frame(
session_id = c("17472631", "17472632", "17472633", "17472635", "17472636", "17472638"),
Attractiveness_Real = c(-2, -2, 1, -1, 3, 1),
Calmness_Real = c(-3, 2, 3, 2, 3, -2),
Generosity_Real = rep(c(1, 3, 2), each = 2L),
Arrogance_Real = c(-2, 1, -2, -3, 3, 2),
Ambition_Real = c(-1, 1, -2, 2, 3, 1),
Charisma_Real = c(-1, -3, -3, 2, 2, 2),
Intelligence_Real = c(3, 2, 3, 2, 2, 2),
Joyfulness_Real = c(3, -3, 1, 3, 2, -2),
Friendliness_Real = c(-3, -1, -3, -2, -1, -3),
Laziness_Real = c(-1, -3, NA, -2, NA, -3),
Intelligence_Retreived = c(2, 1, NA, NA, NA, 3),
Laziness_Retreived = rep(c(-3, NA), c(1L, 5L)),
Attractiveness_Retreived = c(-2, NA, NA, NA, NA, 3),
Generosity_Retreived = c(-2, 2, 3, 2, NA, 3),
Friendliness_Retreived = c(NA, 2, -2, NA, -2, 2),
Arrogance_Retreived = c(NA, NA, NA, -1, NA, 1),
Calmness_Retreived = c(-2, NA, -1, NA, 1, 3),
Charisma_Retreived = c(1, NA, NA, 3, -1, 3),
Joyfulness_Retreived = c(NA, NA, NA, 3, NA, 3),
Ambition_Retreived = c(NA, 1, 2, NA, 2, 2),
RealAttribute_1 = c(
"Attractiveness", "Charisma", "Friendliness", "Joyfulness", "Joyfulness",
"Friendliness"
),
RealAttribute_2 = c("Calmness", "Joyfulness", "Generosity", "Charisma", "Laziness", "Arrogance"),
RealAttribute_3 = c("Generosity", "Attractiveness", "Arrogance", "Calmness", "Charisma", "Charisma"),
RealAttribute_4 = c("Arrogance", "Generosity", "Charisma", "Friendliness", "Friendliness", "Ambition"),
RealAttribute_5 = c("Ambition", "Calmness", "Ambition", "Laziness", "Arrogance", "Intelligence"),
RealAttribute_6 = c("Charisma", "Ambition", "Calmness", "Ambition", "Generosity", "Generosity"),
RealAttribute_7 = c(
"Intelligence", "Friendliness", "Intelligence", "Attractiveness",
"Attractiveness", "Laziness"
),
RealAttribute_8 = c("Joyfulness", "Arrogance", "Joyfulness", "Arrogance", "Ambition", "Attractiveness"),
RealAttribute_9 = c(
"Friendliness", "Laziness", "Attractiveness", "Generosity", "Intelligence",
"Joyfulness"
),
RealAttribute_10 = c("Laziness", "Intelligence", "Laziness", "Intelligence", "Calmness", "Calmness"),
RetrievedAttribute_1 = c(
"Intelligence", "Intelligence", "Generosity", "Joyfulness", "Laziness",
"Intelligence"
),
RetrievedAttribute_2 = c("Laziness", "Joyfulness", "Ambition", "Arrogance", "Intelligence", "Ambition"),
RetrievedAttribute_3 = c("Attractiveness", "Laziness", "Arrogance", "Charisma", "Calmness", "Generosity"),
RetrievedAttribute_4 = rep(c("Generosity", "Arrogance", "Friendliness"), 2),
RetrievedAttribute_5 = c("Friendliness", "Calmness", "Arrogance", NA, "Laziness", "Laziness"),
RetrievedAttribute_6 = c("Arrogance", "Arrogance", "Calmness", NA, "Arrogance", "Charisma"),
RetrievedAttribute_7 = c("Calmness", "Calmness", NA, NA, "Intelligence", "Attractiveness"),
RetrievedAttribute_8 = c("Charisma", "Ambition", NA, NA, "Ambition", "Arrogance"),
RetrievedAttribute_9 = c("Arrogance", "Friendliness", NA, NA, "Friendliness", "Joyfulness"),
RetrievedAttribute_10 = c("Friendliness", "Generosity", NA, NA, "Charisma", "Calmness"),
RankedAttribute_1 = c(
"Intelligence", "Intelligence", "Arrogance", "Generosity", "Laziness",
"Intelligence"
),
RankedAttribute_2 = c("Laziness", "Generosity", "Friendliness", "Intelligence", "Charisma", "Calmness"),
RankedAttribute_3 = c("Charisma", "Laziness", NA, "Gloomy/Joyful", "Intelligence", "Laziness"),
RankedAttribute_4 = c("Friendliness", "Calmness", NA, "Charisma", "Attractiveness", "Charisma"),
RankedAttribute_5 = c("Ambition", "Friendliness", NA, NA, "Friendliness", "Attractiveness"),
RankedAttribute_6 = c("Arrogance", "Attractiveness", NA, NA, NA, "Generosity"),
RankedAttribute_7 = c(NA, "Arrogance", NA, NA, NA, NA),
RankedAttribute_8 = rep(NA_character_, 6L),
RankedAttribute_9 = rep(NA_character_, 6L),
RankedAttribute_10 = rep(NA_character_, 6L),
target = c("Andy", "Daniel", "Brandon", "Jack", "Daniel", "Tyler"),
rating_target = c("8", "5", "5", "10", "10", "11 - <i>extremely like</i>")
)
str(finalDataNumeric)
'data.frame': 6 obs. of 53 variables:
session_id : chr "17472631" "17472632" "17472633" "17472635" ...
Attractiveness_Real : num -2 -2 1 -1 3 1
Calmness_Real : num -3 2 3 2 3 -2
Generosity_Real : num 1 1 3 3 2 2
Arrogance_Real : num -2 1 -2 -3 3 2
Ambition_Real : num -1 1 -2 2 3 1
Charisma_Real : num -1 -3 -3 2 2 2
Intelligence_Real : num 3 2 3 2 2 2
Joyfulness_Real : num 3 -3 1 3 2 -2
Friendliness_Real : num -3 -1 -3 -2 -1 -3
Laziness_Real : num -1 -3 NA -2 NA -3
Intelligence_Retreived : num 2 1 NA NA NA 3
Laziness_Retreived : num -3 NA NA NA NA NA
Attractiveness_Retreived: num -2 NA NA NA NA 3
Generosity_Retreived : num -2 2 3 2 NA 3
Friendliness_Retreived : num NA 2 -2 NA -2 2
Arrogance_Retreived : num NA NA NA -1 NA 1
Calmness_Retreived : num -2 NA -1 NA 1 3
Charisma_Retreived : num 1 NA NA 3 -1 3
Joyfulness_Retreived : num NA NA NA 3 NA 3
Ambition_Retreived : num NA 1 2 NA 2 2
RealAttribute_1 : chr "Attractiveness" "Charisma" "Friendliness" "Joyfulness" ...
RealAttribute_2 : chr "Calmness" "Joyfulness" "Generosity" "Charisma" ...
RealAttribute_3 : chr "Generosity" "Attractiveness" "Arrogance" "Calmness" ...
RealAttribute_4 : chr "Arrogance" "Generosity" "Charisma" "Friendliness" ...
RealAttribute_5 : chr "Ambition" "Calmness" "Ambition" "Laziness" ...
RealAttribute_6 : chr "Charisma" "Ambition" "Calmness" "Ambition" ...
RealAttribute_7 : chr "Intelligence" "Friendliness" "Intelligence" "Attractiveness" ...
RealAttribute_8 : chr "Joyfulness" "Arrogance" "Joyfulness" "Arrogance" ...
RealAttribute_9 : chr "Friendliness" "Laziness" "Attractiveness" "Generosity" ...
RealAttribute_10 : chr "Laziness" "Intelligence" "Laziness" "Intelligence" ...
RetrievedAttribute_1 : chr "Intelligence" "Intelligence" "Generosity" "Joyfulness" ...
RetrievedAttribute_2 : chr "Laziness" "Joyfulness" "Ambition" "Arrogance" ...
RetrievedAttribute_3 : chr "Attractiveness" "Laziness" "Arrogance" "Charisma" ...
RetrievedAttribute_4 : chr "Generosity" "Arrogance" "Friendliness" "Generosity" ...
RetrievedAttribute_5 : chr "Friendliness" "Calmness" "Arrogance" NA ...
RetrievedAttribute_6 : chr "Arrogance" "Arrogance" "Calmness" NA ...
RetrievedAttribute_7 : chr "Calmness" "Calmness" NA NA ...
RetrievedAttribute_8 : chr "Charisma" "Ambition" NA NA ...
RetrievedAttribute_9 : chr "Arrogance" "Friendliness" NA NA ...
RetrievedAttribute_10 : chr "Friendliness" "Generosity" NA NA ...
RankedAttribute_1 : chr "Intelligence" "Intelligence" "Arrogance" "Generosity" ...
RankedAttribute_2 : chr "Laziness" "Generosity" "Friendliness" "Intelligence" ...
RankedAttribute_3 : chr "Charisma" "Laziness" NA "Joyfulness" ...
RankedAttribute_4 : chr "Friendliness" "Calmness" NA "Charisma" ...
RankedAttribute_5 : chr "Ambition" "Friendliness" NA NA ...
RankedAttribute_6 : chr "Arrogance" "Attractiveness" NA NA ...
RankedAttribute_7 : chr NA "Arrogance" NA NA ...
RankedAttribute_8 : chr NA NA NA NA ...
RankedAttribute_9 : chr NA NA NA NA ...
RankedAttribute_10 : chr NA NA NA NA ...
target : chr "Andy" "Daniel" "Brandon" "Jack" ...
rating_target : chr "8" "5" "5" "10" ...
I want to recode each of the columns that contain "Attribute*" ("RealAttribute_*", "*RetrievedAttribute_", "RankedAttribute_") so they would be assigned with the value from the column that starts with the string they contain and ends with "_Real".
So for example if "RetrievedAttribute_1" contain "Intelligence" in a certain row, they would get the value under "Intelligence_Real" in that row.
Another example, taking only "RealAttribute_" columns, if the first four rows of the data looks like this:
Attractiveness_Real : num -2 -2 1 -1 3 1
Calmness_Real : num -3 2 3 2 3 -2
Generosity_Real : num 1 1 3 3 2 2
Arrogance_Real : num -2 1 -2 -3 3 2
Ambition_Real : num -1 1 -2 2 3 1
Charisma_Real : num -1 -3 -3 2 2 2
Intelligence_Real : num 3 2 3 2 2 2
Joyfulness_Real : num 3 -3 1 3 2 -2
Friendliness_Real : num -3 -1 -3 -2 -1 -3
Laziness_Real : num -1 -3 NA -2 NA -3
RealAttribute_1 : chr "Attractiveness" "Charisma" "Friendliness" "Joyfulness"
RealAttribute_2 : chr "Calmness" "Joyfulness" "Generosity" "Charisma"
RealAttribute_3 : chr "Generosity" "Attractiveness" "Arrogance" "Calmness"
RealAttribute_4 : chr "Arrogance" "Generosity" "Charisma" "Friendliness" ...
RealAttribute_5 : chr "Ambition" "Calmness" "Ambition" "Laziness"
RealAttribute_6 : chr "Charisma" "Ambition" "Calmness" "Ambition"
RealAttribute_7 : chr "Intelligence" "Friendliness" "Intelligence" "Attractiveness"
RealAttribute_8 : chr "Joyfulness" "Arrogance" "Joyfulness" "Arrogance"
RealAttribute_9 : chr "Friendliness" "Laziness" "Attractiveness" "Generosity"
RealAttribute_10 : chr "Laziness" "Intelligence" "Laziness" "Intelligence"
The end result should look like this:
Attractiveness_Real : num -2 -2 1 -1 3 1
Calmness_Real : num -3 2 3 2 3 -2
Generosity_Real : num 1 1 3 3 2 2
Arrogance_Real : num -2 1 -2 -3 3 2
Ambition_Real : num -1 1 -2 2 3 1
Charisma_Real : num -1 -3 -3 2 2 2
Intelligence_Real : num 3 2 3 2 2 2
Joyfulness_Real : num 3 -3 1 3 2 -2
Friendliness_Real : num -3 -1 -3 -2 -1 -3
Laziness_Real : num -1 -3 NA -2 NA -3
RealAttribute_1 : num -2 -3 -3 3
RealAttribute_2 : num -3 -3 3 2
RealAttribute_3 : num 1 -2 -2 2
RealAttribute_4 : num -2 1 -3 -2
RealAttribute_5 : num -1 2 -2 -2
RealAttribute_6 : num -1 1 3 2
RealAttribute_7 : num 3 -1 3 -1
RealAttribute_8 : num 3 1 1 -3
RealAttribute_9 : num -3 -3 1 3
RealAttribute_10 : num -1 2 NA 2
I tryed first:
dt <- dt %>% mutate_at(vars(contains("Attribute_")), funs(. = !!paste0(., "_Real")))
And got:
Error in `call2()`:
! `.fn` must be a string, a symbol, a call, or a function
Run `rlang::last_trace()` to see where the error occurred.
Also tried
dt <- dt %>% mutate_at(vars(contains("Attribute_")), function(){. = !!paste0(., "_Real")})
dt <- dt %>% mutate_at(vars(contains("Attribute_")), funs(. = value[paste0(., "_Real")]))
dt <- dt %>% mutate_at(vars(contains("Attribute_")), funs(. = get(paste0(., "_Real"))))
All returned errors
And even:
columns_to_recode <- grep("Attribute_", colnames(dt), value = TRUE)
for (col in columns_to_recode) {
att<-dt[[col]]
col_name <- paste0(att, "_Real")
dt[[col]] <- dt[[col_name]]
}
And got
Error in .subset2(x, i, exact = exact) :
recursive indexing failed at level 2
Thanks in advance!
The data is quite untidy at the moment- what I mean by that is, Attribute, Ordering, and Ranking should each be their own column, rather than being encoded in column names. Pulling them out into their own columns will make the data easier to use, and ultimately make any analysis you do down the road easier also.
You have two main data types in the dataset: the objective values ("X_Real", as you said), and then the ordering data ("RankedAttribute_X"). One way of tidying the data would be to convert "X_Real"-type column names into having "RankedAttribute_X"-style column names too, and then doing a pivot_longer with pattern matching. The other way (the one I went with) is to create two separate dataframes for the two different data types, and then join them.
# first, we create the dataframe with the real observations
real_df <- dt |>
select(session_id, matches("Real$")) |> # session_id and columns which end in Real
pivot_longer(-session_id, names_to = c("Attribute", ".value"), names_sep = "_") # this uses regex. It's kinda hard to explain, but if you look at the output hopefully it will give you an idea of what is going on. Feel free to ask me any questions you have!
# next, the ranking dataframe
rank_df <- dt |>
select(session_id, matches("^Real|^Retreived|^Ranked")) |> # ^ means the start of a line in regex, so ^Real matches columns which start with Real
pivot_longer(-session_id, names_to = c("type", ".value", "num"), names_pattern = "(Real|Retreived|Ranked)(.*)_([0-9]+)")
# finally, we join the real observations to the rankings
left_join(rank_df, real_df, by = join_by(session_id, Attribute))
Output:
# A tibble: 120 × 5
session_id type num Attribute Real
<chr> <chr> <chr> <chr> <dbl>
1 17472631 Real 1 Attractiveness -2
2 17472631 Real 2 Calmness -3
3 17472631 Real 3 Generosity 1
4 17472631 Real 4 Arrogance -2
5 17472631 Real 5 Ambition -1
6 17472631 Real 6 Charisma -1
7 17472631 Real 7 Intelligence 3
8 17472631 Real 8 Joyfulness 3
9 17472631 Real 9 Friendliness -3
10 17472631 Real 10 Laziness -1
# ℹ 110 more rows
You could pivot the data wider again, but I'd say it's debatable if this is better or worse:
# add the line below onto the end, after the join
pivot_wider(id_cols = c(session_id, type), names_from = num, values_from = Real)