I have to create a table that shows average values of each variable DT06_1-DT06_19 (caution: I only have variables DT06_1-DT06_3, DT06_7, DT06_10-DT06_19. The averages shall be computed by group. I have four groups shown in insuff_suff_res_cat (caution: not every participant shows values for each of the four categories. I dont have rows where the participant shows values for a category). And last, the averages on each DT06-variable shall only be computed where the respective DT01-variable shows the value "1" instead of "0". So together: For each group of the variable "insuff_suff_res_cat", please calculate the average value of each DT06-variable (e.g., for DT06_1, for DT06_2 and so on) only if the respective DT01-variable == "1".
my dataset:
structure(list(ID = c("poHYvL", "34HEg4", "poHYvL", "34HEg4",
"Y0FPVt", "uXaA9j", "nIogJg", "9tAGOE", "Y0FPVt", "uXaA9j",
"nIogJg", "9tAGOE"), insuff_suff_res_cat = c("not_enough_resources",
"not_enough_resources", "not_enough_resources_help", "not_enough_resources_help",
"enough_resources", "enough_resources", "enough_resources", "enough_resources",
"enough_resources_help", "enough_resources_help", "enough_resources_help",
"enough_resources_help"), DT01_1 = structure(c(0, 0, 0, 0, 1,
0, 0, 0, 1, 0, 0, 0), label = "Printed textbooks:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_2 = structure(c(0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0), label = "Digital textbooks:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_3 = structure(c(0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0), label = "Photocopied materials:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_4 = structure(c(0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0), label = "Academic articles and other articles/blogs:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_5 = structure(c(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0), label = "Lecturer slides/lecturer notes/lecture recordings:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_6 = structure(c(0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0), label = "Notes from lectures (your notes or other students’ notes):Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_7 = structure(c(0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0), label = "Printed practice test questions or exam practice papers:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_8 = structure(c(0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0), label = "Practice test questions or exam practice papers on my VLE or LMS (e g , Moodle, Canvas, Blackboard):Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_9 = structure(c(0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0), label = "University VLE or LMS discussion forums:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_10 = structure(c(0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0), label = "Digital study / revision platforms / websites or apps (could include flashcards, test questions, multiple choice questions, content summaries etc):Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_11 = structure(c(0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1), label = "Video (e g , Youtube, TED talks or similar platforms):Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_12 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Social Media (e g , TikTok):Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_13 = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0), label = "Online chat forums:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_14 = structure(c(0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0), label = "Chat-GPT or other AI based chatbots (/or open-AI technologies):Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_15 = structure(c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1), label = "Study plan apps/ goal tracking apps:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_16 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Memory booster / distraction blocker apps:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_17 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Software development platforms / Data analysis software:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_18 = structure(c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0), label = "Tutoring services:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT01_19 = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), label = "Other:Tool / resource usage", format.spss = "F5.0", labels = c(Unchecked = 0,
Checked = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), DT06_1 = c(NA, NA, NA, NA, "150", NA, NA, NA, "170", NA,
NA, NA), DT06_2 = c(NA, NA, NA, NA, "0", NA, "40", NA, "0", NA,
"40", NA), DT06_3 = c(NA, NA, NA, NA, "0", NA, NA, NA, "0", NA,
NA, NA), DT06_7 = c(NA, NA, NA, NA, "0", "0", NA, NA, "0", "0",
NA, NA), DT06_10 = c(NA, NA, NA, NA, "0", "0", NA, NA, "0", "0",
NA, NA), DT06_11 = c(NA, "0", NA, "0", "0", "0", NA, "60", "0",
"0", NA, "45"), DT06_12 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
DT06_13 = c(NA, NA, NA, NA, "0", NA, NA, NA, "0", NA, NA,
NA), DT06_14 = c(NA, NA, NA, NA, "50", "0", NA, NA, "50",
"0", NA, NA), DT06_15 = c(NA, NA, NA, NA, NA, NA, NA, "21",
NA, NA, NA, "21"), DT06_16 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), DT06_17 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), DT06_18 = c(NA,
NA, NA, NA, NA, NA, "60", NA, NA, NA, "60", NA), DT06_19 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_)), row.names = c(NA,
-12L), class = "data.frame")
the desired outcome (numbers are invented):
I have already tried to apply the summarize command but I really can't find a solution to connect it to my conditions. Any help appreciated.
Using haven
, dplyr
, and tidyr
, here is the workflow to return mean values of insuff_suff_res_cat groups from DT06* columns based on pairwise relationships with DT01* columns:
as.numeric()
so they can be interpreted by R and pivoted togetherlibrary(dplyr)
library(tidyr)
library(haven)
# Create vector of DT06 suffix values e.g. c(1,2,3,7,10,...) for filtering result
DT06_cols <- sub("^DT06_", "", grep("^DT06", names(df), value = TRUE))
result <- df |>
mutate(across(starts_with("DT"), as.numeric)) |>
pivot_longer(cols = starts_with("DT"),
names_to = c(".value", "ID1"),
names_pattern = "(DT\\d+)_(\\d+)") |>
filter(DT01 == 1) |>
summarise(DT06 = round(mean(DT06, na.rm = TRUE), 2),
.by = c(insuff_suff_res_cat, ID1)) |>
pivot_wider(id_cols = ID1,
names_from = insuff_suff_res_cat,
values_from = DT06) |>
complete(ID1 = DT06_cols) |>
arrange(as.integer(ID1)) |>
filter(ID1 %in% DT06_cols) |>
mutate(ID1 = paste0("DT06_", sprintf("%02d", as.integer(ID1))))
data.frame(result)
# ID1 not_enough_resources not_enough_resources_help enough_resources enough_resources_help
# 1 DT06_01 NA NA 150 170
# 2 DT06_02 NA NaN 20 20
# 3 DT06_03 NA NaN 0 0
# 4 DT06_07 NA NA 0 0
# 5 DT06_10 NA NA 0 0
# 6 DT06_11 0 0 20 15
# 7 DT06_12 NA NA NA NA
# 8 DT06_13 NA NA 0 0
# 9 DT06_14 NA NA 25 25
# 10 DT06_15 NA NA 21 21
# 11 DT06_16 NA NA NA NA
# 12 DT06_17 NA NA NA NA
# 13 DT06_18 NA NA 60 60
# 14 DT06_19 NA NA NA NA