Everything worked well until I tried to pass variables to geom_point in ggplot. This is the error message I got:
Error in `geom_point()`:
! Problem while computing aesthetics.
ℹ Error occurred in the 2nd layer.
Caused by error:
! object 'proficiency' not found
Here is my function:
# Draw a boxplot of `component` grouped by `variable`, overlay the per-group
# means as points, and annotate the plot with a Kruskal-Wallis test label and
# an effect-size string.
#
# data      data frame; its `level` column is read when `group` is "high" or
#           "low", and it must hold the columns named by `component` and by
#           the chosen `variable` option.
# component column name (string) plotted on the y axis.
# variable  one of "proficiency", "experience", "region", "tech_use".
# group     "all" (default), "high", or "low".
#
# NOTE(review): as posted, this version fails in the geom_point() layer; see
# the note next to that layer.
my_function <- function(data, component, variable, group = "all"){
library(dplyr)
#==== data based on group ===
# Keep every row for "all"; otherwise keep rows whose `level` equals `group`.
if(group == "all"){
df <- data
} else if(group == "high"){
df <- data[data$level == "high",]
} else if(group == "low"){
df <- data[data$level == "low",]
} else{
stop("unrecognized level, which musg be 'all', 'high', or 'low'")
}
#==== selection based on independent variables ====
# Each branch sets the level order for the x axis, the filtered data, the
# x-axis label, and turns `variable` into a symbol for use with {{ }} below.
if (variable == "proficiency"){
order <- c("below average", "average", "above average")
# Drop the "Saya tidak tahu" answers.
data_df <- df[df$proficiency != "Saya tidak tahu" , ]
x_axis_lab <- "English proficiency level"
variable <- as.name(variable)
} else if(variable =="experience"){
order <- c("inexperienced", "experienced")
data_df <- df
x_axis_lab <- "Teaching experience"
# The "experience" option is backed by the `exp` column.
variable <- as.name("exp")
} else if(variable =="region"){
order <- c("remote", "rural", "urban")
data_df <- df[df$school == "remote" | df$school == "rural" | df$school == "urban", ]
x_axis_lab <- "School region"
# The "region" option is backed by the `school` column.
variable <- as.name("school")
} else if(variable == "tech_use"){
order <- c("sometimes", "usually", "always")
data_df <- df[df$tech_use != "never" & df$tech_use != "rarely", ]
x_axis_lab <- "The frequency of technology use"
variable <- as.name(variable)
} else {
stop("dependent variable is invalid")
}
#====variable conversion ===
# Build a two-column data frame with fixed names so the formula passed to
# kruskal_effsize() can refer to `component1` and `variable1`.
component <- as.name(component)
component2 <- data_df %>% select({{component}})
variable2 <- data_df %>% select({{variable}})
df2 <- data.frame(cbind(component2, variable2))
df3 <- setNames(df2, c("component1", "variable1"))
#====effect size===
effect_size <- as.data.frame(df3 %>% rstatix::kruskal_effsize(component1 ~ variable1))
effectsize <- paste("Effect size = ", round(effect_size$effsize, 3), " (",effect_size$magnitude, ")", sep = "")
# Text grob holding the effect-size string; added to the plot via
# annotation_custom() at the end of the chain.
effsize <- grid::grobTree(grid::textGrob(effectsize, x=0.1, y=0.945, hjust=-0.2,
gp=grid::gpar(col="#1f1f1f", fontsize=11, family="sans", fontface = "plain")))
library(ggplot2)
ggplot(data_df, aes(x=factor({{variable}}, level = order), y = {{component}}, fill={{variable}})) +
geom_boxplot() +
# NOTE(review): this layer is where the reported error arises. `{{ }}` is a
# tidy-evaluation operator and aggregate() is base R, so the formula below is
# presumably not built from the column names held in `component`/`variable`.
geom_point(data = aggregate({{component}} ~ {{variable}}, data =data_df, mean),
aes(x = {{variable}}, y = {{component}}), color = "#FFF", size = 1.5) +
scale_fill_brewer(palette="Dark2")+ theme_classic() + theme(legend.position = "none", axis.title.x = element_text(vjust = -1.1)) + labs(x = x_axis_lab) +
ggpubr::stat_kruskal_test(label = "as_detailed_italic", label.y = 5.1, hjust=0.2)+
annotation_custom(effsize)
}
My code to call the function:
# Call that produces the error shown above; `df` is presumably the data frame
# listed below (TODO confirm).
my_function(data = df, component = "TPACK", variable = "proficiency", group = "low")
I want to know what I did wrong in geom_point. When I commented out these two lines, the plot was generated correctly.
Here is my data (to save space, I only included 21 records):
structure(list(ID = c("PST100", "PST101", "PST102", "PST103",
"PST104", "PST105", "PST106", "PST107", "PST108", "PST109", "PST110",
"PST111", "PST112", "PST113", "PST114", "PST115", "PST116", "PST117",
"PST118", "PST119", "PST120"), record_id = c(158L, 160L, 161L,
162L, 163L, 166L, 167L, 169L, 170L, 171L, 172L, 173L, 174L, 175L,
183L, 185L, 186L, 187L, 189L, 190L, 191L), gender = c("female",
"female", "female", "male", "female", "female", "female", "female",
"female", "female", "female", "female", "male", "female", "female",
"female", "female", "male", "female", "female", "female"), age = c(22,
23, 43, 36, 47, 24, 22, 21, 23, 21, 21, 23, 22, 22, 20, 22, 20,
21, 21, 21, 21), school = c("urban", "urban", "urban", "remote",
"urban", "urban", "remote", "remote", "urban", "rural", "urban",
"rural", "urban", "rural", "urban", "rural", "all", "rural",
"rural", "rural", "non_remote"), proficiency = c("average", "average",
"average", "below average", "above average", "average", "average",
"average", "average", "above average", "above average", "average",
"average", "below average", "average", "below average", "above average",
"average", "average", "below average", "average"), tech_use = c("always",
"rarely", "always", "sometimes", "usually", "usually", "always",
"sometimes", "always", "never", "always", "sometimes", "usually",
"sometimes", "usually", "sometimes", "always", "sometimes", "sometimes",
"usually", "usually"), experience = c("level_2", "level_1", "level_6",
"level_6", "level_6", "level_1", "level_2", "level_6", "level_2",
"level_2", "level_2", "level_1", "level_2", "level_1", "level_2",
"level_1", "level_1", "level_3", "level_5", "level_1", "level_2"
), exp = c("inexperienced", "inexperienced", "experienced", "experienced",
"experienced", "inexperienced", "inexperienced", "experienced",
"inexperienced", "inexperienced", "inexperienced", "inexperienced",
"inexperienced", "inexperienced", "inexperienced", "inexperienced",
"inexperienced", "inexperienced", "experienced", "inexperienced",
"inexperienced"), level = c("low", "low", "low", "high", "high",
"low", "high", "low", "high", "low", "high", "low", "high", "low",
"high", "low", "high", "low", "low", "low", "low"), CK1 = c(3L,
4L, 3L, 4L, 4L, 3L, 4L, 4L, 5L, 5L, 4L, 3L, 5L, 4L, 4L, 4L, 4L,
2L, 4L, 3L, 4L), CK2 = c(3L, 4L, 3L, 4L, 4L, 3L, 4L, 3L, 4L,
5L, 4L, 3L, 5L, 4L, 5L, 4L, 5L, 3L, 4L, 3L, 3L), CK3 = c(4L,
4L, 3L, 4L, 4L, 3L, 4L, 5L, 5L, 5L, 4L, 3L, 4L, 4L, 5L, 4L, 5L,
2L, 3L, 3L, 4L), CK4 = c(4L, 4L, 3L, 4L, 4L, 2L, 4L, 4L, 4L,
4L, 4L, 3L, 4L, 4L, 5L, 3L, 4L, 3L, 3L, 2L, 4L), PK6 = c(5L,
4L, 3L, 4L, 5L, 4L, 5L, 3L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 5L,
3L, 5L, 2L, 4L), PK7 = c(5L, 4L, 3L, 4L, 5L, 4L, 5L, 4L, 5L,
4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 4L), PK8 = c(5L,
4L, 2L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 4L,
3L, 4L, 3L, 3L), PK9 = c(4L, 4L, 3L, 5L, 5L, 4L, 5L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L), PK10 = c(4L,
4L, 3L, 4L, 5L, 3L, 4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 3L, 4L,
3L, 3L, 3L, 4L), PK11 = c(4L, 4L, 2L, 4L, 5L, 3L, 5L, 3L, 4L,
4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 3L, 3L, 3L), PK12 = c(4L,
3L, 3L, 5L, 5L, 3L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
3L, 4L, 4L, 4L), PCK13 = c(4L, 3L, 3L, 5L, 5L, 3L, 5L, 3L, 4L,
3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 4L, 3L), PCK14 = c(4L,
3L, 3L, 5L, 5L, 2L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 4L,
3L, 4L, 3L, 4L), PCK15 = c(3L, 3L, 3L, 5L, 5L, 3L, 4L, 4L, 4L,
5L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L), PCK16 = c(3L,
3L, 3L, 4L, 5L, 3L, 3L, 2L, 3L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L,
3L, 4L, 4L, 3L), TK17 = c(4L, 3L, 3L, 5L, 5L, 4L, 5L, 4L, 5L,
5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 2L, 4L, 4L, 5L), TK19 = c(4L,
3L, 3L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L,
2L, 4L, 4L, 5L), TK20 = c(4L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 5L), TK21 = c(4L,
3L, 4L, 4L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L,
2L, 4L, 4L, 5L), TK22 = c(4L, 3L, 3L, 4L, 5L, 2L, 5L, 5L, 5L,
5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 5L), TK23 = c(4L,
3L, 3L, 4L, 5L, 2L, 5L, 5L, 4L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L,
2L, 4L, 4L, 5L), TCK25 = c(4L, 4L, 3L, 4L, 5L, 3L, 5L, 4L, 5L,
3L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 5L), TCK26 = c(4L,
4L, 2L, 4L, 4L, 3L, 5L, 4L, 5L, 3L, 5L, 3L, 5L, 4L, 4L, 4L, 4L,
3L, 4L, 3L, 5L), TCK27 = c(3L, 4L, 2L, 4L, 4L, 3L, 4L, 5L, 5L,
3L, 5L, 3L, 5L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 4L), TCK28 = c(3L,
3L, 3L, 4L, 3L, 3L, 5L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 3L, 4L,
3L, 4L, 3L, 3L), TCK29 = c(4L, 3L, 3L, 4L, 5L, 3L, 5L, 5L, 5L,
3L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 3L), TCK30 = c(4L,
3L, 3L, 5L, 5L, 3L, 5L, 4L, 5L, 3L, 5L, 3L, 4L, 4L, 4L, 4L, 5L,
3L, 4L, 4L, 3L), TCK31 = c(3L, 3L, 2L, 5L, 4L, 3L, 5L, 3L, 5L,
3L, 5L, 4L, 3L, 4L, 4L, 3L, 5L, 2L, 4L, 4L, 3L), TPK32 = c(4L,
3L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 3L, 5L, 4L, 5L, 4L, 4L, 4L, 5L,
3L, 4L, 3L, 4L), TPK33 = c(3L, 3L, 2L, 5L, 5L, 3L, 5L, 3L, 4L,
3L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 2L, 4L, 3L, 4L), TPK34 = c(4L,
3L, 2L, 5L, 5L, 3L, 5L, 3L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 4L, 5L,
2L, 4L, 3L, 4L), TPK35 = c(4L, 4L, 2L, 5L, 5L, 3L, 5L, 3L, 4L,
4L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 3L, 4L, 2L, 4L), TPK36 = c(4L,
4L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 4L, 5L,
2L, 4L, 2L, 3L), TPK37 = c(3L, 3L, 2L, 4L, 5L, 3L, 5L, 3L, 4L,
4L, 5L, 4L, 4L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 4L), TPK38 = c(4L,
3L, 2L, 4L, 5L, 3L, 5L, 3L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 5L,
3L, 4L, 3L, 3L), TPCK39 = c(3L, 3L, 2L, 4L, 4L, 3L, 5L, 3L, 4L,
3L, 5L, 4L, 3L, 3L, 4L, 3L, 5L, 3L, 4L, 3L, 3L), TPCK40 = c(4L,
3L, 3L, 4L, 5L, 3L, 5L, 3L, 4L, 5L, 5L, 4L, 3L, 4L, 4L, 4L, 4L,
3L, 4L, 3L, 3L), TPCK41 = c(4L, 4L, 2L, 4L, 5L, 3L, 5L, 4L, 4L,
5L, 5L, 4L, 4L, 4L, 4L, 3L, 5L, 3L, 4L, 3L, 4L), TPCK42 = c(4L,
3L, 3L, 4L, 4L, 3L, 5L, 4L, 4L, 5L, 5L, 4L, 5L, 3L, 4L, 4L, 4L,
3L, 4L, 3L, 4L), TPCK43 = c(3L, 4L, 2L, 4L, 5L, 3L, 5L, 4L, 5L,
5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L), TPCK44 = c(3L,
3L, 3L, 4L, 4L, 3L, 5L, 3L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L,
2L, 4L, 3L, 4L), TPCK45 = c(3L, 4L, 2L, 4L, 4L, 3L, 4L, 4L, 4L,
4L, 5L, 3L, 5L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 3L), CK = c(3.5,
4, 3, 4, 4, 2.75, 4, 4, 4.5, 4.75, 4, 3, 4.5, 4, 4.75, 3.75,
4.5, 2.5, 3.5, 2.75, 3.75), PK = c(4.42857142857143, 3.85714285714286,
2.71428571428571, 4.28571428571429, 5, 3.57142857142857, 4.85714285714286,
3.71428571428571, 4.42857142857143, 4, 4, 3.57142857142857, 3.57142857142857,
4, 4.14285714285714, 3.28571428571429, 4.14285714285714, 2.57142857142857,
3.85714285714286, 3, 3.71428571428571), PCK = c(3.5, 3, 3, 4.75,
5, 2.75, 4.25, 3.25, 3.75, 3.75, 4, 3.75, 4, 4.25, 4, 3.25, 4,
2.5, 4, 3.5, 3.5), TK = c(4, 3, 3.33333333333333, 4.5, 5, 3.33333333333333,
5, 4.83333333333333, 4.83333333333333, 5, 5, 3.5, 5, 4, 5, 4,
4, 2.33333333333333, 4, 4, 5), TCK = c(3.57142857142857, 3.42857142857143,
2.57142857142857, 4.28571428571429, 4.28571428571429, 3, 4.85714285714286,
4.28571428571429, 5, 3, 5, 3.57142857142857, 4.57142857142857,
4, 4, 3.57142857142857, 4.28571428571429, 2.42857142857143, 4,
3.42857142857143, 3.71428571428571), TPK = c(3.71428571428571,
3.28571428571429, 2, 4.57142857142857, 5, 3, 5, 3, 4.28571428571429,
3.71428571428571, 5, 4, 4.57142857142857, 4, 4, 3.71428571428571,
4.85714285714286, 2.42857142857143, 4, 2.57142857142857, 3.71428571428571
), TPCK = c(3.42857142857143, 3.42857142857143, 2.42857142857143,
4, 4.42857142857143, 3, 4.85714285714286, 3.57142857142857, 4.14285714285714,
4.57142857142857, 5, 3.85714285714286, 4, 3.71428571428571, 4,
3.57142857142857, 4.28571428571429, 2.71428571428571, 4, 2.85714285714286,
3.42857142857143), TPACK = c(3.76190476190476, 3.42857142857143,
2.66666666666667, 4.33333333333333, 4.69047619047619, 3.0952380952381,
4.76190476190476, 3.80952380952381, 4.45238095238095, 4.07142857142857,
4.64285714285714, 3.64285714285714, 4.30952380952381, 3.97619047619048,
4.23809523809524, 3.5952380952381, 4.30952380952381, 2.5, 3.92857142857143,
3.14285714285714, 3.83333333333333)), row.names = 100:120, class = "data.frame")
I can't reproduce your error. Instead I get the error
Error in model.frame.default(formula = { : object is not a matrix
Anyway, the issue is that you can't use curly-curly (aka {{) inside aggregate(). Instead I would suggest using dplyr::summarise(). Besides that, I simplified the code a bit and, as you are passing your variable names as character strings, switched to the .data pronoun.
library(ggplot2)
library(dplyr)
#' Boxplot of a score by a grouping variable, with Kruskal-Wallis annotations
#'
#' Draws a boxplot of `component` for each level of `variable`, overlays the
#' per-group means as white points, and annotates the plot with the
#' Kruskal-Wallis test label and the effect size computed by
#' `rstatix::kruskal_effsize()`.
#'
#' @param data A data frame. Its `level` column is read when `group` is
#'   `"high"` or `"low"`; it must also hold the column named by `component`
#'   and the column behind the chosen `variable` option.
#' @param component Name (string) of the numeric column plotted on the y axis.
#' @param variable Grouping variable, one of `"proficiency"`, `"experience"`
#'   (column `exp`), `"region"` (column `school`) or `"tech_use"`.
#' @param group Subset of `data` to use: `"all"` (default), `"high"` or
#'   `"low"`.
#' @return A ggplot object.
my_function <- function(data, component, variable, group = "all") {
  # ==== data based on group ===
  if (!is.character(group) || length(group) != 1L ||
    !group %in% c("all", "high", "low")) {
    stop("unrecognized level, which must be 'all', 'high', or 'low'")
  }
  # `%in%` never yields NA, so rows with a missing `level` are dropped rather
  # than turned into all-NA rows (which `==` subsetting would produce).
  df <- if (group == "all") data else data[data$level %in% group, ]

  # ==== selection based on independent variables ====
  # `level_order` (not `order`) so base::order() is not shadowed.
  if (variable == "proficiency") {
    level_order <- c("below average", "average", "above average")
    data_df <- df[!df$proficiency %in% "Saya tidak tahu", ]
    x_axis_lab <- "English proficiency level"
  } else if (variable == "experience") {
    level_order <- c("inexperienced", "experienced")
    data_df <- df
    x_axis_lab <- "Teaching experience"
    variable <- "exp"
  } else if (variable == "region") {
    level_order <- c("remote", "rural", "urban")
    data_df <- df[df$school %in% level_order, ]
    x_axis_lab <- "School region"
    variable <- "school"
  } else if (variable == "tech_use") {
    level_order <- c("sometimes", "usually", "always")
    data_df <- df[!df$tech_use %in% c("never", "rarely"), ]
    x_axis_lab <- "The frequency of technology use"
  } else {
    stop(
      "independent variable is invalid; it must be 'proficiency', ",
      "'experience', 'region', or 'tech_use'"
    )
  }

  # ==== effect size ===
  # Rename to fixed column names so the formula can be written literally.
  effect_size <- data_df %>%
    select(all_of(c(component1 = component, variable1 = variable))) %>%
    rstatix::kruskal_effsize(component1 ~ variable1)
  effect_label <- paste0(
    "Effect size = ",
    round(effect_size$effsize, 3), " (", effect_size$magnitude, ")"
  )
  effect_grob <- grid::textGrob(
    effect_label,
    x = 0.1, y = 0.945, hjust = -0.2,
    gp = grid::gpar(
      col = "#1f1f1f", fontsize = 11,
      family = "sans", fontface = "plain"
    )
  )

  # ==== group means for the point layer ===
  # na.rm = TRUE so one missing score does not erase a group's mean; the
  # boxplot layer likewise ignores missing values.
  group_means <- summarise(
    data_df,
    "{component}" := mean(.data[[component]], na.rm = TRUE),
    .by = all_of(variable)
  )

  # ==== plot ===
  # Both layers map x through the same ordered factor so they share one
  # discrete axis; `levels` is spelled out instead of relying on partial
  # argument matching.
  ggplot(data_df, aes(
    x = factor(.data[[variable]], levels = level_order),
    y = .data[[component]], fill = .data[[variable]]
  )) +
    geom_boxplot() +
    geom_point(
      data = group_means,
      aes(
        x = factor(.data[[variable]], levels = level_order),
        y = .data[[component]]
      ),
      color = "#FFF", size = 1.5
    ) +
    scale_fill_brewer(palette = "Dark2") +
    theme_classic() +
    theme(
      legend.position = "none",
      axis.title.x = element_text(vjust = 1)
    ) +
    labs(x = x_axis_lab) +
    ggpubr::stat_kruskal_test(
      label = "as_detailed_italic",
      label.y = 5.1, hjust = 0.2
    ) +
    annotation_custom(effect_grob)
}
# Example call: TPACK by proficiency, restricted to rows whose `level` is "low".
my_function(data = df, component = "TPACK", variable = "proficiency", group = "low")