r function ggplot2 dplyr

Error in passing a variable to ggplot in an r custom function


Everything worked well until I tried to pass variables to geom_point in ggplot. This is the error message I got:

Error in `geom_point()`:
! Problem while computing aesthetics.
ℹ Error occurred in the 2nd layer.
Caused by error:
! object 'proficiency' not found

Here is my function:

#' Box plot of `component` by a grouping variable, annotated with the
#' Kruskal-Wallis test and its effect size.
#'
#' @param data Data frame; its `level` column is read when `group` is
#'   "high" or "low".
#' @param component Column name (string) mapped to the y axis.
#' @param variable One of "proficiency", "experience", "region", "tech_use".
#'   "experience" is mapped to column `exp` and "region" to column `school`.
#' @param group "all" (default), "high" or "low".
#' @return The ggplot object built by the last expression.
my_function <- function(data, component, variable, group = "all"){
  library(dplyr)
#==== data based on group ====
# Keep every row, or only the rows whose `level` equals the requested group.
if(group == "all"){
  df <- data
}  else if(group == "high"){
  df <- data[data$level == "high",]
} else if(group == "low"){
  df <- data[data$level == "low",]
} else{
  stop("unrecognized level, which musg be 'all', 'high', or 'low'")
}
#==== selection based on independent variables ====
# Each branch sets: the category order for the x axis (`order`), the rows to
# plot (`data_df`), the x-axis label, and `variable` converted from a string
# to a symbol naming the column to group by.
if (variable == "proficiency"){
  order <- c("below average", "average", "above average")
  # Rows answered "Saya tidak tahu" are excluded.
  data_df <- df[df$proficiency != "Saya tidak tahu" , ] 
  x_axis_lab <- "English proficiency level"
  variable <- as.name(variable)
} else if(variable =="experience"){
  order <-  c("inexperienced", "experienced")
  data_df <- df
  x_axis_lab <- "Teaching experience"
  # The grouping column for "experience" is `exp`, not `experience`.
  variable <- as.name("exp")
} else if(variable =="region"){
  order <- c("remote", "rural", "urban")
  data_df <- df[df$school == "remote" | df$school == "rural" | df$school == "urban", ]
  x_axis_lab <- "School region"
  variable <- as.name("school")
} else if(variable == "tech_use"){
  order <- c("sometimes", "usually", "always")
  data_df <- df[df$tech_use != "never" & df$tech_use != "rarely", ]
  x_axis_lab <- "The frequency of technology use"
  variable <- as.name(variable)
} else {
  stop("dependent variable is invalid")
}
#==== variable conversion ====
# Build a two-column data frame with fixed names (`component1`, `variable1`)
# so the effect-size formula below can be written literally.
component <- as.name(component)
component2 <- data_df %>% select({{component}})
variable2 <- data_df %>% select({{variable}})
df2 <- data.frame(cbind(component2, variable2))
df3 <- setNames(df2, c("component1", "variable1"))
#==== effect size ====
# Kruskal-Wallis effect size, formatted as a text grob that is placed on the
# plot with annotation_custom().
effect_size <- as.data.frame(df3 %>% rstatix::kruskal_effsize(component1 ~ variable1))
effectsize <- paste("Effect size = ", round(effect_size$effsize, 3), " (",effect_size$magnitude, ")", sep = "")
effsize <- grid::grobTree(grid::textGrob(effectsize, x=0.1,  y=0.945, hjust=-0.2,
                                         gp=grid::gpar(col="#1f1f1f", fontsize=11, family="sans", fontface = "plain")))
library(ggplot2)

  # Layer 1: one box per category, ordered by `order`.
  ggplot(data_df, aes(x=factor({{variable}}, level = order), y = {{component}}, fill={{variable}})) +
    geom_boxplot() +
    # Layer 2: intended to overlay the per-category mean as a white point.
    # NOTE(review): this is the 2nd layer, i.e. the one named in the reported
    # error; `{{ }}` is used here inside a stats::aggregate() formula.
    geom_point(data = aggregate({{component}} ~ {{variable}}, data =data_df, mean),
                 aes(x = {{variable}}, y = {{component}}), color = "#FFF", size = 1.5) +
    scale_fill_brewer(palette="Dark2")+ theme_classic() + theme(legend.position = "none", axis.title.x = element_text(vjust = -1.1)) + labs(x = x_axis_lab) +
    # Kruskal-Wallis test label, followed by the effect-size grob built above.
    ggpubr::stat_kruskal_test(label = "as_detailed_italic", label.y = 5.1, hjust=0.2)+
    annotation_custom(effsize)
}

My code to call the function:

# Plot TPACK by proficiency for the "low" group; `df` is presumably the data
# frame listed below (the listing itself is not assigned to a name).
my_function(data = df, component = "TPACK", variable = "proficiency", group = "low")

I want to know what I did wrong in geom_point. When I commented out the two geom_point lines, the plot was generated correctly.

Here is my data (to save space, I only included 21 records):

structure(list(ID = c("PST100", "PST101", "PST102", "PST103", 
"PST104", "PST105", "PST106", "PST107", "PST108", "PST109", "PST110", 
"PST111", "PST112", "PST113", "PST114", "PST115", "PST116", "PST117", 
"PST118", "PST119", "PST120"), record_id = c(158L, 160L, 161L, 
162L, 163L, 166L, 167L, 169L, 170L, 171L, 172L, 173L, 174L, 175L, 
183L, 185L, 186L, 187L, 189L, 190L, 191L), gender = c("female", 
"female", "female", "male", "female", "female", "female", "female", 
"female", "female", "female", "female", "male", "female", "female", 
"female", "female", "male", "female", "female", "female"), age = c(22, 
23, 43, 36, 47, 24, 22, 21, 23, 21, 21, 23, 22, 22, 20, 22, 20, 
21, 21, 21, 21), school = c("urban", "urban", "urban", "remote", 
"urban", "urban", "remote", "remote", "urban", "rural", "urban", 
"rural", "urban", "rural", "urban", "rural", "all", "rural", 
"rural", "rural", "non_remote"), proficiency = c("average", "average", 
"average", "below average", "above average", "average", "average", 
"average", "average", "above average", "above average", "average", 
"average", "below average", "average", "below average", "above average", 
"average", "average", "below average", "average"), tech_use = c("always", 
"rarely", "always", "sometimes", "usually", "usually", "always", 
"sometimes", "always", "never", "always", "sometimes", "usually", 
"sometimes", "usually", "sometimes", "always", "sometimes", "sometimes", 
"usually", "usually"), experience = c("level_2", "level_1", "level_6", 
"level_6", "level_6", "level_1", "level_2", "level_6", "level_2", 
"level_2", "level_2", "level_1", "level_2", "level_1", "level_2", 
"level_1", "level_1", "level_3", "level_5", "level_1", "level_2"
), exp = c("inexperienced", "inexperienced", "experienced", "experienced", 
"experienced", "inexperienced", "inexperienced", "experienced", 
"inexperienced", "inexperienced", "inexperienced", "inexperienced", 
"inexperienced", "inexperienced", "inexperienced", "inexperienced", 
"inexperienced", "inexperienced", "experienced", "inexperienced", 
"inexperienced"), level = c("low", "low", "low", "high", "high", 
"low", "high", "low", "high", "low", "high", "low", "high", "low", 
"high", "low", "high", "low", "low", "low", "low"), CK1 = c(3L, 
4L, 3L, 4L, 4L, 3L, 4L, 4L, 5L, 5L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 
2L, 4L, 3L, 4L), CK2 = c(3L, 4L, 3L, 4L, 4L, 3L, 4L, 3L, 4L, 
5L, 4L, 3L, 5L, 4L, 5L, 4L, 5L, 3L, 4L, 3L, 3L), CK3 = c(4L, 
4L, 3L, 4L, 4L, 3L, 4L, 5L, 5L, 5L, 4L, 3L, 4L, 4L, 5L, 4L, 5L, 
2L, 3L, 3L, 4L), CK4 = c(4L, 4L, 3L, 4L, 4L, 2L, 4L, 4L, 4L, 
4L, 4L, 3L, 4L, 4L, 5L, 3L, 4L, 3L, 3L, 2L, 4L), PK6 = c(5L, 
4L, 3L, 4L, 5L, 4L, 5L, 3L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 5L, 
3L, 5L, 2L, 4L), PK7 = c(5L, 4L, 3L, 4L, 5L, 4L, 5L, 4L, 5L, 
4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 4L), PK8 = c(5L, 
4L, 2L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 4L, 
3L, 4L, 3L, 3L), PK9 = c(4L, 4L, 3L, 5L, 5L, 4L, 5L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L), PK10 = c(4L, 
4L, 3L, 4L, 5L, 3L, 4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 3L, 4L, 
3L, 3L, 3L, 4L), PK11 = c(4L, 4L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 
4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 3L, 3L, 3L), PK12 = c(4L, 
3L, 3L, 5L, 5L, 3L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
3L, 4L, 4L, 4L), PCK13 = c(4L, 3L, 3L, 5L, 5L, 3L, 5L, 3L, 4L, 
3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 4L, 3L), PCK14 = c(4L, 
3L, 3L, 5L, 5L, 2L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 4L, 
3L, 4L, 3L, 4L), PCK15 = c(3L, 3L, 3L, 5L, 5L, 3L, 4L, 4L, 4L, 
5L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L), PCK16 = c(3L, 
3L, 3L, 4L, 5L, 3L, 3L, 2L, 3L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 
3L, 4L, 4L, 3L), TK17 = c(4L, 3L, 3L, 5L, 5L, 4L, 5L, 4L, 5L, 
5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 2L, 4L, 4L, 5L), TK19 = c(4L, 
3L, 3L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 
2L, 4L, 4L, 5L), TK20 = c(4L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 5L), TK21 = c(4L, 
3L, 4L, 4L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 
2L, 4L, 4L, 5L), TK22 = c(4L, 3L, 3L, 4L, 5L, 2L, 5L, 5L, 5L, 
5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 5L), TK23 = c(4L, 
3L, 3L, 4L, 5L, 2L, 5L, 5L, 4L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 
2L, 4L, 4L, 5L), TCK25 = c(4L, 4L, 3L, 4L, 5L, 3L, 5L, 4L, 5L, 
3L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 5L), TCK26 = c(4L, 
4L, 2L, 4L, 4L, 3L, 5L, 4L, 5L, 3L, 5L, 3L, 5L, 4L, 4L, 4L, 4L, 
3L, 4L, 3L, 5L), TCK27 = c(3L, 4L, 2L, 4L, 4L, 3L, 4L, 5L, 5L, 
3L, 5L, 3L, 5L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 4L), TCK28 = c(3L, 
3L, 3L, 4L, 3L, 3L, 5L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 
3L, 4L, 3L, 3L), TCK29 = c(4L, 3L, 3L, 4L, 5L, 3L, 5L, 5L, 5L, 
3L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 3L), TCK30 = c(4L, 
3L, 3L, 5L, 5L, 3L, 5L, 4L, 5L, 3L, 5L, 3L, 4L, 4L, 4L, 4L, 5L, 
3L, 4L, 4L, 3L), TCK31 = c(3L, 3L, 2L, 5L, 4L, 3L, 5L, 3L, 5L, 
3L, 5L, 4L, 3L, 4L, 4L, 3L, 5L, 2L, 4L, 4L, 3L), TPK32 = c(4L, 
3L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 3L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 
3L, 4L, 3L, 4L), TPK33 = c(3L, 3L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 
3L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 2L, 4L, 3L, 4L), TPK34 = c(4L, 
3L, 2L, 5L, 5L, 3L, 5L, 3L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 
2L, 4L, 3L, 4L), TPK35 = c(4L, 4L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 
4L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 3L, 4L, 2L, 4L), TPK36 = c(4L, 
4L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 
2L, 4L, 2L, 3L), TPK37 = c(3L, 3L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 
4L, 5L, 4L, 4L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 4L), TPK38 = c(4L, 
3L, 2L, 4L, 5L, 3L, 5L, 3L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 5L, 
3L, 4L, 3L, 3L), TPCK39 = c(3L, 3L, 2L, 4L, 4L, 3L, 5L, 3L, 4L, 
3L, 5L, 4L, 3L, 3L, 4L, 3L, 5L, 3L, 4L, 3L, 3L), TPCK40 = c(4L, 
3L, 3L, 4L, 5L, 3L, 5L, 3L, 4L, 5L, 5L, 4L, 3L, 4L, 4L, 4L, 4L, 
3L, 4L, 3L, 3L), TPCK41 = c(4L, 4L, 2L, 4L, 5L, 3L, 5L, 4L, 4L, 
5L, 5L, 4L, 4L, 4L, 4L, 3L, 5L, 3L, 4L, 3L, 4L), TPCK42 = c(4L, 
3L, 3L, 4L, 4L, 3L, 5L, 4L, 4L, 5L, 5L, 4L, 5L, 3L, 4L, 4L, 4L, 
3L, 4L, 3L, 4L), TPCK43 = c(3L, 4L, 2L, 4L, 5L, 3L, 5L, 4L, 5L, 
5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L), TPCK44 = c(3L, 
3L, 3L, 4L, 4L, 3L, 5L, 3L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 
2L, 4L, 3L, 4L), TPCK45 = c(3L, 4L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 
4L, 5L, 3L, 5L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 3L), CK = c(3.5, 
4, 3, 4, 4, 2.75, 4, 4, 4.5, 4.75, 4, 3, 4.5, 4, 4.75, 3.75, 
4.5, 2.5, 3.5, 2.75, 3.75), PK = c(4.42857142857143, 3.85714285714286, 
2.71428571428571, 4.28571428571429, 5, 3.57142857142857, 4.85714285714286, 
3.71428571428571, 4.42857142857143, 4, 4, 3.57142857142857, 3.57142857142857, 
4, 4.14285714285714, 3.28571428571429, 4.14285714285714, 2.57142857142857, 
3.85714285714286, 3, 3.71428571428571), PCK = c(3.5, 3, 3, 4.75, 
5, 2.75, 4.25, 3.25, 3.75, 3.75, 4, 3.75, 4, 4.25, 4, 3.25, 4, 
2.5, 4, 3.5, 3.5), TK = c(4, 3, 3.33333333333333, 4.5, 5, 3.33333333333333, 
5, 4.83333333333333, 4.83333333333333, 5, 5, 3.5, 5, 4, 5, 4, 
4, 2.33333333333333, 4, 4, 5), TCK = c(3.57142857142857, 3.42857142857143, 
2.57142857142857, 4.28571428571429, 4.28571428571429, 3, 4.85714285714286, 
4.28571428571429, 5, 3, 5, 3.57142857142857, 4.57142857142857, 
4, 4, 3.57142857142857, 4.28571428571429, 2.42857142857143, 4, 
3.42857142857143, 3.71428571428571), TPK = c(3.71428571428571, 
3.28571428571429, 2, 4.57142857142857, 5, 3, 5, 3, 4.28571428571429, 
3.71428571428571, 5, 4, 4.57142857142857, 4, 4, 3.71428571428571, 
4.85714285714286, 2.42857142857143, 4, 2.57142857142857, 3.71428571428571
), TPCK = c(3.42857142857143, 3.42857142857143, 2.42857142857143, 
4, 4.42857142857143, 3, 4.85714285714286, 3.57142857142857, 4.14285714285714, 
4.57142857142857, 5, 3.85714285714286, 4, 3.71428571428571, 4, 
3.57142857142857, 4.28571428571429, 2.71428571428571, 4, 2.85714285714286, 
3.42857142857143), TPACK = c(3.76190476190476, 3.42857142857143, 
2.66666666666667, 4.33333333333333, 4.69047619047619, 3.0952380952381, 
4.76190476190476, 3.80952380952381, 4.45238095238095, 4.07142857142857, 
4.64285714285714, 3.64285714285714, 4.30952380952381, 3.97619047619048, 
4.23809523809524, 3.5952380952381, 4.30952380952381, 2.5, 3.92857142857143, 
3.14285714285714, 3.83333333333333)), row.names = 100:120, class = "data.frame")


Solution

  • I can't reproduce your error. Instead, I get this error:

    Error in model.frame.default(formula = { : object is not a matrix

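    Anyway, the issue is that you can't use curly-curly (aka {{ }}) in aggregate(): the embrace operator is only interpreted by tidy-evaluation functions such as the dplyr verbs and aes(), not by base R formulas. Instead, I would suggest using dplyr::summarise. Besides that, I simplified the code a bit and, as you are passing your variable names as character strings, switched to the .data pronoun.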

    library(ggplot2)
    library(dplyr)
    
    #' Box plot of a score by a grouping variable, with Kruskal-Wallis annotations
    #'
    #' Filters `data` to the requested `group`, keeps only the categories that
    #' are plotted for `variable`, and draws one box per category with the
    #' category mean overlaid as a white point. The Kruskal-Wallis test label
    #' (ggpubr) and its effect size (rstatix) are added as annotations.
    #'
    #' @param data Data frame holding the column named by `component` and the
    #'   grouping column selected through `variable`; a `level` column is read
    #'   when `group` is "high" or "low".
    #' @param component Name (string) of the column drawn on the y axis; it is
    #'   averaged per category, so it is expected to be numeric.
    #' @param variable One of "proficiency", "experience", "region" or
    #'   "tech_use". "experience" reads column `exp`; "region" reads `school`.
    #' @param group "all" (default), "high" or "low"; the latter two keep only
    #'   rows whose `level` equals that value.
    #' @return A ggplot object.
    #' @details ggplot2 and dplyr (with `.by` support) must be attached by the
    #'   caller; rstatix, ggpubr and grid are reached through `::`.
    my_function <- function(data, component, variable, group = "all") {
      # ==== data based on group ====
      if (group == "all") {
        df <- data
      } else if (group %in% c("high", "low")) {
        # `%in%` never yields NA, so rows with a missing `level` are dropped
        # instead of coming back as all-NA rows (what `==` indexing produces).
        df <- data[data$level %in% group, ]
      } else {
        stop("unrecognized level, which must be 'all', 'high', or 'low'")
      }

      # ==== selection based on independent variables ====
      # Each branch fixes the x-axis category order, the rows to plot, the axis
      # label and, where it differs from the argument, the real column name.
      if (variable == "proficiency") {
        level_order <- c("below average", "average", "above average")
        # "Saya tidak tahu" (Indonesian for "I don't know") is not plotted.
        data_df <- df[!df$proficiency %in% "Saya tidak tahu", ]
        x_axis_lab <- "English proficiency level"
      } else if (variable == "experience") {
        level_order <- c("inexperienced", "experienced")
        data_df <- df
        x_axis_lab <- "Teaching experience"
        variable <- "exp"
      } else if (variable == "region") {
        level_order <- c("remote", "rural", "urban")
        data_df <- df[df$school %in% level_order, ]
        x_axis_lab <- "School region"
        variable <- "school"
      } else if (variable == "tech_use") {
        level_order <- c("sometimes", "usually", "always")
        data_df <- df[!df$tech_use %in% c("never", "rarely"), ]
        x_axis_lab <- "The frequency of technology use"
      } else {
        stop("dependent variable is invalid")
      }

      # Fail early with a readable message rather than deep inside select()
      # or ggplot when a requested column does not exist.
      missing_cols <- setdiff(c(component, variable), names(data_df))
      if (length(missing_cols) > 0) {
        stop(
          "column(s) not found in data: ",
          paste(missing_cols, collapse = ", "),
          call. = FALSE
        )
      }

      # ==== effect size ====
      # Rename to fixed names so the formula can be written literally.
      effect_size <- data_df %>%
        select(all_of(c(component1 = component, variable1 = variable))) %>%
        rstatix::kruskal_effsize(component1 ~ variable1)

      effectsize <- paste0(
        "Effect size = ",
        round(effect_size$effsize, 3), " (", effect_size$magnitude, ")"
      )
      effsize <- grid::textGrob(effectsize,
        x = 0.1, y = 0.945, hjust = -0.2,
        gp = grid::gpar(
          col = "#1f1f1f", fontsize = 11,
          family = "sans", fontface = "plain"
        )
      )

      # ==== per-category means ====
      # Keeps the original column name so both layers can share `.data[[...]]`.
      # na.rm = TRUE: one missing score must not blank the whole category mean.
      group_means <- summarise(
        data_df,
        "{component}" := mean(.data[[component]], na.rm = TRUE),
        .by = all_of(variable)
      )

      # ==== plot ====
      ggplot(data_df, aes(
        # `levels` spelled out in full; `level =` only worked through partial
        # argument matching.
        x = factor(.data[[variable]], levels = level_order),
        y = .data[[component]], fill = .data[[variable]]
      )) +
        geom_boxplot() +
        geom_point(
          data = group_means,
          aes(x = .data[[variable]], y = .data[[component]]),
          color = "#FFF", size = 1.5
        ) +
        scale_fill_brewer(palette = "Dark2") +
        theme_classic() +
        theme(
          legend.position = "none",
          axis.title.x = element_text(vjust = 1)
        ) +
        labs(x = x_axis_lab) +
        ggpubr::stat_kruskal_test(
          label = "as_detailed_italic",
          label.y = 5.1, hjust = 0.2
        ) +
        annotation_custom(effsize)
    }
    
    # Plot TPACK by proficiency for the "low" group; `df` is presumably the
    # question's data frame.
    my_function(data = df, component = "TPACK", variable = "proficiency", group = "low")
    

    enter image description here