rggplot2

Why can't I iteratively add ggplot objects to a list?


I've been stuck on this issue for a while. This is a simplified version of a larger code.

lc(df, months=c('May', 'June', 'August'))

This should give me the same plot 3 times. It gives me 3 different plots, 2 of them incorrect. I can't even wrap my head around what is happening. When I simply print(p) inside the loop, it works perfectly. Why doesn't this work? Thanks for any advice.

library(dplyr)
library(ggplot2)
library(ecotox)
df <- ecotox::lamprey_tox
lc<- function(data, dose='dose', months=c(), month='month', total='total', response='response'){

  data <- data%>% filter(month %in% c(months))

  myplots <- list()
  for (i in seq_along(months)){

    a <- data[data$month %in% months[i],]
    print(a)
    lc <- LC_probit((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
                    weights = total,
                    data =a)
    LC50 <- lc$dose[1] #strip lc50
    LC90 <- lc$dose[2] #strip lc90
    p <- ggplot(data=a,aes(x=log10(as.numeric(dose)),y=(response/total)))+
             geom_point(size=4)+ 
             geom_smooth(method="glm", 
                         method.args=list(family=binomial(link="probit")), 
                         aes(weight=total),colour="blue",se=TRUE, level=0.95) +
             geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
             geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
             geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
             geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1) 
 

myplots[[i]] <- p
  print(myplots[1]) 

    
    
  }
 
}



UPDATE:

Here is the source code for ecotox:LC_probit.

LC_probit <- function(formula, data, p = NULL,
                      weights = NULL,
                      subset = NULL, log_base = NULL,
                      log_x = TRUE,
                      het_sig = NULL,
                      conf_level = NULL,
                      conf_type = NULL,
                      long_output = TRUE) {
  
  model <- do.call("glm", list(formula = formula,
                               family = binomial(link = "probit"),
                               data = data,
                               weights = substitute(weights),
                               subset = substitute(subset)))
  
  # error message for missing weights argument in function call
  if(missing(weights)) {
    warning ("Are you using cbind(response, non-response) method as your y variable, if so do not weight the model. If you are using (response / total) method, model needs the total of test organisms per dose to weight the model properly",
             call. = FALSE)
  }

  if (is.null(p)) {
    p <- seq(1, 99, 1)
    warning("`p`argument has to be supplied otherwise LC values for 1-99 will be displayed", call. = FALSE)
  }
  
  
  
  chi_square <- sum(residuals.glm(model, type = "pearson") ^ 2)
  
  df <- df.residual(model)
  
  pgof <- pchisq(chi_square, df, lower.tail = FALSE)
  summary <- summary(model)
  b0 <- summary$coefficients[1]
  b1 <- summary$coefficients[2]
  intercept_se <- summary$coefficients[3]
  intercept_sig <- summary$coefficients[7]
  slope_se <- summary$coefficients[4]
  slope_sig <- summary$coefficients[8]
  z_value <- summary$coefficients[6]
  n <- df + 2
  est <- qnorm(p / 100)
  m <- (est - b0) / b1
  if (is.null(het_sig)) {
    het_sig <- 0.150
  }
  
  if (pgof < het_sig) {
    het <- chi_square / df
  } else {
    het <- 1
  }
  if (is.null(conf_type)) {
    conf_type <- c("fl")
  } else {
    
    conf_type <- c("dm")
    
  }
  
  if (conf_type == "fl") {
    if (pgof < het_sig) {
      vcova <- vcov(model) * het
    } else {
      vcova <- vcov(model)
    }
    var_b0 <- vcova[1, 1]
    var_b1 <- vcova[2, 2]
    cov_b0_b1 <- vcova[1, 2]
    if (is.null(conf_level)) {
      conf_level <- 0.95
    }
    
    t <- (1 - conf_level)
    
    t_2 <- (t / 2)
    
    if (pgof < het_sig) {
      tdis <- -qt(t_2, df = df)
    } else {
      tdis <- -qnorm(t_2)
    }
    g <- (tdis ^ 2 * var_b1) / (b1 ^ 2)
    cl_part_1 <- (g / (1 - g)) * (m + (cov_b0_b1 / var_b1))
    
    cl_part_2 <- (var_b0 + (2 *  cov_b0_b1 * m) + (m ^ 2 * var_b1) -
                    (g * (var_b0 - cov_b0_b1 ^ 2 / var_b1)))
    
    cl_part_3 <- (tdis / ((1 - g) * abs(b1))) * sqrt(cl_part_2)
    LCL <- (m + (cl_part_1 - cl_part_3))
    UCL <- (m + (cl_part_1 + cl_part_3))
    
  }
  cf <- -cbind(1, m) / b1
  se_1 <- ((cf %*% vcov(model)) * cf) %*% c(1, 1)
  se_2 <- as.numeric(sqrt(se_1))
  var_m <- (1 / (m ^ 2)) * (var_b0 + 2 * m * cov_b0_b1 +
                              m ^ 2 * var_b1)
  
  
  
  if (log_x == TRUE) {
    
    if(is.null(log_base)) {
      log_base <- 10
    }
    dose <- log_base ^ m
    LCL <- log_base ^ LCL
    UCL <- log_base ^ UCL
    se_2 <- log_base ^ se_2
  }
  
  if (log_x == FALSE) {
    dose <- m
    LCL <- LCL
    UCL <- UCL
    se_2 <- se_2
    
  }

  if (long_output == TRUE) {
    table <- tibble(p = p,
                    n = n,
                    dose = dose,
                    LCL = LCL,
                    UCL = UCL,
                    se = se_2,
                    chi_square = chi_square,
                    df = df,
                    pgof_sig = pgof,
                    h = het,
                    slope = b1,
                    slope_se = slope_se,
                    slope_sig = slope_sig,
                    intercept = b0,
                    intercept_se = intercept_se,
                    intercept_sig = intercept_sig,
                    z = z_value,
                    var_m = var_m,
                    covariance = cov_b0_b1)
  }
  
  if (long_output == FALSE) {
    table <- tibble(p = p,
                    n = n,
                    dose = dose,
                    LCL = LCL,
                    UCL = UCL)
  }
  return(table)
  
}

Here is the data for ecotox::lamprey_tox (the data I used in this example which is assigned to df.

structure(list(nominal_dose = c(0, 0.7, 0.7, 0.7, 1.1, 1.1, 1.1, 
1.3, 1.3, 1.3, 1.5, 1.5, 1.5, 1.7, 1.7, 1.7, 2, 2, 2, 0, 1.7, 
1.7, 1.7, 2, 2, 2, 2.3, 2.3, 2.3, 2.5, 2.5, 2.5, 2.7, 2.7, 2.7, 
3, 3, 3, 0, 2.7, 2.7, 3.3, 3.3, 3.7, 3.7, 4.1, 4.1, 4.5, 4.5, 
5, 5, 0, 1.5, 1.5, 2, 2, 2.3, 2.3, 2.6, 2.6, 3, 3, 3.5, 3.5), 
    tank = c("S", "A", "L", "M", "B", "K", "N", "C", "I", "O", 
    "D", "J", "P", "E", "H", "Q", "F", "G", "R", "A", "B", "M", 
    "N", "C", "L", "O", "D", "K", "P", "E", "J", "Q", "F", "I", 
    "R", "G", "H", "S", "M", "A", "L", "B", "K", "C", "J", "D", 
    "I", "E", "H", "F", "G", "m", "A", "L", "B", "K", "C", "J", 
    "D", "I", "E", "H", "F", "G"), month = c("May", "May", "May", 
    "May", "May", "May", "May", "May", "May", "May", "May", "May", 
    "May", "May", "May", "May", "May", "May", "May", "June", 
    "June", "June", "June", "June", "June", "June", "June", "June", 
    "June", "June", "June", "June", "June", "June", "June", "June", 
    "June", "June", "August", "August", "August", "August", "August", 
    "August", "August", "August", "August", "August", "August", 
    "August", "August", "September", "September", "September", 
    "September", "September", "September", "September", "September", 
    "September", "September", "September", "September", "September"
    ), dose = c(0.19, 0.79, 0.8, 0.76, 1.36, 1.22, 1.18, 1.54, 
    1.44, 1.36, 1.69, 1.66, 1.6, 1.92, 1.86, 1.8, 2.2, 2.18, 
    2.1, 0.06, 1.67, 1.69, 1.67, 2.09, 2.1, 2.09, 2.37, 2.38, 
    2.39, 2.55, 2.59, 2.59, 2.68, 2.78, 2.8, 3.07, 3.09, 3.11, 
    0.04, 2.81, 2.78, 3.42, 3.42, 3.87, 3.84, 4.25, 4.26, 4.68, 
    4.69, 5.19, 5.22, 0.05, 1.45, 1.48, 1.99, 2.01, 2.29, 2.32, 
    2.62, 2.65, 3.09, 3.06, 3.59, 3.6), response = c(0, 0, 1, 
    0, 13, 10, 7, 16, 11, 17, 17, 19, 18, 22, 19, 18, 20, 20, 
    20, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 6, 4, 4, 11, 8, 10, 
    11, 9, 0, 0, 0, 2, 2, 2, 6, 9, 4, 10, 5, 10, 10, 0, 0, 0, 
    4, 3, 9, 6, 10, 8, 10, 10, 10, 10), survive = c(20, 20, 19, 
    20, 8, 9, 11, 4, 10, 3, 3, 1, 2, 0, 1, 2, 0, 0, 0, 10, 9, 
    10, 9, 11, 10, 10, 9, 10, 10, 10, 4, 5, 6, 0, 3, 0, 0, 1, 
    10, 10, 10, 8, 8, 8, 4, 1, 6, 0, 5, 0, 0, 0, 10, 10, 6, 7, 
    1, 4, 0, 2, 0, 0, 0, 0), total = c(20, 20, 20, 20, 21, 19, 
    18, 20, 21, 20, 20, 20, 20, 22, 20, 20, 20, 20, 20, 10, 9, 
    10, 9, 11, 10, 10, 10, 10, 10, 10, 10, 9, 10, 11, 11, 10, 
    11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10)), class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -64L), spec = structure(list(
    cols = list(nominal_dose = structure(list(), class = c("collector_double", 
    "collector")), tank = structure(list(), class = c("collector_character", 
    "collector")), month = structure(list(), class = c("collector_character", 
    "collector")), dose = structure(list(), class = c("collector_double", 
    "collector")), response = structure(list(), class = c("collector_double", 
    "collector")), survive = structure(list(), class = c("collector_double", 
    "collector")), total = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), skip = 1), class = "col_spec"))

Here is an expanded version of what I want to do. But eventually, I'll build it out more to be more flexible. But this shows the problem clearly. I am using Rstudio. I have seeing this in the viewing pane.

library(dplyr)
library(ggplot2)
library(ecotox)
df <- ecotox::lamprey_tox
lc<- function(data, dose='dose', months=c(), month='month', total='total', response='response'){
  
  data <- data%>% filter(month %in% c(months))
  
  myplots <- list()
  for (i in seq_along(months)){
    
    a <- data[data$month %in% months[i],]
    print(a)
    lc <- LC_probit((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
                    weights = total,
                    data =a)
    LC50 <- lc$dose[1] #strip lc50
    LC90 <- lc$dose[2] #strip lc90
    p <- ggplot(data=a,aes(x=log10(as.numeric(dose)),y=(response/total)))+
      geom_point(size=4)+ 
      geom_smooth(method="glm", 
                  method.args=list(family=binomial(link="probit")), 
                  aes(weight=total),colour="blue",se=TRUE, level=0.95) +
      geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
      geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
      geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
      geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1) 
    
    
    myplots[[i]] <- p

    
    
    
  }
  
  return(list(myplots[[1]], myplots[[2]], myplots[[3]]))

  
}


This is what I get, which is not what I want

enter image description here

enter image description here enter image description here

If I print p in the loop, I get the expected results, but it takes away all flexibility of my function.

library(dplyr)
library(ggplot2)
library(ecotox)
df <- ecotox::lamprey_tox
lc<- function(data, dose='dose', months=c(), month='month', total='total', response='response'){
  
  data <- data%>% filter(month %in% c(months))
  
  myplots <- list()
  for (i in seq_along(months)){
    
    a <- data[data$month %in% months[i],]
    print(a)
    lc <- LC_probit((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
                    weights = total,
                    data =a)
    LC50 <- lc$dose[1] #strip lc50
    LC90 <- lc$dose[2] #strip lc90
    p <- ggplot(data=a,aes(x=log10(as.numeric(dose)),y=(response/total)))+
      geom_point(size=4)+ 
      geom_smooth(method="glm", 
                  method.args=list(family=binomial(link="probit")), 
                  aes(weight=total),colour="blue",se=TRUE, level=0.95) +
      geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
      geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
      geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
      geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1) 
    
    
 
print(p)
  }
  


  
}

You can see that now the geom_smooth and the geom_segmentlayers line up as intended. enter image description here enter image description here enter image description here


Solution

  • This is not a complete answer to your question. It is intended to show you how to convert your for loop to an lapply call. This will remove lazy evaluation as the root cause of your problem as the *apply family of functions use forced evaluation, whereas for loops use lazy evaluation.

    As you're using the tidyverse (and since, as I mentioned earlier, I don't have ecotox), I'm using the starwars data frame. Conversion to your use case should be straightforward.

    lapply takes (at least) two arguments. The first is a vector, the second a function. lapply applies the function to each of the values in the vector in turn, passing the value from the vector to the function as its first argument. lapply returns the results of the function calls in a list.

    I select a few species from those represented in the data and plot height against mass my species.

    library(tidyverse)
    
    lc <- function(d, speciesList) {
      lapply(
        speciesList,
        function(x) {
          d %>% 
            filter(species == x) %>% 
            ggplot() +
              geom_point(aes(x = height, y = mass, colour = homeworld)) +
              labs(title = paste0("Species: ", x))
        }
      )
    }
    
    starwars %>% lc(c("Human", "Droid"))
    

    This gives

    [[1]]
    
    [[2]]
    
    Warning messages:
    1: Removed 15 rows containing missing values or values outside the scale range (`geom_point()`). 
    2: Removed 2 rows containing missing values or values outside the scale range (`geom_point()`). 
    

    in the console and two graphs in the Plots pane:

    enter image description here enter image description here

    Of course, facet_wrap(), facet_grid() or group_walk() would be more natural ways of doing this, but my purpose here is not to be natural but to explain how to use lapply.

    Edit

    Following OP's edit to the question, here is some untested (as I don't have the ecotox package and it is not obvious where the LC_probit_2 function comes from) code that converts OP's function to use lapply. I am not convinced that the month, total and response parameters to the lc function are necessary.

    lc <- function(data, dose='dose', months=c(), month='month', total='total', response='response'){
      lapply(
        seq_along(months),
        function(i) {
          a <- data[data$month %in% months[i],]
          lc <- LC_probit_2((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
                            weights = total,
                            data = a)
          LC50 <- lc$dose[1] #strip lc50
          LC90 <- lc$dose[2] #strip lc90
          ggplot(data = a, aes(x = log10(as.numeric(dose)), y = (response/total))) +
            geom_point(size = 4) + 
            geom_smooth(
              method="glm", 
              method.args=list(family = binomial(link = "probit")), 
              aes(weight = total), colour = "blue", se = TRUE, level = 0.95
            ) +
            geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
            geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
            geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
            geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1) 
        }
      )
    }
    

    If you want to adopt a more tidyverse-pure approach, the following may also be an option:

    lc <- function(data, dose='dose', months=c()){
      data %>% 
        filter(month %in% months) %>% 
        group_by(month) %>% 
        group_map(
          function(.x, .y) {
            lc <- LC_probit_2((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
                              weights = total,
                              data = .x)
            LC50 <- lc$dose[1] #strip lc50
            LC90 <- lc$dose[2] #strip lc90
            .x %>% ggplot(aes(x = log10(as.numeric(dose)), y = (response/total))) +
              geom_point(size = 4) + 
              geom_smooth(
                method="glm", 
                method.args=list(family = binomial(link = "probit")), 
                aes(weight = total), colour = "blue", se = TRUE, level = 0.95
              ) +
              geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
              geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
              geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
              geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1) 
          }
        )
    }