Tags: r, for-loop, ggplot2, plot, geom-segment

geom_segment() does not use the value for the i-th iteration in plots generated by a for loop


I am using a for loop to produce plots based on the same code but with different values.

This is my current plot: [image not included]

As you can see, the horizontal geom_segment() lines are printed at exactly the same y-coordinates in every plot. There are two geom_segment() layers per plot, one for time = 60 and one for time = 120.

First, I tried something like:

# Build one plot per WHO group ("1", "2", "3"); each plot is stored under the
# name who<i>pred by the assign() call at the end of the loop body.
# NOTE(review): this is the version that shows the reported problem. aes() is
# lazily evaluated, so the `i` inside the geom_segment() mappings below is not
# resolved during the iteration that creates the layer.
for(i in c("1", "2", "3")){

  # Base plot: rows of df_new for the current WHO group only.
  predplot <- ggplot(filter(df_new, WHO == i), 
                     aes(x = ki67, y = pred, color = time, fill = time)) +

    scale_x_continuous(name = "",
                       breaks = seq(0, 50, 10)) +
    
    # Horizontal line from x = 0 to x = 40 at the cuminc value looked up in
    # df_margrisk for this WHO group at time "60".
    geom_segment(aes(x = 0, xend = 40,
                     y = df_margrisk$cuminc[df_margrisk$WHO == i & df_margrisk$time == "60"],
                     yend = df_margrisk$cuminc[df_margrisk$WHO == i & df_margrisk$time == "60"]),
                 color = "red") +
    
    # Same construction for time "120".
    geom_segment(aes(x = 0, xend = 40,
                     y = df_margrisk$cuminc[df_margrisk$WHO == i & df_margrisk$time == "120"],
                     yend = df_margrisk$cuminc[df_margrisk$WHO == i & df_margrisk$time == "120"]),
                 color = "red") +
  
    theme_classic()

  
  # Creates the variable who1pred / who2pred / who3pred holding this plot.
  assign(paste0("who", i, "pred"), predplot)
  
}

Assemble plots

library(patchwork)
# who1pred, who2pred and who3pred are the plots created by assign() in the
# loop; patchwork's `|` operator places them next to each other.
who1pred  | who2pred | who3pred 

Then I desperately tried (which produced the same plot with same error):

# Second attempt: the same two reference-line layers, but the WHO group is
# selected with an explicit if/else chain on `i` instead of `WHO == i`.
# The expressions still sit inside aes(); this produced the same plot.
geom_segment(aes(x = 0, xend = 40,
                 y = if (i == "1") df_margrisk$cuminc[df_margrisk$WHO == "1" & df_margrisk$time == "60"] else 
                   if (i == "2") df_margrisk$cuminc[df_margrisk$WHO == "2" & df_margrisk$time == "60"] else
                     df_margrisk$cuminc[df_margrisk$WHO == "3" & df_margrisk$time == "60"],
                 yend = if (i == "1") df_margrisk$cuminc[df_margrisk$WHO == "1" & df_margrisk$time == "60"] else 
                   if (i == "2") df_margrisk$cuminc[df_margrisk$WHO == "2" & df_margrisk$time == "60"] else
                     df_margrisk$cuminc[df_margrisk$WHO == "3" & df_margrisk$time == "60"]), 
             color = "grey70",
             size = .1) +
  
  # Same construction for time "120".
  geom_segment(aes(x = 0, xend = 40,
                   y = if (i == "1") df_margrisk$cuminc[df_margrisk$WHO == "1" & df_margrisk$time == "120"] else 
                     if (i == "2") df_margrisk$cuminc[df_margrisk$WHO == "2" & df_margrisk$time == "120"] else
                       df_margrisk$cuminc[df_margrisk$WHO == "3" & df_margrisk$time == "120"],
                   yend = if (i == "1") df_margrisk$cuminc[df_margrisk$WHO == "1" & df_margrisk$time == "120"] else 
                     if (i == "2") df_margrisk$cuminc[df_margrisk$WHO == "2" & df_margrisk$time == "120"] else
                       df_margrisk$cuminc[df_margrisk$WHO == "3" & df_margrisk$time == "120"]), 
               color = "red",
               size = .1) 

Data

    # Example data for the plots: a 20-row tibble (class tbl_df) with columns
    # WHO (group "1"/"2"/"3"), ki67 (plotted on x), pred (plotted on y) and
    # time ("60" or "120"). WHO and time are stored as character strings.
    df_new <- structure(list(WHO = c("1", "3", "3", "1", "2", "3", "3", "2", 
"1", "1", "3", "3", "1", "2", "2", "3", "2", "3", "3", "3"), 
    ki67 = c(74, 43, 33, 40, 25, 47, 5, 49, 3, 78, 96, 66, 77, 
    45, 84, 61, 99, 19, 75, 22), pred = c(8.18837741638696e-08, 
    0.656014788470526, 0.467672725799402, 0.0495531139823135, 
    0.676232957612799, 0.555496874842657, 0.452128214447235, 
    0.920513064923983, 0.0592022139774029, 0.507544894144434, 
    0.942705398173106, 0.739445754449513, 0.510905666268942, 
    0.680952044793548, 0.685177333492073, 0.765579700267525, 
    0.35021374381192, 0.61272020356918, 0.854446676200307, 0.442974514059335
    ), time = c("60", "120", "60", "60", "120", "60", "60", "120", 
    "60", "120", "60", "60", "120", "60", "120", "120", "120", 
    "120", "120", "60")), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

And

# Reference values for the horizontal lines: a 6-row tibble with one cuminc
# value per combination of WHO ("1"-"3") and time ("60", "120").
df_margrisk <- structure(list(time = c("60", "120", "60", "120", "60", "120"
), WHO = c("1", "1", "2", "2", "3", "3"), cuminc = c(0.0780868867206532, 
0.142831926593544, 0.25131050422863, 0.357325139768945, 0.550010238368203, 
0.682482624335479)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-6L))

Solution

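  • Note that aes() values are lazily evaluated: the expressions are not evaluated when the layer is created but later, when the plot is built. By then the loop has finished and i holds its last value ("3"), so every plot gets the same lines. You pretty much never want to use loop variables or $ inside aes() without expanding the values first. By default, data is inherited from the main ggplot() call. You want to set the data= value separately for each of your segments. Try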

    library(ggplot2)
    library(dplyr)

    # Build one plot per WHO group and store it as who1pred / who2pred /
    # who3pred. The reference lines get their values through a pre-filtered
    # `data =` argument instead of `df_margrisk$...[... == i]` inside aes():
    # aes() expressions are evaluated lazily, whereas the subset below is
    # computed immediately, while `i` still refers to the current iteration.
    for (i in c("1", "2", "3")) {

      # Rows of df_margrisk for this WHO group at the two time points of
      # interest; each row becomes one horizontal reference line.
      margrisk_i <- df_margrisk[
        df_margrisk$WHO == i & df_margrisk$time %in% c("60", "120"),
      ]

      predplot <- ggplot(filter(df_new, WHO == i),
                         aes(x = ki67, y = pred, color = time, fill = time)) +
        scale_x_continuous(name = "", breaks = seq(0, 50, 10)) +
        # A single layer draws all reference lines: for every row of
        # margrisk_i, a segment from x = 0 to x = 40 at y = cuminc.
        geom_segment(aes(x = 0, xend = 40, y = cuminc, yend = cuminc),
                     data = margrisk_i,
                     color = "red") +
        theme_classic()

      # Keep the per-group variable names so the plots can be combined later
      # by name (who1pred, who2pred, who3pred).
      assign(paste0("who", i, "pred"), predplot)
    }