rggplot2geom-point

Issues aligning geom_point and geom_errorbar in R ggplot2


I am trying to create a plot in R ggplot2 where my data (see below) has multiple outcomes. For each outcome there are 5 possible methods that need to be plotted (although some are NA and should leave a blank space where this is the case). Each has an estimate, LCI, UCI and effect_type (can be OR, cont or RD). Where effect_type is OR I want to plot on the log scale, for cont or RD this needs a different scale. So I want 2 axes. Each method should be represented by a different shape in geom_point and each effect_type will also have a different colour.

I have tried to do this in the below code but having a range of issues. The main one being that the error bars do not align with the geom_points. Also some data (points and CIs) are not being plotted.

The plot and dataframe as well as the code are below.

Dataframe (df_filtered):

,Exposure,Outcome,Method,Model,Effect_estimate,LCI,UCI,p_value,units,effect_type,SD
1,Loneliness,Self harm,Obs,logistic,1.86,1.72,2.02,6.33E-53,yes/no,OR,NA
2,Loneliness,Self harm,Within-family,logistic,1.69,1.13,2.54,1.00E-02,yes/no,OR,NA
3,Loneliness,Self harm,Between-family,logistic,1.56,1.04,2.33,3.00E-02,yes/no,OR,NA
4,Loneliness,Self harm,2SMR,NA,NA,NA,NA,NA,NA,NA,NA
5,Loneliness,Suicide attempt,Obs,logistic,1.89,1.7,2.11,8.76E-31,yes/no,OR,NA
6,Loneliness,Suicide attempt,Within-family,logistic,1.85,1.06,3.24,3.00E-02,yes/no,OR,NA
7,Loneliness,Suicide attempt,Between-family,logistic,1.9,1.1,3.31,2.00E-02,yes/no,OR,NA
8,Loneliness,Suicide attempt,2SMR,IVW,2.42,1.46,3.99,5.57E-04,NA,OR,NA
9,Loneliness,Depression diagnosis,Obs,logistic,2.54,2.44,2.63,0.00E+00,yes/no,OR,NA
10,Loneliness,Depression diagnosis,Within-family,logistic,2.25,1.89,2.67,8.17E-20,yes/no,OR,NA
11,Loneliness,Depression diagnosis,Between-family,logistic,2.66,2.23,3.17,4.64E-28,yes/no,OR,NA
12,Loneliness,Depression diagnosis,2SMR,IVW,2.13,1.42,3.2,2.74E-04,NA,OR,NA
13,Loneliness,Anxiety diagnosis,Obs,logistic,1.9,1.82,1.98,1.73E-180,yes/no,OR,NA
14,Loneliness,Anxiety diagnosis,Within-family,logistic,1.69,1.39,2.06,1.93E-07,yes/no,OR,NA
15,Loneliness,Anxiety diagnosis,Between-family,logistic,2.01,1.65,2.46,5.56E-12,yes/no,OR,NA
16,Loneliness,Anxiety diagnosis,2SMR,IVW,1.91,0.84,4.36,1.20E-01,NA,OR,NA
17,Loneliness,Self harm,1SMR,ivreg,0.31,0.08,0.54,7.51E-03,yes/no,RD,NA
18,Loneliness,Suicide attempt,1SMR,ivreg,0.15,-0.00743,0.31,6.00E-02,yes/no,RD,NA
19,Loneliness,Depression diagnosis,1SMR,ivreg,0.35,0.15,0.55,5.18E-04,yes/no,RD,NA
20,Loneliness,Anxiety diagnosis,1SMR,ivreg,0.13,-0.04,0.29,1.30E-01,yes/no,RD,NA
21,Loneliness,Depression trait,Obs,linear,0.5815217,0.5570652,0.6032609,0.00E+00,score ranges from 0 to 27,cont,3.68
22,Loneliness,Depression trait,Within-family,linear,0.4809783,0.3831522,0.576087,1.39E-22,score ranges from 0 to 27,cont,3.68
23,Loneliness,Depression trait,Between-family,linear,0.6956522,0.6032609,0.7880435,1.20E-48,score ranges from 0 to 27,cont,3.68
24,Loneliness,Depression trait,2SMR,NA,NA,NA,NA,NA,NA,NA,3.68
25,Loneliness,Anxiety trait,Obs,linear,0.4705882,0.4470588,0.4941176,0.00E+00,score ranges from 0 to 21,cont,3.4
26,Loneliness,Anxiety trait,Within-family,linear,0.3735294,0.2764706,0.4676471,2.51E-14,score ranges from 0 to 21,cont,3.4
27,Loneliness,Anxiety trait,Between-family,linear,0.5735294,0.4823529,0.6676471,2.61E-33,score ranges from 0 to 21,cont,3.4
28,Loneliness,Anxiety trait,2SMR,NA,NA,NA,NA,NA,NA,NA,3.4
29,Loneliness,Positive affect,Obs,linear,-0.7027027,-0.7162162,-0.6891892,0.00E+00,rating ranges from 1 to 6,cont,0.74
30,Loneliness,Positive affect,Within-family,linear,-0.5945946,-0.6756757,-0.527027,9.56E-52,rating ranges from 1 to 6,cont,0.74
31,Loneliness,Positive affect,Between-family,linear,-0.7567568,-0.8378378,-0.6891892,5.58E-85,rating ranges from 1 to 6,cont,0.74
32,Loneliness,Positive affect,2SMR,NA,NA,NA,NA,NA,NA,NA,0.74
33,Loneliness,Meaning in Life,Obs,linear,-0.5180723,-0.5421687,-0.4939759,0.00E+00,rating ranges from 1 to 5,cont,0.83
34,Loneliness,Meaning in Life,Within-family,linear,-0.4096386,-0.4939759,-0.313253,2.67E-19,rating ranges from 1 to 5,cont,0.83
35,Loneliness,Meaning in Life,Between-family,linear,-0.5903614,-0.6746988,-0.5060241,4.59E-40,rating ranges from 1 to 5,cont,0.83
36,Loneliness,Meaning in Life,2SMR,NA,NA,NA,NA,NA,NA,NA,0.83
37,Loneliness,Wellbeing spectrum,Obs,NA,NA,NA,NA,NA,NA,NA,NA
38,Loneliness,Wellbeing spectrum,Within-family,NA,NA,NA,NA,NA,NA,NA,NA
39,Loneliness,Wellbeing spectrum,Between-family,NA,NA,NA,NA,NA,NA,NA,NA
40,Loneliness,Wellbeing spectrum,2SMR,IVW,-0.28,-0.32,-0.23,4.55E-33,NA,cont,NA
41,Loneliness,Positive affect,Obs,NA,NA,NA,NA,NA,NA,NA,0.74
42,Loneliness,Positive affect,Within-family,NA,NA,NA,NA,NA,NA,NA,0.74
43,Loneliness,Positive affect,Between-family,NA,NA,NA,NA,NA,NA,NA,0.74
44,Loneliness,Positive affect,2SMR,IVW,-0.472973,-0.6216216,-0.3243243,3.37E-10,NA,cont,0.74
45,Loneliness,Life satisfaction,Obs,NA,NA,NA,NA,NA,NA,NA,NA
46,Loneliness,Life satisfaction,Within-family,NA,NA,NA,NA,NA,NA,NA,NA
47,Loneliness,Life satisfaction,Between-family,NA,NA,NA,NA,NA,NA,NA,NA
48,Loneliness,Life satisfaction,2SMR,IVW,-0.47,-0.69,-0.24,4.28E-05,NA,cont,NA
49,Loneliness,Depression trait,1SMR,ivreg,3.5326087,2.0706522,4.9918478,2.16E-06,score ranges from 0 to 27,cont,3.68
50,Loneliness,Anxiety trait,1SMR,ivreg,2.2970588,1.1411765,3.4529412,9.81E-05,score ranges from 0 to 21,cont,3.4
51,Loneliness,Positive affect,1SMR,ivreg,-2.3783784,-3.2432432,-1.5135135,7.98E-08,rating ranges from 1 to 6,cont,0.74
52,Loneliness,Meaning in Life,1SMR,ivreg,-1.1084337,-2.0963855,-0.1204819,3.00E-02,rating ranges from 1 to 5,cont,0.83


Code:

library(ggplot2)
library(dplyr)
library(gridExtra)
library(cowplot)

df_filtered <- read.csv("data.csv", header=T)

custom_colors <- scale_color_manual(
  values = c("HR" = "#97D9E3", "OR" = "#A59BEE", "RD" = "#FDB633", "cont" = "#F6A4B7")
)

custom_shapes <- c("Obs" = 0, "Between-family" = 1, "Within-family" = 2, "1SMR" = 10, "2SMR" = 11)

# Create the combined plot
p <- ggplot(df_filtered, aes(x = Outcome)) + 
 
  geom_hline(yintercept = 1, color = "#646363", linetype = "dashed") +
  
  # Points for HR and OR
  geom_point(data = df_filtered %>% filter(effect_type %in% c("OR")), 
             aes(y = Effect_estimate, shape = Method, color = effect_type), 
             size = 3, position = position_dodge(width = 0.5)) + 

  # Confidence intervals for HR and OR
  geom_errorbar(data = df_filtered %>% filter(effect_type %in% c("OR")),
                aes(ymin = LCI, ymax = UCI, color = effect_type), 
                width = 0.2, position = position_dodge(width = 0.5), size = 0.5) + 

  # Points for cont and RD on the second y-axis
  geom_point(data = df_filtered %>% filter(effect_type %in% c("cont", "RD")), 
             aes(y = Effect_estimate, shape = Method, color = effect_type), 
             size = 3, position = position_dodge(width = 0.5)) + 

  # Confidence intervals for cont and RD on the second y-axis
  geom_errorbar(data = df_filtered %>% filter(effect_type %in% c("cont", "RD")),
                aes(ymin = LCI, ymax = UCI, color = effect_type), 
                width = 0.2, position = position_dodge(width = 0.5), size = 0.5) + 

  # Customize the theme
  theme_minimal(base_size = 15) +
  theme(panel.background = element_rect(fill = "gray90", color = NA), # Gray background
        panel.grid.major = element_line(color = "white"),            # White major grid lines
        panel.grid.minor = element_line(color = "white", linewidth = 0.5),
        legend.position = "right",
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(y = "Effect Estimate",
       x = "Outcome") +
  
  scale_fill_manual(values = custom_colors) +  # Use custom colors for effect types
  scale_shape_manual(values = custom_shapes) + 

  # Secondary y-axis for cont and RD
  scale_y_continuous(trans = 'log10', 
                     sec.axis = sec_axis(~ ., name = "Secondary Effect Estimate (cont, RD)")) 

# Print the plot
print(p)

Plot currently output: enter image description here


Solution

  • Dodging and other position adjustments work based on the group aesthetic.

    From ?aes_group_order: For most applications the grouping is set implicitly by mapping one or more discrete variables to x, y, colour, fill, alpha, shape, size, and/or linetype.

    So your points geoms are dodged based on Method, but your error bars geoms don't "see" that variable, so they are not dodged.

    You can manually assign using group = in aes(), like below. In your example, each Outcome only has one effect_type, so it is arguably redundant, but it might be prudent to include effect_type in the group mapping, since it is one of the grouping variables for the point layers.

    geom_errorbar(data = df_filtered %>% filter(effect_type %in% c("OR")),
                  aes(ymin = LCI, ymax = UCI, color = effect_type,
                      group = paste(effect_type, Method)), 
                  width = 0.2, position = position_dodge(width = 0.5), size = 0.5) + 
    

    I typically use group = paste(GROUP_VAR1, GROUP_VAR2) or interaction(GROUP_VAR1, GROUP_VAR2)

    enter image description here


    df_filtered <- data.frame(
      stringsAsFactors = FALSE,
                             Exposure = c("Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness","Loneliness","Loneliness",
                                          "Loneliness"),
                              Outcome = c("Self harm","Self harm","Self harm",
                                          "Self harm","Suicide attempt",
                                          "Suicide attempt","Suicide attempt","Suicide attempt",
                                          "Depression diagnosis",
                                          "Depression diagnosis","Depression diagnosis",
                                          "Depression diagnosis","Anxiety diagnosis",
                                          "Anxiety diagnosis","Anxiety diagnosis",
                                          "Anxiety diagnosis","Self harm",
                                          "Suicide attempt","Depression diagnosis",
                                          "Anxiety diagnosis","Depression trait",
                                          "Depression trait","Depression trait","Depression trait",
                                          "Anxiety trait","Anxiety trait",
                                          "Anxiety trait","Anxiety trait",
                                          "Positive affect","Positive affect","Positive affect",
                                          "Positive affect","Meaning in Life",
                                          "Meaning in Life","Meaning in Life",
                                          "Meaning in Life","Wellbeing spectrum",
                                          "Wellbeing spectrum","Wellbeing spectrum",
                                          "Wellbeing spectrum","Positive affect",
                                          "Positive affect","Positive affect",
                                          "Positive affect","Life satisfaction",
                                          "Life satisfaction","Life satisfaction",
                                          "Life satisfaction","Depression trait",
                                          "Anxiety trait","Positive affect","Meaning in Life"),
                               Method = c("Obs","Within-family","Between-family",
                                          "2SMR","Obs","Within-family",
                                          "Between-family","2SMR","Obs","Within-family",
                                          "Between-family","2SMR","Obs",
                                          "Within-family","Between-family","2SMR","1SMR",
                                          "1SMR","1SMR","1SMR","Obs",
                                          "Within-family","Between-family","2SMR","Obs",
                                          "Within-family","Between-family","2SMR","Obs",
                                          "Within-family","Between-family",
                                          "2SMR","Obs","Within-family",
                                          "Between-family","2SMR","Obs","Within-family",
                                          "Between-family","2SMR","Obs","Within-family",
                                          "Between-family","2SMR","Obs",
                                          "Within-family","Between-family","2SMR","1SMR",
                                          "1SMR","1SMR","1SMR"),
                                Model = c("logistic","logistic","logistic",NA,
                                          "logistic","logistic","logistic","IVW",
                                          "logistic","logistic","logistic","IVW",
                                          "logistic","logistic","logistic","IVW",
                                          "ivreg","ivreg","ivreg","ivreg",
                                          "linear","linear","linear",NA,"linear",
                                          "linear","linear",NA,"linear","linear",
                                          "linear",NA,"linear","linear","linear",
                                          NA,NA,NA,NA,"IVW",NA,NA,NA,"IVW",
                                          NA,NA,NA,"IVW","ivreg","ivreg",
                                          "ivreg","ivreg"),
                      Effect_estimate = c(1.86,1.69,1.56,NA,1.89,1.85,1.9,2.42,
                                          2.54,2.25,2.66,2.13,1.9,1.69,2.01,
                                          1.91,0.31,0.15,0.35,0.13,0.5815217,
                                          0.4809783,0.6956522,NA,0.4705882,
                                          0.3735294,0.5735294,NA,-0.7027027,
                                          -0.5945946,-0.7567568,NA,-0.5180723,-0.4096386,
                                          -0.5903614,NA,NA,NA,NA,-0.28,NA,NA,
                                          NA,-0.472973,NA,NA,NA,-0.47,
                                          3.5326087,2.2970588,-2.3783784,-1.1084337),
                                  LCI = c(1.72,1.13,1.04,NA,1.7,1.06,1.1,1.46,
                                          2.44,1.89,2.23,1.42,1.82,1.39,1.65,
                                          0.84,0.08,-0.00743,0.15,-0.04,
                                          0.5570652,0.3831522,0.6032609,NA,0.4470588,
                                          0.2764706,0.4823529,NA,-0.7162162,
                                          -0.6756757,-0.8378378,NA,-0.5421687,
                                          -0.4939759,-0.6746988,NA,NA,NA,NA,-0.32,
                                          NA,NA,NA,-0.6216216,NA,NA,NA,-0.69,
                                          2.0706522,1.1411765,-3.2432432,
                                          -2.0963855),
                                  UCI = c(2.02,2.54,2.33,NA,2.11,3.24,3.31,
                                          3.99,2.63,2.67,3.17,3.2,1.98,2.06,2.46,
                                          4.36,0.54,0.31,0.55,0.29,0.6032609,
                                          0.576087,0.7880435,NA,0.4941176,
                                          0.4676471,0.6676471,NA,-0.6891892,-0.527027,
                                          -0.6891892,NA,-0.4939759,-0.313253,
                                          -0.5060241,NA,NA,NA,NA,-0.23,NA,NA,
                                          NA,-0.3243243,NA,NA,NA,-0.24,
                                          4.9918478,3.4529412,-1.5135135,-0.1204819),
                              p_value = c(6.33e-53,0.01,0.03,NA,8.76e-31,0.03,
                                          0.02,0.000557,0,8.17e-20,4.64e-28,
                                          0.000274,1.73e-180,1.93e-07,5.56e-12,0.12,
                                          0.00751,0.06,0.000518,0.13,0,
                                          1.39e-22,1.2e-48,NA,0,2.51e-14,2.61e-33,NA,
                                          0,9.56e-52,5.58e-85,NA,0,2.67e-19,
                                          4.59e-40,NA,NA,NA,NA,4.55e-33,NA,NA,
                                          NA,3.37e-10,NA,NA,NA,4.28e-05,
                                          2.16e-06,9.81e-05,7.98e-08,0.03),
                                units = c("yes/no","yes/no","yes/no",NA,"yes/no",
                                          "yes/no","yes/no",NA,"yes/no",
                                          "yes/no","yes/no",NA,"yes/no","yes/no",
                                          "yes/no",NA,"yes/no","yes/no","yes/no",
                                          "yes/no","score ranges from 0 to 27",
                                          "score ranges from 0 to 27",
                                          "score ranges from 0 to 27",NA,"score ranges from 0 to 21",
                                          "score ranges from 0 to 21",
                                          "score ranges from 0 to 21",NA,
                                          "rating ranges from 1 to 6","rating ranges from 1 to 6",
                                          "rating ranges from 1 to 6",NA,
                                          "rating ranges from 1 to 5","rating ranges from 1 to 5",
                                          "rating ranges from 1 to 5",NA,NA,NA,
                                          NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
                                          "score ranges from 0 to 27",
                                          "score ranges from 0 to 21","rating ranges from 1 to 6",
                                          "rating ranges from 1 to 5"),
                          effect_type = c("OR","OR","OR",NA,"OR","OR","OR",
                                          "OR","OR","OR","OR","OR","OR","OR",
                                          "OR","OR","RD","RD","RD","RD","cont",
                                          "cont","cont",NA,"cont","cont","cont",
                                          NA,"cont","cont","cont",NA,"cont",
                                          "cont","cont",NA,NA,NA,NA,"cont",NA,
                                          NA,NA,"cont",NA,NA,NA,"cont",
                                          "cont","cont","cont","cont"),
                                   SD = c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
                                          NA,NA,NA,NA,NA,NA,NA,NA,NA,3.68,
                                          3.68,3.68,3.68,3.4,3.4,3.4,3.4,
                                          0.74,0.74,0.74,0.74,0.83,0.83,0.83,
                                          0.83,NA,NA,NA,NA,0.74,0.74,0.74,0.74,
                                          NA,NA,NA,NA,3.68,3.4,0.74,0.83)
                   )