r dataframe ggplot2

Combine scatter, boxplot and linear regression line on one chart ggplot R


I want to place a boxplot, a scatter plot and a linear regression line for the scatter points on one chart using ggplot2. I am able to get two of the three onto one chart, but I have trouble combining the regression line with the boxplot.

A sample of my data below

# Sample data (the literal has the shape of dput() output): 24 rows, i.e. two
# replicates (Rep_No "A" and "B") for each sample number 2113-2124.
# Sample and Fe are numeric; SiO2 and Al2O3 are stored as character strings
# here, so they need as.numeric() before being used as numbers.
df <- structure(list(Sample = c(2113, 2113, 2114, 2114, 2115, 2115, 
2116, 2116, 2117, 2117, 2118, 2118, 2119, 2119, 2120, 2120, 2121, 
2121, 2122, 2122, 2123, 2123, 2124, 2124), Rep_No = c("A", "B", 
"A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", 
"B", "A", "B", "A", "B", "A", "B", "A", "B"), Fe = c(57.24, 57.12, 
57.2, 57.13, 57.21, 57.14, 57.16, 57.31, 57.11, 57.18, 57.21, 
57.12, 57.14, 57.17, 57.1, 57.18, 57, 57.06, 57.13, 57.09, 57.17, 
57.23, 57.09, 57.1), SiO2 = c("6.85", "6.83", "6.7", "6.69", 
"6.83", "6.8", "6.76", "6.79", "6.82", "6.82", "6.8", "6.86", 
"6.9", "6.82", "6.81", "6.83", "6.79", "6.76", "6.8", "6.88", 
"6.83", "6.79", "6.8", "6.83"), Al2O3 = c("2.9", "2.88", "2.88", 
"2.88", "2.92", "2.9", "2.89", "2.87", "2.9", "2.89", "2.9", 
"2.89", "2.89", "2.88", "2.89", "2.91", "2.91", "2.91", "2.9", 
"2.9", "2.91", "2.91", "2.88", "2.86")), row.names = c(NA, -24L
), class = "data.frame")

My code thus far

# Predictor (sample number) and response (Fe) pulled out as free-standing
# vectors; the formula `y ~ x` inside lm_eqn() falls back to these globals
# because `df` has no columns named `x` or `y`.
x <- df[["Sample"]]
y <- df[["Fe"]]

# Build a plotmath label of the form "y = a + b . x, r^2 = ..." for the
# linear fit y ~ x.
#
# `df` is handed to lm() as its `data` argument, so `x` and `y` are looked up
# in `df` first and, failing that, in the enclosing environments of this
# function. Anything passed through `...` is accepted but not used.
#
# Returns a length-one character vector (the deparsed expression), intended
# for labels drawn with `parse = TRUE`.
lm_eqn <- function(df, ...) {
  fit <- lm(y ~ x, df)
  coefs <- unname(coef(fit))
  pieces <- list(
    a = format(coefs[1], digits = 2),
    b = format(coefs[2], digits = 2),
    r2 = format(summary(fit)$r.squared, digits = 3)
  )
  label <- substitute(
    italic(y) == a + b %.% italic(x) * "," ~~ italic(r)^2 ~ "=" ~ r2,
    pieces
  )
  as.character(as.expression(label))
}

# Pre-compute the equation label once; the plotting code passes it on as
# `label = a` together with `parse = TRUE`.
a <- lm_eqn(df)


# Boxplot of every Fe value (x = "All Data") plus one coloured point per
# measurement. Sample is converted to a factor first so that both layers sit
# on the same discrete x axis.
p <- df %>%
  mutate(Sample = factor(Sample)) %>%
  ggplot(aes(y = Fe)) +
  geom_boxplot(aes(x = "All Data")) +
  geom_point(aes(x = Sample, color = Sample)) +
  labs(
    title = "Lab Test Order Fe",
    x = "Sample No",
    y = "Homogeneity Test Fe %"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    legend.position = "none"
  )
p

enter image description here

and my code to get linear trend line

# Scatter plot of Fe against the numeric Sample column, with a least-squares
# line and the pre-computed equation label `a`.
p2 <- ggplot(df, aes(x = Sample, y = Fe)) +
  geom_point() +
  # Formula spelled out so geom_smooth() does not have to pick a default.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  # "none" (lower case) is the documented value; no aesthetic here produces
  # a legend anyway.
  theme(legend.position = "none") +
  # annotate() draws the label exactly once. geom_text() with constant x/y
  # would draw one copy per data row and lean on check_overlap to hide them.
  annotate("text", x = 2115, y = 57.05, label = a, parse = TRUE)

p2

enter image description here

How can I get all three onto the same chart? I would also like to put the boxplot first, keep the colours of the points, and have the text for the regression line placed in an optimal position automatically rather than setting its coordinates by hand.

Any help appreciated.


Solution

  • I suggest two options. The first uses the scales and ggpmisc packages to get everything into a single plot/frame, which is literally what you asked for. The second uses patchwork to produce two aligned plots: one with the boxplot, the other with the scatter plot and the regression line.

    Option 1. All bundled together.

    library(tidyverse)
    library(scales)  # To get nice looking x-axis breaks
    library(ggpmisc) # To help with optimal position for the regression formula
    
      # df holds two replicate rows per sample, so reduce Sample to its
      # distinct values; otherwise every axis break and label is listed twice.
      sample_ids <- sort(unique(df$Sample))

      ggplot(data = df, aes(x = Sample, y = Fe)) +
        geom_point(aes(color = as.factor(Sample))) +
        # `a` is the equation label built beforehand with lm_eqn(df).
        stat_poly_eq(formula = y ~ x, mapping = aes(label = a), parse = TRUE,
                     method = "lm", hjust = -0.35) +
        geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
        # The box sits one unit left of the first sample; group = 1 keeps all
        # rows in a single box on the continuous x axis.
        geom_boxplot(mapping = aes(x = min(Sample) - 1, y = Fe, group = 1)) +
        # "none" (lower case) is the documented value for hiding the legend.
        theme(legend.position = "none") +
        labs(title = "Lab Test Order Fe", x = "Sample No",
             y = "Homogeneity Test Fe %") +
        scale_x_continuous(breaks = c(min(sample_ids) - 1, sample_ids),
                           labels = c("All Data", as.integer(sample_ids)))
    

    enter image description here

    Option 2. Assembled plot through patchwork.

    library(tidyverse)
    library(scales)    # To get nice looking x-axis breaks
    library(ggpmisc)   # To help with optimal position for the regression formula
    library(patchwork) # To assemble a composite plot
    
    # Box-plot panel: a single box summarising every Fe value, placed under
    # the discrete x label "All Data".
    p_boxplot <- ggplot(df, aes(x = "All Data", y = Fe)) +
      geom_boxplot() +
      labs(
        subtitle = "Box Plot",
        x = "",
        y = "Homogeneity Test Fe %"
      )
    
    # Scatter panel: Fe against the numeric Sample column, points coloured by
    # sample, with a least-squares line and the pre-computed label `a`.
    # df holds two replicate rows per sample, so the axis breaks use the
    # distinct sample numbers rather than df$Sample itself.
    sample_ids <- sort(unique(df$Sample))

    p_scatter <-
      ggplot(data = df, aes(x = Sample, y = Fe)) +
      geom_point(aes(color = as.factor(Sample))) +
      # No dangling comma after the last argument: an empty trailing argument
      # is not guaranteed to be accepted.
      stat_poly_eq(formula = y ~ x, mapping = aes(label = a), parse = TRUE,
                   method = "lm") +
      geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
      # "none" (lower case) is the documented value for hiding the legend.
      theme(legend.position = "none") +
      labs(subtitle = "Scatter Plot",
           x = "Sample No", y = "") +
      scale_x_continuous(breaks = sample_ids,
                         labels = as.integer(sample_ids))
    
    
    # Put the two panels side by side with relative widths 1 : 5 (boxplot :
    # scatter) and add one shared title above both.
    wrap_plots(p_boxplot, p_scatter, widths = c(1, 5)) +
      plot_annotation(title = "Lab Test Order Fe")
    

    enter image description here