rggplot2data-fittingggpmisc

add r-squared and equation of multiple fits (linear, power, logarithmic, exponential) in ggplot2


In this question (exponential fit with ggplot, showing regression line and R^2), there is a nice answer showing how to add multiple models/fits to the data. I would like to add the r-squared and the equation into the plot, for multiple plots in a for loop. My data looks like...

xstl, size, dose
b2x01, 010, 18.536153 
b2x01, 020, 12.180385 
b2x01, 030, 7.939649 
b2x01, 040, 5.544527 
b2x01, 050, 4.075597 
b2x01, 060, 3.097937 
b2x01, 070, 2.414469 
b2x01, 080, 1.921399 
b2x01, 090, 1.556978 
b2x01, 100, 1.282339 
b2x01, 110, 1.071527 
b2x01, 120, 0.907078 
b2x01, 130, 0.776750 
b2x01, 140, 0.671974 
b2x01, 150, 0.586674 
b2x01, 160, 0.516337 
b2x01, 170, 0.457731 
b2x01-v2, 010, 29.553145 
b2x01-v2, 020, 11.147937 
b2x01-v2, 030, 6.526314 
b2x01-v2, 040, 5.590084 
b2x01-v2, 050, 3.910282 
b2x01-v2, 060, 3.716836 
b2x01-v2, 070, 2.783777 
b2x01-v2, 080, 2.777439 
b2x01-v2, 090, 2.157218 
b2x01-v2, 100, 2.213712 
b2x01-v2, 110, 1.758258 
b2x01-v2, 120, 1.837702 
b2x01-v2, 130, 1.481908 
b2x01-v2, 140, 1.568848 
b2x01-v2, 150, 1.279272 
b2x01-v2, 160, 1.367446 
b2x01-v2, 170, 1.124449 
b2x01-v2, 180, 1.210679 
b2x01-v2, 190, 1.002108 
b2x01-v2, 200, 1.085397 
b2x01-v2, 210, 0.903101 
b2x04, 010, 17.719124 
b2x04, 020, 11.644191 
b2x04, 030, 7.590590 
b2x04, 040, 5.301100 
b2x04, 050, 3.896908 
b2x04, 060, 2.962311 
b2x04, 070, 2.308926 
b2x04, 080, 1.837534 
b2x04, 090, 1.489123 
b2x04, 100, 1.226537 
b2x04, 110, 1.024971 
b2x04, 120, 0.867727 
b2x04, 130, 0.743105 
b2x04, 140, 0.642913 
b2x04, 150, 0.561340 
b2x04, 160, 0.494074 
b2x04, 170, 0.438024 
b2x04-v2, 010, 27.870542 
b2x04-v2, 020, 10.514309 
b2x04-v2, 030, 6.155977 
b2x04-v2, 040, 5.273423 
b2x04-v2, 050, 3.689143 
b2x04-v2, 060, 3.506982 
b2x04-v2, 070, 2.626882 
b2x04-v2, 080, 2.621166 
b2x04-v2, 090, 2.036045 
b2x04-v2, 100, 2.089571 
b2x04-v2, 110, 1.659827 
b2x04-v2, 120, 1.734992 
b2x04-v2, 130, 1.399231 
b2x04-v2, 140, 1.481470 
b2x04-v2, 150, 1.208143 
b2x04-v2, 160, 1.291537 
b2x04-v2, 170, 1.062134 
b2x04-v2, 180, 1.143703 
b2x04-v2, 190, 0.946762 
b2x04-v2, 200, 1.025551 
b2x04-v2, 210, 0.853392 
b2x11, 010, 18.092049 
b2x11, 020, 11.888880 
b2x11, 030, 7.749847 
b2x11, 040, 5.412139 
b2x11, 050, 3.978398 
b2x11, 060, 3.024148 
b2x11, 070, 2.357035 
b2x11, 080, 1.875752 
b2x11, 090, 1.520037 
b2x11, 100, 1.251954 
b2x11, 110, 1.046171 
b2x11, 120, 0.885641 
b2x11, 130, 0.758418 
b2x11, 140, 0.656137 
b2x11, 150, 0.572865 
b2x11, 160, 0.504199 
b2x11, 170, 0.446984 
b3x11, 010, 2.356662 
b3x11, 020, 1.958124 
b3x11, 030, 1.517240 
b3x11, 040, 1.154898 
b3x11, 050, 0.892564 
b3x11, 060, 0.706621 
b3x11, 070, 0.571385 
b3x11, 080, 0.469808 
b3x11, 090, 0.391469 
b3x11, 100, 0.329882 
b3x11, 110, 0.280772 
b3x11, 120, 0.241133 
b3x11, 130, 0.208834 
b3x11, 140, 0.182249 
b3x11, 150, 0.160253 
b3x11, 160, 0.142079 
b3x11, 170, 0.127135 
b3x11, 180, 0.114803 
b3x11, 190, 0.104692 
b3x11, 200, 0.097177 
b3x11, 210, 0.092753 
b3x11, 220, 0.089836 
b3x11, 230, 0.086181 
b3x11, 240, 0.083828 
b3x11, 250, 0.080707 
b3x14, 010, 2.358140 
b3x14, 020, 1.959348 
b3x14, 030, 1.518185 
b3x14, 040, 1.155615 
b3x14, 050, 0.893116 
b3x14, 060, 0.707056 
b3x14, 070, 0.571736 
b3x14, 080, 0.470095 
b3x14, 090, 0.391707 
b3x14, 100, 0.330082 
b3x14, 110, 0.280941 
b3x14, 120, 0.241278 
b3x14, 130, 0.208959 
b3x14, 140, 0.182357 
b3x14, 150, 0.160348 
b3x14, 160, 0.142163 
b3x14, 170, 0.127209 
b3x14, 180, 0.114870 
b3x14, 190, 0.104753 
b3x14, 200, 0.097233 
b3x14, 210, 0.092806 
b3x14, 220, 0.089888 
b3x14, 230, 0.086230 
b3x14, 240, 0.083876 
b3x14, 250, 0.080753 

My code like...

library("readr")
library("dplyr")
library("ggplot2")

# Read data
datos <- read_csv("~/data_to_fit_ext.csv", col_types = cols(xstl = col_factor(levels = c("b2x01", "b2x01-v2", "b2x04", "b2x04-v2", "b2x11", "b3x11", "b3x14")), size = col_double()))

# Define crystals
crystals <- levels(datos$xstl)

# Plot with multiple smoothing methods
p <- NULL
for (crystal in crystals) {
  # Filter data for the current crystal
  filtered_data <- filter(datos, xstl == crystal)
  
  # Plot for current crystal
  p <- ggplot(data = filtered_data, aes(x = size, y = dose)) +
    geom_point() +
    stat_smooth(method = 'lm', formula = y ~ x, mapping = aes(colour = 'Linear'), se = FALSE) +
    stat_smooth(method = 'nls', formula = y ~ a * log(x) + b, mapping = aes(colour = 'Logarithmic'), se = FALSE, method.args = list(start = list(a = 1, b = 1))) +
    stat_smooth(method = 'nls', formula = y ~ a * exp(b * x), mapping = aes(colour = 'Exponential'), se = FALSE, method.args = list(start = list(a = 10, b = -0.01))) +
    stat_smooth(method = 'nls', formula = y ~ a * x^b, mapping = aes(colour = 'Power'), se = FALSE, method.args = list(start = list(a = 1, b = 1))) +
    labs(x = "Size", y = "Dose") +
    theme_bw() 
  # Save the plot for current crystal
  ggsave(paste("multiple_smoothing_", crystal, ".png", sep = ""), plot = p, width = 8, height = 6)
}

I tried with ggpmisc but apparently it can retrieve data only from linear models... Besides library(ggpmisc) I think this is the relevant code

p <- ggplot(datos_filtrados, aes(x = size, y = dose)) +
    geom_point() +
    stat_smooth(method = 'lm', formula = y ~ x, mapping = aes(colour = 'Linear'), se = FALSE) +
    stat_poly_eq(aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                 formula = y ~ x, color = "green", parse = TRUE) +
    stat_smooth(method = 'nls', formula = y ~ a * log(x) + b, mapping = aes(colour = 'Logarithmic'), se = FALSE, 
                method.args = list(start = list(a = 1, b = 1))) +
    stat_poly_eq(aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                 formula = y ~ a * log(x) + b, color = "blue", parse = TRUE) +
    stat_smooth(method = 'nls', formula = y ~ a * exp(b * x), mapping = aes(colour = 'Exponential'), se = FALSE, 
                method.args = list(start = list(a = 10, b = -0.01))) +
    stat_poly_eq(aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                 formula = y ~ a * exp(b * x), color = "red", parse = TRUE) +
    stat_smooth(method = 'nls', formula = y ~ a * x^b, mapping = aes(colour = 'Power'), se = FALSE, 
                method.args = list(start = list(a = 1, b = 1))) +
    stat_poly_eq(aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
                 formula = y ~ a * x^b, color = "purple", parse = TRUE) +
    labs(x = "Size", y = "Dose") +
    theme_bw()

The plots are the same; the equation and the r-squared are written in the plot, but only for the linear model. How can I get all equations and all r-squareds in all plots? Can this be done without 'outside' packages? (i.e. everything with ggplot2?) enter image description here


Solution

  • The issue is that by default stat_poly_eq will estimate a linear model, i.e. it uses method="lm", which can't deal with the formulas you provided and you should get some warnings to note you about that. Instead, if you want a non-linear model you have to specify the method you want to use to estimate your model, i.e. set method="nls" and also pass the method.args= you used in stat_smooth. Additionally, even after doing so,stat_poly_eq will still give you a formula for a linear model. Hence, to get the correct formulas you have create them manually for which I set output.type = "numeric". Doing so we can access the values of the coefficients using b_i. Finally note, that nls does not return or compute an R^2 statistic (see Calculating R^2 for a nonlinear least squares fit). As a result the r.squared is NA and we get a blank in the label:

    library(ggplot2)
    library(ggpmisc)
    
    ggplot(datos_filtrados, aes(x = size, y = dose)) +
      geom_point() +
      stat_smooth(method = "lm", formula = y ~ x, mapping = aes(colour = "Linear"), se = FALSE) +
      stat_poly_eq(
        aes(
          label = paste(..eq.label.., ..rr.label.., sep = "~~~"),
          colour = "Linear"
        ),
        formula = y ~ x, parse = TRUE,
        npcy = .95
      ) +
      stat_smooth(
        method = "nls", formula = y ~ a * log(x) + b, mapping = aes(colour = "Logarithmic"), se = FALSE,
        method.args = list(start = list(a = 1, b = 1))
      ) +
      stat_poly_eq(
        aes(label = after_stat(
          paste0(
            "y==", round(b_0, 2), "*~log(x)+", round(b_1, 2), "~~~italic(R)^2==", round(r.squared, 2)
          )
        ), colour = "Logarithmic"),
        formula = y ~ a * log(x) + b, parse = TRUE,
        method = "nls", method.args = list(start = list(a = 1, b = 1)),
        npcy = .9,
        output.type = "numeric"
      ) +
      stat_smooth(
        method = "nls",
        formula = y ~ a * exp(b * x), mapping = aes(colour = "Exponential"), se = FALSE,
        method.args = list(start = list(a = 10, b = -0.01))
      ) +
      stat_poly_eq(
        aes(label = after_stat(
          paste0(
            "y==", round(b_0, 2), "*~exp(", round(b_1, 2), "*x)", "~~~italic(R)^2==", round(r.squared, 2)
          )
        ), colour = "Exponential"),
        formula = y ~ a * exp(b * x), parse = TRUE,
        method = "nls", method.args = list(start = list(a = 10, b = -0.01)),
        npcy = .85,
        output.type = "numeric"
      ) +
      stat_smooth(
        method = "nls", formula = y ~ a * x^b, mapping = aes(colour = "Power"), se = FALSE,
        method.args = list(start = list(a = 1, b = 1))
      ) +
      stat_poly_eq(
        aes(label = after_stat(
          paste0(
            "y==", round(b_0, 2), "*x^", round(b_1, 2), "~~~italic(R)^2==", round(r.squared, 2)
          )
        ), colour = "Power"),
        formula = y ~ a * x^b, parse = TRUE,
        method = "nls", method.args = list(start = list(a = 1, b = 1)),
        npcy = .8,
        output.type = "numeric"
      ) +
      labs(x = "Size", y = "Dose") +
      theme_bw()