Tags: r, ggplot2, width, scale, boxplot

How to set the width of boxplots with custom quantiles?


These boxplots use custom y quantiles and are centered on the medians x_q50. How can I set their widths from given x quantiles (e.g. from x_min and x_max, or from x_q025 and x_q975), while still being able to transform the x scale?

I tried this interesting suggestion but without success.

Thanks for help.

Graph attempt (failed):
[Image: plot produced by the code attempt below (not included)]

Desired graph:
In the example below (built manually with PowerPoint), the width of each boxplot is defined by the 5th and 95th percentiles of x (the yellow x_q05 and x_q95 columns), with the median at x_q50. For the last group, x_q05 = 82, x_q50 = 132 and x_q95 = 432.8.
[Image: mock-up of the desired plot (not included)]

Data:

# Example data in dput() form: a 9-row data frame (classes tbl_df/tbl/data.frame),
# one row per level of the factor `grp` ("0".."8"). `nb` is an integer column;
# the x_* and y_* columns hold precomputed summary values, and the quantile
# columns carry percent names such as `2.5%`, `5%`, `95%` and `97.5%`.
dat <-
structure(list(grp = structure(1:9, levels = c("0", "1", "2", 
"3", "4", "5", "6", "7", "8"), class = "factor"), nb = c(4364L, 
4363L, 4363L, 4363L, 4363L, 4363L, 4363L, 4363L, 4363L), x_min = c(0, 
5, 8, 11, 15, 20, 28, 43, 79), x_q025 = c(`2.5%` = 0, `2.5%` = 5, 
`2.5%` = 8, `2.5%` = 11, `2.5%` = 15, `2.5%` = 20, `2.5%` = 28, 
`2.5%` = 43, `2.5%` = 80), x_q05 = c(`5%` = 0, `5%` = 5, `5%` = 8, 
`5%` = 11, `5%` = 15, `5%` = 20, `5%` = 28, `5%` = 44, `5%` = 82
), x_q50 = c(3, 7, 9, 13, 17, 23, 34, 56, 132), x_q95 = c(`95%` = 5, 
`95%` = 8, `95%` = 11, `95%` = 14, `95%` = 19, `95%` = 27, `95%` = 42, 
`95%` = 76, `95%` = 432.8), x_q975 = c(`97.5%` = 5, 
`97.5%` = 8, `97.5%` = 11, `97.5%` = 15, `97.5%` = 20, `97.5%` = 28, 
`97.5%` = 42, `97.5%` = 77, `97.5%` = 576.8), x_max = c(5, 
8, 11, 15, 20, 28, 43, 79, 1932), y_q025 = c(`2.5%` = 8.22375, 
`2.5%` = 8.9525, `2.5%` = 9.031, `2.5%` = 9.96, `2.5%` = 10.1, 
`2.5%` = 10.205, `2.5%` = 10.9, `2.5%` = 11.405, `2.5%` = 12.4
), y_q05 = c(`5%` = 10.5, `5%` = 11.8, `5%` = 12.01, `5%` = 12.31, 
`5%` = 12.5, `5%` = 13.1, `5%` = 13.21, `5%` = 13.6, `5%` = 14.8
), y_q50 = c(28.9, 30, 29.7, 29.8, 29.5, 30.1, 30.3, 30.9, 32.8
), y_q95 = c(`95%` = 137, `95%` = 113, `95%` = 109, `95%` = 104, 
`95%` = 105.9, `95%` = 101, `95%` = 98.58, `95%` = 90.39, 
`95%` = 98.03), y_q975 = c(`97.5%` = 185.925, `97.5%` = 158, 
`97.5%` = 152, `97.5%` = 152, `97.5%` = 155, `97.5%` = 139, `97.5%` = 136, 
`97.5%` = 128, `97.5%` = 132.95), varx = c(5, 3, 3, 4, 5, 8, 
15, 36, 1853)), row.names = c(NA, -9L), class = c("tbl_df", "tbl", 
"data.frame"))

Code attempt:

library(tidyverse)
library(scales)

# Attempted plot: one boxplot per row, built from the precomputed y quantiles
# (stat = "identity") and positioned at the x median x_q50.
ggplot(data = dat, mapping = aes(x = x_q50)) +
  geom_boxplot(
    mapping = aes(
      fill = grp,
      ymin = y_q025,
      lower = y_q05,
      middle = y_q50,
      upper = y_q95,
      ymax = y_q975
    ),
    stat = "identity",
    varwidth = TRUE
  ) +
  # one fill colour per grp level
  scale_fill_manual(
    values = c(
      "#FEE186", "#FEB24C", "#FD9E45",
      "#FD8A3E", "#FC7637", "#FC6230",
      "#FC4E2A", "#E33E29", "#CA2E28"
    )
  ) +
  # modulus-transformed x axis with hand-picked breaks and matching labels
  scale_x_continuous(
    trans = modulus_trans(0.2),
    limits = c(0, 2000),
    breaks = c(
      0, 10, 20, 30, 40, 50, 100, 150, 200,
      250, 500, 750, 1000, 1500, 2000
    ),
    labels = c(
      0, 10, 20, 30, 40, 50, 100, 150, 200,
      250, 500, 750, 1000, 1500, 2000
    )
  )

Solution

  • Since you have already calculated what you want, I suggest switching from geom_boxplot(.., stat="identity") to geom_rect() + geom_segment().

    # Pseudo-boxplots drawn from precomputed quantiles: the box spans
    # x_q05..x_q95 horizontally and y_q05..y_q95 vertically, and the whisker
    # sits at the x median x_q50.
    ggplot(dat, aes(x = x_q50)) +
      # whisker: vertical line at the x median, from y_q025 to y_q975
      geom_segment(aes(xend = x_q50, y = y_q025, yend = y_q975)) +
      # the boxes themselves
      geom_rect(
        aes(xmin = x_q05, xmax = x_q95, ymin = y_q05, ymax = y_q95,
            fill = grp), color = "black"
      ) +
      # horizontal y-median line across the box; yend is spelled out because
      # older ggplot2 releases require both xend and yend for geom_segment()
      geom_segment(aes(x = x_q05, xend = x_q95, y = y_q50, yend = y_q50)) +
      scale_x_continuous(limits=c(0, 2000),
                         breaks = c(0,10,20,30,40,50,100,150,200,
                                    250,500,750,1000,1500,2000),
                         labels = c(0,10,20,30,40,50,100,150,200,
                                    250,500,750,1000,1500,2000),
                         trans = scales::modulus_trans(0.2)) +
      # the y axis mixes several y quantiles, so no single axis title is shown
      scale_y_continuous(name = NULL) +
      scale_fill_manual(values = c("#FEE186", "#FEB24C", "#FD9E45",
                                  "#FD8A3E", "#FC7637", "#FC6230", 
                                  "#FC4E2A","#E33E29", "#CA2E28"))
    

    Showing your original on top, the new plot below:

    [Image: ggplot pseudo-boxplot using segment+rect+segment]


    Data

    # One-line dput() of the nine-row `dat` used above; it appears to match the
    # question's data, with a few values showing floating-point noise
    # (e.g. 432.799999999999 where the question has 432.8).
    dat <- structure(list(grp = structure(1:9, levels = c("0", "1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"), nb = c(4364L, 4363L, 4363L, 4363L, 4363L, 4363L, 4363L, 4363L, 4363L), x_min = c(0, 5, 8, 11, 15, 20, 28, 43, 79), x_q025 = c("2.5%" = 0, "2.5%" = 5, "2.5%" = 8, "2.5%" = 11, "2.5%" = 15, "2.5%" = 20, "2.5%" = 28, "2.5%" = 43, "2.5%" = 80), x_q05 = c("5%" = 0, "5%" = 5, "5%" = 8, "5%" = 11, "5%" = 15, "5%" = 20, "5%" = 28, "5%" = 44, "5%" = 82), x_q50 = c(3, 7, 9, 13, 17, 23, 34, 56, 132 ), x_q95 = c("95%" = 5, "95%" = 8, "95%" = 11, "95%" = 14, "95%" = 19, "95%" = 27, "95%" = 42, "95%" = 76, "95%" = 432.799999999999), x_q975 = c("97.5%" = 5, "97.5%" = 8, "97.5%" = 11, "97.5%" = 15, "97.5%" = 20, "97.5%" = 28, "97.5%" = 42, "97.5%" = 77, "97.5%" = 576.799999999999), x_max = c(5, 8, 11, 15, 20, 28, 43, 79, 1932), y_q025 = c("2.5%" = 8.22375, "2.5%" = 8.9525, "2.5%" = 9.031, "2.5%" = 9.96, "2.5%" = 10.1, "2.5%" = 10.205, "2.5%" = 10.9, "2.5%" = 11.405, "2.5%" = 12.4), y_q05 = c("5%" = 10.5,  "5%" = 11.8, "5%" = 12.01, "5%" = 12.31, "5%" = 12.5, "5%" = 13.1, "5%" = 13.21, "5%" = 13.6, "5%" = 14.8), y_q50 = c(28.9, 30, 29.7, 29.8, 29.5, 30.1, 30.3, 30.9, 32.8), y_q95 = c("95%" = 137, "95%" = 113, "95%" = 109, "95%" = 104, "95%" = 105.9, "95%" = 101, "95%" = 98.5799999999999, "95%" = 90.39, "95%" = 98.0299999999997), y_q975 = c("97.5%" = 185.925, "97.5%" = 158, "97.5%" = 152, "97.5%" = 152, "97.5%" = 155, "97.5%" = 139, "97.5%" = 136, "97.5%" = 128, "97.5%" = 132.95), varx = c(5, 3, 3,  4, 5, 8, 15, 36, 1853)), row.names = c(NA, -9L), class = c("tbl_df", "tbl", "data.frame"))