Tags: r, ggplot2, boxplot

geom_boxplot with gradient fill


I am trying to use the value column as a gradient fill in my geom_boxplot (see the dput() data frame example below) to obtain something like this. However, I have only managed to fill by Period, like so:

library(ggplot2)

# One box per Period, drawn from precomputed summary columns rather than raw
# observations: the box spans min..max (whiskers coincide with the box edges
# because ymin == lower and ymax == upper) and the middle line sits at 0.
ggplot(
  data = df,
  mapping = aes(
    x = Period,
    fill = Period,
    ymin = min,
    lower = min,
    middle = 0,
    upper = max,
    ymax = max
  )
) +
  geom_boxplot(stat = "identity")

Reproducible example data:

# Reproducible example data: 30 rows, six per Period. `value` differs on every
# row, while `max` and `min` are constant within a Period, so those columns are
# written once per Period and repeated. Row names are the (non-contiguous)
# integer indices carried over from the original, larger data frame.
df <- data.frame(
  value = c(
    # 1951-1980
    -0.256008390785499, -0.24004506049582, -0.250828695705342,
    -0.267190101219487, -0.316647825400329, -0.366330881072921,
    # 1981-2010
    -0.349098289022608, -0.341879051913029, -0.368308158446536,
    -0.377816620860917, -0.412561020729726, -0.458235576769901,
    # 2011-2040
    -0.438308012743532, -0.423376509219582, -0.446750016179227,
    -0.456357816345701, -0.494158454876249, -0.546914450257824,
    # 2041-2070
    -0.457499662919834, -0.443757432245947, -0.472335096128546,
    -0.49378723945037, -0.547975868432454, -0.600343358251158,
    # 2071-2100
    -0.406085464722591, -0.394433985798893, -0.420042741441662,
    -0.431297210839104, -0.473576129192589, -0.527286933402277
  ),
  Period = rep(
    c("1951-1980", "1981-2010", "2011-2040", "2041-2070", "2071-2100"),
    each = 6
  ),
  max = rep(
    c(
      0.815576174565362, 0.801648736136538, 0.766267272755767,
      0.76577744142212, 0.771953608938838
    ),
    each = 6
  ),
  min = rep(
    c(
      -0.558861617596901, -0.591774302902796, -0.762911874447465,
      -0.875343482743915, -1.03089239970852
    ),
    each = 6
  ),
  row.names = c(1:6, 8924:8929, 17847:17852, 44616:44621, 71385:71390),
  stringsAsFactors = FALSE
)

Solution

  • To obtain the plot you want, you should be using geom_tile, with some data management first.

    # Emulate a gradient-filled box with many thin tiles: each Period's
    # [min, max] range is expanded into 0.01 steps and every step is drawn as
    # a tile coloured by its own value.
    library(dplyr)  # group_by(), distinct()
    library(tidyr)  # expand()

    group_by(df, Period) |>
      distinct(max, min) |>
      expand(value = seq(min, max, by = 0.01)) |>
      ggplot(aes(x = Period, y = value)) +
      # Tile height matches the 0.01 step so neighbouring tiles touch.
      geom_tile(aes(fill = value), width = 0.8, height = 0.01) +
      # Zero line. The layer gets one row per Period instead of inheriting
      # every expanded row, so each segment is drawn once rather than once per
      # tile. Period is on a discrete axis, so its integer position is
      # recovered with as.numeric(as.factor()); +/- 0.4 spans the tile width.
      geom_segment(
        data = \(d) distinct(d, Period),
        mapping = aes(
          x = as.numeric(as.factor(Period)) - 0.4,
          xend = as.numeric(as.factor(Period)) + 0.4
        ),
        y = 0, yend = 0, linewidth = 2,
        inherit.aes = FALSE
      ) +
      scale_fill_gradient2(low = "blue", mid = "grey", high = "red", midpoint = 0) +
      guides(fill = guide_none()) +
      theme_classic()
    

    enter image description here

    You're really only using 5 rows of your data frame. This is your actual data that is used for plotting:

    # Collapse the 30 rows to the distinct (max, min) pair of each Period.
    df |>
      group_by(Period) |>
      distinct(max, min)
    # A tibble: 5 × 3
    # Groups:   Period [5]
      Period      max    min
      <chr>     <dbl>  <dbl>
    1 1951-1980 0.816 -0.559
    2 1981-2010 0.802 -0.592
    3 2011-2040 0.766 -0.763
    4 2041-2070 0.766 -0.875
    5 2071-2100 0.772 -1.03 
    

    We then need to expand this to get the gradient colours.

    # Expand each Period's [min, max] range into 0.01 steps (one row per tile).
    group_by(df, Period) |>
      distinct(max, min) |>
      expand(value = seq(min, max, by = 0.01))
    # A tibble: 777 × 2
    # Groups:   Period [5]
       Period     value
       <chr>      <dbl>
     1 1951-1980 -0.559
     2 1951-1980 -0.549
     3 1951-1980 -0.539
     4 1951-1980 -0.529
     5 1951-1980 -0.519
     6 1951-1980 -0.509
     7 1951-1980 -0.499
     8 1951-1980 -0.489
     9 1951-1980 -0.479
    10 1951-1980 -0.469
    # ℹ 767 more rows