rplot

Need specific graphic output of years and percentages


I need help with making a specific graphic output. I have a simple data frame with years in rows and numbers (percentages) in columns with two-letter headers. The graphic result should be something like this:

enter image description here

I made this example in Excel, but I need an R script, because I have to change the numbers and colors every now and then and want the flexibility a script gives me. The cell color is determined by the value, for example 0-4.5 (red), 4.5-7 (dark red), 7-10 (darker red), >10 (violet); these ranges and colors are only examples. Cells with no data should have no color.

If it can be done with base R graphics, that is preferable, but ggplot2 is OK too.

Sample data frame below:

dput(combined_df)
structure(list(year = c(1970, 1971, 1972, 1973, 1974, 1975, 1976, 
1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 
1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 
1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 
2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 
2021, 2022, 2023), BB = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 6.70202507232401, 7.9556412729026, 7.03953712632594, 
6.07521697203472, 7.56991321118611, 8.72709739633558, 12.8736740597878, 
14.7477360931436, 15.006468305304, 10.7373868046572, 11.1901681759379, 
10.8020698576973, 11.1254851228978, 9.57309184993532, 10.2846054333765, 
10.608020698577, 4.77707006369427, 4.88322717622081, 4.81245576786978, 
3.96319886765747, 4.77707006369427, 5.20169851380042, 5.34324133050248, 
5.09554140127389, 5.1309271054494, 5.09554140127389, 4.03397027600849),
BS = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 0.264750378214826, 0.037821482602118, 0.037821482602118, 
0.037821482602118, 0.075642965204236, 0.075642965204236, 0.113464447806354, 
0.037821482602118, 0, 0.113464447806354, 0.037821482602118, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), CS = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, 0.442628532516173, 0.612870275791624, 
0.408580183861083, 0.374531835205993, 0.476676881171263, 0.408580183861083, 
0.919305413687436, 0.646918624446714), DS = c(NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 7.11111111111111, 6.77777777777778, 5.66666666666667, 
8, 7, 6.11111111111111, 5.55555555555556, 7, 5.88888888888889, 
6.44444444444444, NA, NA, NA, NA, 4.07142857142857, 3.92857142857143, 
4, 4.07142857142857, 4.57142857142857, 6.28571428571429, 4.28571428571429, 
5.28571428571429, 4.42857142857143, 4.21428571428571, 3.85714285714286, 
5.07142857142857, 3.87878787878788, 2.90909090909091, 2.5949953660797, 
3.1047265987025, 3.52177942539388, 3.19740500463392, 4.35588507877665, 
4.58758109360519, 5.60704355885079, 5.88507877664504, 4.30954587581094, 
3.05838739573679, 3.66079703429101, 3.17617866004963, 3.07692307692308, 
3.97022332506203, 2.72952853598015, 2.77915632754342, 2.4317617866005
), FB = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.41511151570323, 
5.4619936276741, 4.41511151570323, 4.46062812926718, 3.45926263086026, 
3.82339553937187, 4.64269458352299, 4.46062812926718, 4.50614474283113, 
4.36959490213928, 4.36959490213928, 4.26086956521739, 4.43478260869565, 
4.43478260869565, 4.65217391304348, 4.73913043478261, 4.21663442940039, 
4.13926499032882, 4.33268858800774, 4.13926499032882, 3.52030947775629, 
4.4100580270793, 4.10058027079304, 3.82978723404255, 4.21663442940039, 
4.68085106382979, 4.21663442940039, NA, NA, NA)), row.names = c(NA, -54L), 
class = "data.frame")

Solution

  • This is really quite easy with ggplot2::geom_tile -- either way you'll want the data in "long format" first:

    # Bin edges and one fill colour per bin, in ascending order. Edit these two
    # vectors to change the thresholds or the palette; nothing else needs to
    # be touched.
    bin_breaks <- c(0, 4.5, 7, 10, Inf)
    bin_fills <- c("forestgreen", "red2", "red4", "violet")
    stopifnot(length(bin_fills) == length(bin_breaks) - 1L)

    # Reshape to long format: one row per (year, series) pair, with the series
    # name in `name` and the percentage in `value`.
    long_df <- tidyr::pivot_longer(combined_df, -"year") |>
      dplyr::mutate(
        # cut() builds right-closed intervals, so without include.lowest = TRUE
        # a value of exactly 0 falls outside the first bin, becomes NA and is
        # drawn like a missing cell.
        category = cut(value, breaks = bin_breaks, include.lowest = TRUE)
      )

    # Name the colours after the labels cut() actually produced, so the
    # mapping cannot drift out of sync when the breaks are changed.
    colors <- stats::setNames(bin_fills, levels(long_df$category))

    # One tile per year and series; NA cells (no data) are filled white.
    # Zero expansion on both axes makes the tiles touch the axis lines.
    ggplot2::ggplot(
      long_df,
      ggplot2::aes(x = year, y = name, fill = category)
    ) +
      ggplot2::geom_tile(color = "black") +
      ggplot2::scale_fill_manual(values = colors, na.value = "white") +
      ggplot2::theme_classic() +
      ggplot2::scale_x_continuous(expand = ggplot2::expansion(0, 0)) +
      ggplot2::scale_y_discrete(expand = ggplot2::expansion(0, 0))
    

    example output of geom_tile