r ggplot2 colors geom-col

Hierarchical scale_fill_* colors


I'm wondering if anyone has an easy way to have each category be a separate color scale.

i.e., "Forb" would be all shades of blue and green, "Conifer" would be reds, "Shrub" orange and yellow, "Graminoids" purple and pink (or whatever; the exact colors don't matter, just that each grouping is distinctly different from the others).

Sample data:

# Sample data (this looks like dput() output): a 31-row data.frame with the
# columns identifier, SPP, max, readsum, funct_type, frequencyformula and
# frequency.
# The expression is not assigned to a name here; assign it before running the
# snippets in this post (the question's code calls it `top32reads`, the
# answer's code calls it `df`).
structure(list(identifier = c(1L, 2L, 5L, 6L, 17L, 7L, 4L, 11L, 
20L, 24L, 8L, 18L, 22L, 10L, 15L, 3L, 9L, 13L, 23L, 34L, 14L, 
12L, 16L, 42L, 43L, 30L, 38L, 29L, 33L, 28L, 27L), SPP = c("Penstemon", 
"Rosaceae Group 1", "Saxifraga OR Micranthes OR Boykinia", "Eriogonum", 
"Boykinia OR Saxifraga", "Vaccinium", "Hypericum", "Chamerion OR Epilobium OR Oenothera", 
"Aster Group 2", "Chrysosplenium tetrandum", "Oenothera", "Aster Group 1", 
"Poaceae", "Chamerion", "Luzula", "Abies", "Oxyria digyna", "Pinus", 
"Castilleja", "Erigeron", "Ribes", "Thalictrum", "Salix", "Xerophyllum tenax", 
"Valeriana", "Rhododendron", "Caryophyllaceae", "Sedum lanceolatum", 
"Senecio", "Polygonaceae", "Phrymaceae"), max = c(0.520063568, 
0.479127183, 0.434079314, 0.362801825, 0.217608897, 0.191388889, 
0.717687654, 0.120278432, 0.140414455, 0.078553735, 0.219305556, 
0.437633588, 0.184346498, 0.383032052, 0.178396573, 0.503981446, 
0.263381525, 0.358707915, 0.165725191, 0.046200125, 0.350292287, 
0.644661654, 0.2640831, 0.016758773, 0.021521319, 0.039176109, 
0.031850659, 0.202567022, 0.067327894, 0.20080737, 0.331692794
), readsum = c(6.716942576, 5.503499137, 3.49976764, 2.309000619, 
1.103758598, 1.913782497, 3.798417906, 1.263140584, 0.76553868, 
0.574245876, 1.616440058, 0.866744904, 0.635800875, 1.478810665, 
1.124030263, 3.881683753, 1.59921115, 1.247338241, 0.634873939, 
0.234050052, 1.246069294, 1.262268812, 1.124014166, 0.097837052, 
0.092817485, 0.344979525, 0.183615231, 0.353545529, 0.246583949, 
0.386051108, 0.390301853), funct_type = c("Forb", "Forb", "Forb", 
"Forb", "Forb", "Shrub", "Forb", "Forb", "Forb", "Forb", "Forb", 
"Forb", "Graminoid", "Forb", "Graminoid", "Conifer", "Forb", 
"Conifer", "Forb", "Forb", "Shrub", "Forb", "Shrub", "Forb", 
"Forb", "Shrub", "Forb", "Forb", "Forb", "Forb", "Forb"), frequencyformula = c(52L, 
50L, 47L, 47L, 47L, 46L, 45L, 45L, 45L, 44L, 43L, 43L, 42L, 41L, 
41L, 40L, 40L, 37L, 37L, 36L, 30L, 29L, 29L, 27L, 27L, 25L, 23L, 
22L, 22L, 20L, 18L), frequency = c(1, 0.961538462, 0.903846154, 
0.903846154, 0.903846154, 0.884615385, 0.865384615, 0.865384615, 
0.865384615, 0.846153846, 0.826923077, 0.826923077, 0.807692308, 
0.788461538, 0.788461538, 0.769230769, 0.769230769, 0.711538462, 
0.711538462, 0.692307692, 0.576923077, 0.557692308, 0.557692308, 
0.519230769, 0.519230769, 0.480769231, 0.442307692, 0.423076923, 
0.423076923, 0.384615385, 0.346153846)), class = "data.frame", row.names = c(NA, 
-31L))

and my code so far is

# Dodged bar chart of summed read percentages: one bar per SPP, clustered by
# functional type and filled on a single viridis scale.
top32reads %>%
  mutate(
    # Sort the functional types so the one holding the largest readsum
    # comes first on the x axis.
    funct_type = fct_reorder(.f = funct_type, .x = -readsum, .fun = min)
  ) %>%
  ggplot(mapping = aes(x = funct_type, y = readsum)) +
  geom_col(
    # Filling by the reordered SPP also orders the bars within each cluster
    # from largest to smallest readsum.
    mapping = aes(fill = reorder(SPP, -readsum)),
    color = "black",
    position = position_dodge2(width = .9, preserve = "single")
  ) +
  scale_fill_viridis_d() +
  labs(
    title = "Diet by Relative Read Abundance",
    x = "OTUs Consumed by Functional Type",
    y = "Sum of read percentages across samples"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(
      size = 28, face = "bold", family = "Caladea", hjust = 0.5
    ),
    axis.title = element_text(size = 16, face = "bold", family = "Caladea"),
    axis.text = element_text(size = 18, face = "bold", family = "Caladea"),
    strip.text.y = element_text(size = 18, face = "bold", family = "Caladea")
  )

Solution

  • I don't think your use case is natively supported by ggplot2's scale_fill_* functions (but I'm happy to be proven wrong). What I would do is calculate the desired colors "manually" and then use them with scale_fill_identity:

    library(dplyr)
    library(forcats)
    library(ggplot2)
    library(scales)
    library(colorspace)
    
    # Give every functional type its own base colour, darken that base colour
    # step by step for the species inside the type, and plot the resulting
    # hex codes as-is via scale_fill_identity().
    df %>%
      mutate(
        # Sort the functional types so the one holding the largest readsum
        # comes first on the x axis.
        funct_type = fct_reorder(.f = funct_type, .x = -readsum, .fun = min),
        # One qualitative brewer colour per functional type; indexing by the
        # factor picks the colour at each row's level position.
        main_color = brewer_pal(type = "qual")(
          n_distinct(funct_type)
        )[funct_type]
      ) %>%
      group_by(funct_type) %>%
      mutate(
        # Rank of each species within its functional type
        # (1 = largest readsum).
        lvl = as.integer(fct_reorder(SPP, -readsum)),
        # Turn the rank into a darkening amount in (0, 0.6]; the floor of 5
        # keeps small groups from jumping straight to the darkest shade.
        relvl = rescale(lvl, to = c(0, .6), from = c(0, max(n(), 5))),
        sub_color = darken(main_color, relvl)
      ) %>%
      # Drop the grouping so it does not leak into anything downstream.
      ungroup() %>%
      ggplot(aes(x = funct_type, y = readsum)) +
      # Group explicitly by the within-type rank: without it ggplot2 groups by
      # the hex-code strings in `sub_color`, so the dodged bars would be
      # ordered by hex string instead of from largest to smallest readsum.
      geom_col(aes(fill = sub_color, group = lvl),
               position = position_dodge2(width = .9, preserve = "single")) +
      scale_fill_identity() +
      ylab("Sum of read percentages across samples") +
      xlab("OTUs Consumed by Functional Type") +
      ggtitle("Diet by Relative Read Abundance") +
      theme_bw() +
      theme(axis.title = element_text(size = 16, face = "bold", family = "Caladea"),
            strip.text.y = element_text(size = 18, face = "bold", family = "Caladea"),
            plot.title = element_text(size = 28, face = "bold", family = "Caladea", hjust = 0.5),
            axis.text = element_text(size = 18, face = "bold", family = "Caladea")
      )
    

    I tried to split the variables in the mutate step, but you can condense those if you prefer.

    With some tweaking you can improve the result further, particularly through the choice of the main_color values; there is also room for more freedom in how the sub_color shades are derived from them.

    Created on 2022-11-04 with reprex v2.0.2