rggplot2shapes

Independently setting colour, fill and shape in a ggplot in R


I've got a plot in R where I want the point shape to be determined by the source dataset, and the fill to be determined by the name of the 'module' (they're named using colours). Ideally I would like the border of the points to be black, as it helps see the faintly coloured points. However, when I try setting the border to black, it makes the fill on all the points in the legend dark grey. It seems that when varying the shape, the fill on the legend points depends on the same variable being used for colour and fill aesthetics. Is there any way round this? Here's an example.

# Demo data: 49 modules from two reference datasets
# (21 rows for rnaseq_20221003 followed by 28 rows for rnaseq_20240520)
my_data <- structure(
  list(
    reference_dataset = rep(
      c("rnaseq_20221003", "rnaseq_20240520"),
      times = c(21L, 28L)
    ),
    module_colour = c(
      # rnaseq_20221003
      "black", "blue", "brown", "cyan", "green", "greenyellow", "grey",
      "grey60", "lightcyan", "lightgreen", "lightyellow", "magenta",
      "midnightblue", "pink", "purple", "red", "royalblue", "salmon",
      "tan", "turquoise", "yellow",
      # rnaseq_20240520
      "black", "blue", "brown", "cyan", "darkgreen", "darkgrey",
      "darkorange", "darkred", "darkturquoise", "green", "greenyellow",
      "grey", "grey60", "lightcyan", "lightgreen", "lightyellow",
      "magenta", "midnightblue", "orange", "pink", "purple", "red",
      "royalblue", "salmon", "tan", "turquoise", "white", "yellow"
    ),
    module_size_true = c(
      # rnaseq_20221003
      795L, 4433L, 4383L, 201L, 1209L, 319L, 538L, 152L, 165L, 138L,
      109L, 438L, 197L, 542L, 340L, 1012L, 74L, 254L, 310L, 5949L,
      2391L,
      # rnaseq_20240520
      1289L, 2324L, 1861L, 205L, 64L, 56L, 50L, 75L, 59L, 1660L, 443L,
      245L, 141L, 183L, 138L, 137L, 848L, 190L, 53L, 911L, 539L, 1345L,
      89L, 222L, 322L, 2679L, 48L, 1690L
    ),
    value = c(
      # rnaseq_20221003
      45.3024014265278, 328.536315093724, 47.6289317994421,
      8.0722217506375, 47.0102826407721, 25.6212729407405,
      15.9667402537371, 43.5506843039375, 4.16479576957992,
      56.0505293265084, 101.807009938558, 133.508372868264,
      72.8772204323272, 16.2228120071162, 16.3297653870868,
      60.741103280882, 15.3258681509535, 60.4276695931572,
      22.4971863901054, 115.195014124107, 11.3713698424922,
      # rnaseq_20240520
      290.924221668345, 176.602120468604, 46.2140988305588,
      17.2104833834473, 19.4937904424201, 72.5866270648201,
      12.689002053516, 11.2936959098718, 16.3326371354103,
      34.7210766570992, 45.6418962051133, -0.47009898715946,
      10.8819845099283, 9.68269199193089, 28.0999204258514,
      27.8777614805833, 86.4872510202394, 131.026324855926,
      25.6898941094669, 242.270714620409, 95.4247731014355,
      211.805005117939, 6.68556677101457, 56.6833146113187,
      21.8917926014161, 159.668328998717, 34.111172219365,
      426.963015310965
    )
  ),
  row.names = c(NA, -49L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Plot without black border: point outline and fill both follow module_colour,
# and the point shape follows the source dataset
ggplot(
  my_data,
  aes(
    x = module_size_true,
    y = value,
    fill = module_colour,
    shape = reference_dataset,
    colour = module_colour
  )
) +
  # Background band covering y in [0, 10]
  geom_rect(
    aes(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 10),
    colour = NA, fill = "lightgrey", alpha = 0.05
  ) +
  # Darker background band covering y in [0, 2]
  geom_rect(
    aes(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 2),
    colour = NA, fill = "darkgrey", alpha = 0.05
  ) +
  geom_hline(colour = "black", yintercept = 0) +
  geom_point(alpha = 0.7, size = 4) +
  scale_x_continuous(breaks = pretty_breaks()) +
  scale_y_continuous(breaks = pretty_breaks()) +
  # Modules are named after colours, so each name maps to itself
  scale_fill_manual(
    values = with(my_data, setNames(module_colour, module_colour))
  ) +
  scale_colour_manual(
    values = with(my_data, setNames(module_colour, module_colour))
  ) +
  # 21 = fillable circle, 24 = fillable triangle
  scale_shape_manual(values = c(21, 24)) +
  theme_bw()

enter image description here

# Plot with black border: the outline is fixed to black on the point layer and
# only fill is mapped to module_colour (this is the version whose fill legend
# keys come out dark)
ggplot(
  my_data,
  aes(
    x = module_size_true,
    y = value,
    fill = module_colour,
    shape = reference_dataset
  )
) +
  # Background band covering y in [0, 10]
  geom_rect(
    aes(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 10),
    colour = NA, fill = "lightgrey", alpha = 0.05
  ) +
  # Darker background band covering y in [0, 2]
  geom_rect(
    aes(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 2),
    colour = NA, fill = "darkgrey", alpha = 0.05
  ) +
  geom_hline(colour = "black", yintercept = 0) +
  geom_point(colour = "black", alpha = 0.7, size = 4) +
  scale_x_continuous(breaks = pretty_breaks()) +
  scale_y_continuous(breaks = pretty_breaks()) +
  # Modules are named after colours, so each name maps to itself
  scale_fill_manual(
    values = with(my_data, setNames(module_colour, module_colour))
  ) +
  # 21 = fillable circle, 24 = fillable triangle
  scale_shape_manual(values = c(21, 24)) +
  theme_bw()

enter image description here

I've tried googling and following the suggestions from a few posts, but none of them seem to work with this example.


Solution

  • The issue is that the legend for the fill scale draws its keys with the default point shape (19), which does not support the fill aesthetic, so you end up with all black keys. You can fix that by overriding the shape used for the guide via the override.aes= argument of guide_legend().

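    Also note that I switched to annotate() to add the rectangles (with geom_rect() and aes() you are drawing one rectangle for each row of your data, stacked on top of each other), and that I use scale_fill_identity(), since the values of module_colour are already valid colour names: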

    library(ggplot2)
    library(scales)
    
    # Plot with black border and a fill legend that shows the module colours
    ggplot(my_data, aes(
      x = module_size_true, y = value,
      fill = module_colour,
      shape = reference_dataset
    )) +
      # annotate() draws each background band exactly once, instead of once
      # per data row as a geom_rect() layer with aes() would
      annotate(
        geom = "rect",
        xmin = -Inf, xmax = Inf, ymin = 0, ymax = 10,
        fill = "lightgrey", colour = NA
      ) +
      annotate(
        geom = "rect",
        xmin = -Inf, xmax = Inf, ymin = 0, ymax = 2,
        fill = "darkgrey", colour = NA
      ) +
      geom_hline(yintercept = 0, colour = "black") +
      # Constant black outline; fill and shape still come from the mapping.
      # No trailing comma after the last argument: it would pass an empty
      # argument to the layer.
      geom_point(
        size = 4, alpha = 1, colour = "black"
      ) +
      scale_x_continuous(breaks = pretty_breaks()) +
      scale_y_continuous(breaks = pretty_breaks()) +
      # module_colour already holds colour names, so use them as-is. The legend
      # keys are forced to shape 21, which has a fill, so the fill is visible.
      scale_fill_identity(
        guide = guide_legend(override.aes = list(shape = 21))
      ) +
      # 21 = fillable circle, 24 = fillable triangle
      scale_shape_manual(values = c(21, 24)) +
      theme_bw()