ggplot2 legend group ggpattern

Cannot group boxplots or add manual legend using ggpattern and 2 datasets


I am quite new to R in general and currently struggling with a specific plot I want to create.

Preferably, I would like to group my boxplots (two per Area: Good Habitat, Average Habitat, etc.) and add a legend. Since I am using two datasets from two separate Excel spreadsheets, using fill = Area in the aesthetics neither groups the plots nor gives me a legend. I also tried adding a legend manually, as seen in other posts, but because I am using two datasets that does not work either.

This is the base code I use for this plot. The striped boxplots are supposed to show that larvae are present, whereas the empty boxplots show the availability of the host plant:

# Datasets ----
# One sheet per workbook: the control plots and the plots with larvae.
ControlPlots <- read_excel(
  "ApolloLarva_EnvVari_PerGrid.xlsx",
  sheet = "ControlPlots_InHabitat"
)
LarvaDataAreas <- read_excel(
  "ApolloLarva_M_CombinedAreas.xlsx",
  sheet = "OnlyLarvaPlots"
)

# Rename areas for the x-axis ----
# The two workbooks code the areas differently, so each data set gets its own
# regex -> label table; both end up with the same display labels.
control_area_labels <- c(
  "^.*Core" = "Good Habitat",
  "^.*Ref" = "Average Habitat",
  "^.*Rest2020" = "Targeted Restoration",
  "^.*Rest2021" = "Non-targeted Restoration"
)
larva_area_labels <- c(
  "^.*Core_Area" = "Good Habitat",
  "^.*Ref_Area" = "Average Habitat",
  "^.*Rest_Area_2020" = "Targeted Restoration"
)

# Apply every pattern in `labels` (names = regex, values = replacement) to
# `areas`, one after the other, and return the relabelled vector.
relabel_areas <- function(areas, labels) {
  for (area_pattern in names(labels)) {
    areas <- gsub(area_pattern, labels[[area_pattern]], areas)
  }
  areas
}

ControlPlots$Area <- relabel_areas(ControlPlots$Area, control_area_labels)
LarvaDataAreas$Area <- relabel_areas(LarvaDataAreas$Area, larva_area_labels)

# Plot ----
# The plain boxes use ControlPlots (inherited from ggplot()); the striped
# boxes use LarvaDataAreas and are drawn as a second layer on the same axes.
a <- ggplot(data = ControlPlots, mapping = aes(x = Area, y = Hostplant)) +
  geom_boxplot(colour = "black") +
  geom_boxplot_pattern(
    data = LarvaDataAreas,
    fill = "white",
    colour = "black",
    pattern_fill = "black",
    pattern_colour = "black",
    pattern_density = 0.02,
    pattern_spacing = 0.01,
    alpha = 0.8
  ) +
  labs(x = "Areas", y = "Hostplant cover [%]", title = "") +
  theme_bw()

What the plot currently looks like

If anyone has any tips, I would be really grateful! Thank you!!

UPDATE: Dataset:

dput(ControlPlots2)
structure(list(Area = c("Rest2020", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021"), GridID = c(13069, 11053, 11053, 11053, 11053, 11053, 
11053, 11053, 11053, 11053, 11053, 11053, 11053, 11053, 11052, 
11052, 11052, 11052, 11052, 11052, 11052, 11052, 11052, 11052, 
11052, 11051, 11051, 11051, 11051, 11051, 11051, 11154, 11154, 
11154, 11154, 11154, 11154, 11154, 11154, 11154, 11154, 11153, 
11153, 11153, 11153, 11153, 11153, 11153, 11153, 11153, 11153, 
11153, 11153, 11153, 11153, 11152, 11152, 11152, 11152, 11152, 
11152, 11152, 11152, 11152, 11152, 11255, 11255, 11255, 11255, 
11255, 11255, 11255, 11255, 11255, 11255, 11255, 11254, 11254, 
11254, 11254, 11254, 11254, 11254, 11254, 11254, 11254, 11254, 
11253, 11253, 11253, 11253, 11253, 11253, 11253, 11253, 11253, 
11253, 11253, 11253, 12161, 12161, 12161, 12161, 12161, 12161, 
12160, 12160, 12160, 12263, 12263, 12263, 12263, 12263, 12263, 
12263, 12262, 12262, 12262, 12262, 12262, 12262, 12262, 12262, 
12262, 12261, 12261, 12261, 12261, 12261, 12261, 12261, 12261, 
12466, 12466, 12466, 12466, 12466, 12466, 12466, 12567, 12567, 
12567, 12567, 12567, 12567, 12668, 12668, 12668, 12668, 12668, 
12668, 12668, 12668, 12667, 12667, 12667, 13272, 13272, 13272, 
13272, 13272, 13272, 13272, 13272, 13171, 13171, 13171, 13171, 
13171, 13171, 13171, 13171, 13171, 13171, 13171, 13171, 13171, 
13171, 13171, 13171, 13171, 13171, 13171, 13170, 13170, 13170, 
13170, 13170, 13170, 13170, 13170, 13170, 13170, 13170, 13170, 
13170, 13170, 13170, 13170, 13069, 13069, 13069, 13069, 13069, 
10867, 10867, 10867, 10867, 10766, 10766, 10766, 10766, 10766, 
10766, 10766, 10767, 10767, 10767, 10767, 10767, 10767, 10767, 
10767, 10767, 10768, 10768, 10768, 10768, 10768, 10768, 10768, 
10666, 10666, 10666, 10666, 10666, 10666, 10666, 10666, 10666, 
10666, 10666, 10666, 10666, 10666, 10666, 10565, 10565, 10565, 
10565, 10565, 10565, 10566, 10566, 10566, 10566, 10566, 10566, 
10566, 10566, 10566, 10566, 10568, 10568, 10568, 10568, 10568, 
10568, 10568, 10568, 10568, 10568, 10668, 10668, 10668, 10668, 
10668, 10668, 10668, 10668, 10668, 10668, 10669, 10669, 10669, 
10669, 10669, 10669, 10669, 10669, 10669, 10669, 10669, 10669, 
10770, 10770, 10770, 10770, 10770, 10770, 10770, 10770, 10770, 
10770, 10770, 10770), Hostplant = c(0, 1, 0, 0, 5, 6, 12, 14, 
0, 5, 12, 13, 16, 3, 1, 0, 0, 14, 0, 2, 2, 10, 6, 0, 0, 0, 0, 
0, 0, 0, 1, 23, 2, 3, 0, 14, 5, 0, 0, 0, 0, 3, 1, 4, 0, 6, 6, 
2, 9, 3, 6, 7, 36, 16, 3, 2, 1, 0, 4, 16, 1, 0, 8, 0, 4, 0, 9, 
7, 0, 2, 8, 4, 0, 9, 2, 7, 16, 1, 0, 0, 5, 2, 5, 0, 4, 13, 4, 
4, 9, 0, 6, 1, 1, 1, 1, 1, 3, 22, 1, 0, 0, 1, 22, 0, 1, 0, 0, 
0, 2, 0, 3, 7, 0, 17, 0, 2, 4, 5, 9, 0, 3, 3, 0, 0, 1, 2, 4, 
0, 1, 6, 0, 6, 6, 6, 15, 2, 25, 0, 5, 21, 14, 0, 3, 2, 3, 0, 
12, 10, 2, 0, 13, 4, 0, 1, 11, 6, 0, 1, 0, 0, 3, 0, 0, 0, 0, 
0, 0, 3, 0, 0, 0, 0, 3, 18, 0, 0, 3, 0, 13, 0, 0, 0, 0, 0, 5, 
0, 5, 0, 5, 10, 7, 0, 3, 21, 4, 0, 3, 0, 0, 0, 0, 0, 3, 15, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), LarvaeCount = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -306L))

Larva Dataset:

dput(LarvaDataAreas2)
structure(list(Area = c("Rest_Area_2020", "Rest_Area_2020", "Rest_Area_2020", 
"Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", 
"Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", 
"Ref_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area"), FID = c(241, 243, 291, 
226, 162, 150, 151, 154, 156, 158, 161, 174, 179, 181, 210, 213, 
24, 1, 2, 3, 5, 7, 11, 12, 13, 14, 16, 17, 18, 19, 23, 29, 30, 
31, 36, 38, 41, 44, 64, 78, 108, 123, 135), Hostplant = c(5, 
9, 15, 17, 24, 4, 9, 6, 8, 12, 12, 11, 19, 18, 11, 12, 17, 5, 
10, 24, 8, 12, 11, 10, 5, 20, 0, 4, 24, 8, 4, 1, 5, 4, 4, 2, 
8, 8, 4, 9, 16, 5, 29)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -43L))

Solution

  • As suggested in the comments, the easiest approach is to bind the two data frames and map a source-identifying column to an aesthetic. dplyr::bind_rows lets you create that ID column "on the fly".

    library(tidyverse)
    library(ggpattern)

    ## Bind the data sets; `.id` stores the name given to each input
    ## ("test" = larva plots, "control" = control plots) in `dataset`.
    df <- bind_rows(
      test = LarvaDataAreas,
      control = ControlPlots,
      .id = "dataset"
    )

    ## Mapping `pattern` to the data set identifier gives each Area one box
    ## per data set and makes ggplot build the legend.
    ggplot(df, aes(x = Area, y = Hostplant)) +
      geom_boxplot_pattern(
        aes(pattern = dataset),
        pattern_density = 0.02, pattern_spacing = 0.01,
        pattern_colour = "black", alpha = 0.8
      ) +
      ## Named values pin each pattern to its data set instead of relying on
      ## the alphabetical order of the levels; `name = NULL` drops the title.
      scale_pattern_manual(
        name = NULL,
        values = c(control = "none", test = "stripe")
      )