I am quite new to R in general and currently struggling with a specific plot I want to create.
Preferably, I would like to group my boxplots (two per Area: Good Habitat, Average Habitat, etc.) and add a legend. Since I am using two datasets from two separate Excel spreadsheets, using fill = Area in the aesthetics neither groups the plots nor gives me a legend. I also tried adding a legend manually, as seen in other posts, but again, since I am using two datasets, it does not work.
This is the base code I use for this plot. The striped boxplots are supposed to show that larvae are present, whereas the empty boxplots show the availability of the host plant:
#DATASETS
# Two separate spreadsheets: one with the control plots, one with the plots
# on which larvae were found.
ControlPlots <- read_excel(
  "ApolloLarva_EnvVari_PerGrid.xlsx",
  sheet = "ControlPlots_InHabitat"
)
LarvaDataAreas <- read_excel(
  "ApolloLarva_M_CombinedAreas.xlsx",
  sheet = "OnlyLarvaPlots"
)

#Renaming Areas for x-axis
# Control plots: raw codes end in Core / Ref / Rest2020 / Rest2021.
ControlPlots$Area <- gsub(
  pattern = "^.*Core", replacement = "Good Habitat", x = ControlPlots$Area
)
ControlPlots$Area <- gsub(
  pattern = "^.*Ref", replacement = "Average Habitat", x = ControlPlots$Area
)
ControlPlots$Area <- gsub(
  pattern = "^.*Rest2020", replacement = "Targeted Restoration",
  x = ControlPlots$Area
)
ControlPlots$Area <- gsub(
  pattern = "^.*Rest2021", replacement = "Non-targeted Restoration",
  x = ControlPlots$Area
)
# Larva plots: raw codes carry an "_Area" part, so they need their own
# patterns to end up with the same labels as the control plots.
LarvaDataAreas$Area <- gsub(
  pattern = "^.*Core_Area", replacement = "Good Habitat",
  x = LarvaDataAreas$Area
)
LarvaDataAreas$Area <- gsub(
  pattern = "^.*Ref_Area", replacement = "Average Habitat",
  x = LarvaDataAreas$Area
)
LarvaDataAreas$Area <- gsub(
  pattern = "^.*Rest_Area_2020", replacement = "Targeted Restoration",
  x = LarvaDataAreas$Area
)

#Plot
# Two boxplot layers drawn from two different data frames on the same axes:
# plain boxes for the control plots, patterned boxes for the larva plots.
# All colours/patterns are fixed constants (nothing is mapped inside aes()).
a <- ggplot(data = ControlPlots, mapping = aes(x = Area, y = Hostplant)) +
  geom_boxplot(data = ControlPlots, colour = "black") +
  geom_boxplot_pattern(
    data = LarvaDataAreas,
    fill = "white",
    colour = "black",
    pattern_density = 0.02,
    pattern_spacing = 0.01,
    pattern_fill = "black",
    pattern_colour = "black",
    alpha = 0.8
  ) +
  theme_bw() +
  labs(
    x = "Areas",
    y = "Hostplant cover [%]",
    title = ""
  ) +
  theme()
If anyone has any tips, I would be really grateful! Thank you!!
UPDATE: Dataset:
dput(ControlPlots2)
structure(list(Area = c("Rest2020", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core",
"Core", "Core", "Core", "Core", "Core", "Core", "Ref", "Ref",
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref",
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref",
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref",
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref",
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref",
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref",
"Ref", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020",
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020",
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020",
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020",
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020",
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020",
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020",
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020",
"Rest2020", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021",
"Rest2021"), GridID = c(13069, 11053, 11053, 11053, 11053, 11053,
11053, 11053, 11053, 11053, 11053, 11053, 11053, 11053, 11052,
11052, 11052, 11052, 11052, 11052, 11052, 11052, 11052, 11052,
11052, 11051, 11051, 11051, 11051, 11051, 11051, 11154, 11154,
11154, 11154, 11154, 11154, 11154, 11154, 11154, 11154, 11153,
11153, 11153, 11153, 11153, 11153, 11153, 11153, 11153, 11153,
11153, 11153, 11153, 11153, 11152, 11152, 11152, 11152, 11152,
11152, 11152, 11152, 11152, 11152, 11255, 11255, 11255, 11255,
11255, 11255, 11255, 11255, 11255, 11255, 11255, 11254, 11254,
11254, 11254, 11254, 11254, 11254, 11254, 11254, 11254, 11254,
11253, 11253, 11253, 11253, 11253, 11253, 11253, 11253, 11253,
11253, 11253, 11253, 12161, 12161, 12161, 12161, 12161, 12161,
12160, 12160, 12160, 12263, 12263, 12263, 12263, 12263, 12263,
12263, 12262, 12262, 12262, 12262, 12262, 12262, 12262, 12262,
12262, 12261, 12261, 12261, 12261, 12261, 12261, 12261, 12261,
12466, 12466, 12466, 12466, 12466, 12466, 12466, 12567, 12567,
12567, 12567, 12567, 12567, 12668, 12668, 12668, 12668, 12668,
12668, 12668, 12668, 12667, 12667, 12667, 13272, 13272, 13272,
13272, 13272, 13272, 13272, 13272, 13171, 13171, 13171, 13171,
13171, 13171, 13171, 13171, 13171, 13171, 13171, 13171, 13171,
13171, 13171, 13171, 13171, 13171, 13171, 13170, 13170, 13170,
13170, 13170, 13170, 13170, 13170, 13170, 13170, 13170, 13170,
13170, 13170, 13170, 13170, 13069, 13069, 13069, 13069, 13069,
10867, 10867, 10867, 10867, 10766, 10766, 10766, 10766, 10766,
10766, 10766, 10767, 10767, 10767, 10767, 10767, 10767, 10767,
10767, 10767, 10768, 10768, 10768, 10768, 10768, 10768, 10768,
10666, 10666, 10666, 10666, 10666, 10666, 10666, 10666, 10666,
10666, 10666, 10666, 10666, 10666, 10666, 10565, 10565, 10565,
10565, 10565, 10565, 10566, 10566, 10566, 10566, 10566, 10566,
10566, 10566, 10566, 10566, 10568, 10568, 10568, 10568, 10568,
10568, 10568, 10568, 10568, 10568, 10668, 10668, 10668, 10668,
10668, 10668, 10668, 10668, 10668, 10668, 10669, 10669, 10669,
10669, 10669, 10669, 10669, 10669, 10669, 10669, 10669, 10669,
10770, 10770, 10770, 10770, 10770, 10770, 10770, 10770, 10770,
10770, 10770, 10770), Hostplant = c(0, 1, 0, 0, 5, 6, 12, 14,
0, 5, 12, 13, 16, 3, 1, 0, 0, 14, 0, 2, 2, 10, 6, 0, 0, 0, 0,
0, 0, 0, 1, 23, 2, 3, 0, 14, 5, 0, 0, 0, 0, 3, 1, 4, 0, 6, 6,
2, 9, 3, 6, 7, 36, 16, 3, 2, 1, 0, 4, 16, 1, 0, 8, 0, 4, 0, 9,
7, 0, 2, 8, 4, 0, 9, 2, 7, 16, 1, 0, 0, 5, 2, 5, 0, 4, 13, 4,
4, 9, 0, 6, 1, 1, 1, 1, 1, 3, 22, 1, 0, 0, 1, 22, 0, 1, 0, 0,
0, 2, 0, 3, 7, 0, 17, 0, 2, 4, 5, 9, 0, 3, 3, 0, 0, 1, 2, 4,
0, 1, 6, 0, 6, 6, 6, 15, 2, 25, 0, 5, 21, 14, 0, 3, 2, 3, 0,
12, 10, 2, 0, 13, 4, 0, 1, 11, 6, 0, 1, 0, 0, 3, 0, 0, 0, 0,
0, 0, 3, 0, 0, 0, 0, 3, 18, 0, 0, 3, 0, 13, 0, 0, 0, 0, 0, 5,
0, 5, 0, 5, 10, 7, 0, 3, 21, 4, 0, 3, 0, 0, 0, 0, 0, 3, 15, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), LarvaeCount = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -306L))
Larva Dataset: > dput(LarvaDataAreas2)
structure(list(Area = c("Rest_Area_2020", "Rest_Area_2020", "Rest_Area_2020",
"Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area",
"Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area",
"Ref_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area",
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area",
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area",
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area",
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area",
"Core_Area", "Core_Area", "Core_Area"), FID = c(241, 243, 291,
226, 162, 150, 151, 154, 156, 158, 161, 174, 179, 181, 210, 213,
24, 1, 2, 3, 5, 7, 11, 12, 13, 14, 16, 17, 18, 19, 23, 29, 30,
31, 36, 38, 41, 44, 64, 78, 108, 123, 135), Hostplant = c(5,
9, 15, 17, 24, 4, 9, 6, 8, 12, 12, 11, 19, 18, 11, 12, 17, 5,
10, 24, 8, 12, 11, 10, 5, 20, 0, 4, 24, 8, 4, 1, 5, 4, 4, 2,
8, 8, 4, 9, 16, 5, 29)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -43L))
As suggested in the comments, the easiest approach is to bind the two data frames and use a source-identifying column as an aesthetic.
dplyr::bind_rows allows you to easily create such an ID column "on the fly".
library(tidyverse)
library(ggpattern)
## bind data sets
## The names passed to bind_rows() ("test", "control") are written into the
## new `control` column via `.id`, so that column records which data frame
## each row came from.
## NOTE: assumes the Area labels of both data frames have already been
## harmonised (e.g. "Core" / "Core_Area" both renamed to "Good Habitat");
## otherwise the two sources will not share x-axis categories.
df <- bind_rows(test = LarvaDataAreas, control = ControlPlots, .id = "control")

## Mapping the source column to `pattern` inside aes() splits each Area into
## one box per source and lets the scale generate the legend.
ggplot(df, aes(x = Area, y = Hostplant)) +
  geom_boxplot_pattern(
    aes(pattern = control),
    pattern_density = 0.02, pattern_spacing = 0.01,
    pattern_colour = "black", alpha = 0.8
  ) +
  ## Named values tie each source to its pattern explicitly instead of
  ## relying on the alphabetical order of the source labels.
  scale_pattern_manual(NULL, values = c(control = "none", test = "stripe"))