I am trying to make a box plot that shows not only the overall mean of the data in each box (red dot) but also the means of the 9 individuals included in the data set for each behaviour I am looking at. So, rather than plotting all the raw data as shown below, I want it to plot the average for each individual for each behaviour.
This is the current graph; I want approximately 9 means per box plus the overall mean shown in red.
library(ggplot2)

# Box plot of Roll_Avg for each Behaviour. Every raw observation is jittered
# over its box and the per-behaviour mean is overlaid as a large red dot.
# `my_scale` is the manual fill scale defined elsewhere in this post.
ggplot(
  data = Seen2,
  mapping = aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)
) +
  # Outlier glyphs are suppressed (outlier.shape = NA).
  geom_boxplot(outlier.shape = NA) +
  # All rows of Seen2, spread horizontally (and slightly vertically).
  geom_point(
    mapping = aes(fill = Behaviour),
    position = position_jitter(width = 0.2, height = 0.1),
    size = 2
  ) +
  # Mean of the plotted Roll_Avg values in each behaviour.
  stat_summary(
    geom = "point",
    fun = mean,
    colour = "red",
    fill = "red",
    shape = 20,
    size = 5
  ) +
  my_scale +
  ylim(-30, 30) +
  theme_classic() +
  theme(axis.text.y = element_text(angle = 0, size = 16))
EDIT
I now need to put the behaviours in a specific order, each with a specific colour. The code worked fine before adding the jitter; however, now it won't order them. See the full code with @Mark's fix below.
# Desired display order of the behaviours. The labels must match the values
# stored in Seen2$Behaviour exactly (the example data use dots, not spaces);
# any value that is not listed here would be turned into NA by factor().
behaviour_levels <- c(
  "Burst", "High.energy.swimming", "Medium.energy.swimming",
  "Low.energy.swimming", "Travel", "Ascending", "Descending"
)

# Apply the order to the data frame that is actually plotted.
Seen2$Behaviour <- factor(Seen2$Behaviour, levels = behaviour_levels)
# `means` (the per-individual averages) is drawn as a second layer, so give it
# the same level order to keep the x axis and the fill legend consistent.
means$Behaviour <- factor(means$Behaviour, levels = behaviour_levels)

# One colour per behaviour, named by factor level so each colour stays attached
# to its behaviour regardless of plotting order.
# NOTE(review): colours are paired with behaviours in the level order above
# (first colour -> "Burst", ...). Naming them from levels(factor(<character>))
# pairs them alphabetically instead; reorder my_colors if that was intended.
my_colors <- c(
  "#CCFFFF", "#000000", "#7F7F7F", "#336699",
  "#008080", "#00CCFF", "#264AE2"
)
names(my_colors) <- levels(Seen2$Behaviour)
my_scale <- scale_fill_manual(name = "Behaviour", values = my_colors)

ggplot(Seen2, aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)) +
  geom_boxplot(outlier.shape = NA) +
  # Points from `means`; x, y and fill are inherited from ggplot().
  geom_point(
    data = means,
    size = 2,
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  # Overall mean of the plotted Roll_Avg values in each behaviour.
  stat_summary(
    fun = mean, geom = "point",
    shape = 20, size = 5, color = "red", fill = "red"
  ) +
  theme_classic() +
  my_scale +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  # NOTE(review): ylim() removes observations outside the limits before the
  # box and mean statistics are computed; coord_cartesian(ylim = c(-30, 30))
  # would zoom without dropping data -- confirm which is intended.
  ylim(-30, 30)
# Example data (dput() output): a data.frame with 111 rows and three columns:
#   SharkID   - integer id (six distinct values in this extract: 3, 4, 5, 6, 7, 9)
#   Behaviour - character label; words are separated by dots, not spaces
#               (e.g. "High.energy.swimming")
#   Roll_Avg  - numeric value plotted on the y axis
# Two Roll_Avg values are -90, which lies outside the -30..30 y-range used by
# the plots in this post.
Seen2 <- structure(list(SharkID = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L), Behaviour = c("Low.energy.swimming",
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming",
"Low.energy.swimming", "High.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "Travel", "Travel", "Travel", "Travel",
"Travel", "Burst", "Burst", "Burst", "Burst", "Burst", "Ascending",
"Ascending", "Ascending", "Ascending", "Ascending", "Ascending",
"Ascending", "Descending", "Descending", "Descending", "Descending",
"Descending", "Descending", "High.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "High.energy.swimming", "Medium.energy.swimming",
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming",
"Medium.energy.swimming", "High.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "High.energy.swimming", "Medium.energy.swimming",
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming",
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming",
"Medium.energy.swimming", "Medium.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "Burst", "Burst", "Burst", "Burst", "Burst",
"Burst", "High.energy.swimming", "High.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming",
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming",
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming",
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming",
"Low.energy.swimming", "Low.energy.swimming", "Low.energy.swimming",
"Low.energy.swimming", "Low.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "High.energy.swimming", "High.energy.swimming",
"High.energy.swimming", "Burst", "Burst", "Burst", "Burst", "Burst",
"Burst", "Burst", "Medium.energy.swimming", "Medium.energy.swimming",
"Medium.energy.swimming", "Medium.energy.swimming", "Medium.energy.swimming",
"Medium.energy.swimming"), Roll_Avg = c(3.97084, 3.90604, 3.90738,
3.80425, 3.4154, -0.993225, -0.940408, -0.55992, -0.791121, -1.83573,
-3.41667, -14.0837, -14.9381, -16.4732, -16.6994, -15.5318, -18.2402,
-19.4427, -22.8129, -27.009, -27.3907, 17.3778, 13.4861, 7.82564,
4.63057, 6.94956, 14.3372, 22.0873, -11.5397, -11.7741, -11.4795,
-10.7844, -10.5135, -11.0162, -90, -90, 11.0157, 6.13595, 2.2689,
-0.710414, -5.56132, -12.0987, -9.70231, -7.13388, -5.41693,
-4.23157, 2.11092, 2.19057, 1.5597, 0.637742, 1.17135, 3.41601,
4.71664, 4.61525, -0.813111, -4.45238, -7.43746, -9.11626, -9.94338,
-11.0361, -11.8852, -10.472, -5.12697, 2.61247, 9.80993, 17.307,
10.5466, -4.01104, -7.40708, -2.72602, -5.43834, -5.22419, -4.8472,
-4.43957, -1.67914, 2.39693, 7.84736, -9.7158, -8.70349, -8.22463,
-8.22878, -9.43265, -0.527293, -0.283262, -0.614311, -0.380123,
-0.344986, 7.73204, 7.47037, 7.00224, 7.01661, 7.38737, 7.83069,
-1.83138, -1.7847, -1.68084, -1.61196, -1.49905, -1.61391, -1.46356,
-0.986477, -0.806394, -0.883015, -0.840026, -0.727501, -1.15641,
-1.28692, -1.38961, -1.43838, -1.42089, -1.27225)), class = "data.frame", row.names = c(NA,
-111L))
What you can do is create a separate data frame containing just the values you want to plot, and then reference it in the geom_point() call:
library(tidyverse)

# Fix the random seed so the sampling below is reproducible.
set.seed(123)

# Upper bound on the number of individual means drawn per behaviour.
number_of_samples <- 9

# Put the behaviours in the desired display order.
Seen2 <- Seen2 %>%
  mutate(
    Behaviour = factor(
      Behaviour,
      levels = c(
        "Burst", "High.energy.swimming", "Medium.energy.swimming",
        "Low.energy.swimming", "Travel", "Ascending", "Descending"
      )
    )
  )

# One row per Behaviour x SharkID holding that individual's mean Roll_Avg,
# limited to at most `number_of_samples` randomly chosen rows per behaviour.
means <- Seen2 %>%
  group_by(Behaviour, SharkID) %>%
  # .groups = "drop" makes the grouping of the result explicit.
  summarise(Roll_Avg = mean(Roll_Avg), .groups = "drop") %>%
  group_by(Behaviour) %>%
  # Some behaviours have fewer than 9 individuals; slice_sample() then keeps
  # every row of that group, i.e. it takes min(9, rows in the group).
  slice_sample(n = number_of_samples) %>%
  ungroup()

ggplot(Seen2, aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)) +
  geom_boxplot(outlier.shape = NA) +
  # x, y and fill are inherited from ggplot(), so only the data is swapped.
  geom_point(
    data = means,
    size = 2,
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  # Overall mean of the plotted Roll_Avg values in each behaviour.
  stat_summary(
    fun = mean, geom = "point",
    shape = 20, size = 5, color = "red", fill = "red"
  ) +
  theme_classic() +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  # NOTE(review): ylim() removes observations outside the limits before the
  # box and mean statistics are computed, whereas `means` is computed from all
  # rows; coord_cartesian(ylim = c(-30, 30)) would zoom without dropping data
  # -- confirm which is intended.
  ylim(-30, 30)