I'm trying to create a barplot showing the relative percentages in age categories within group (event location). I have one category that is not present in my observations (younger than 18 years old, namely category 1). I've tried it with "drop=FALSE", see: scale_x_discrete(breaks=c("1", "2","3","4","5","6"), labels=vec.age_cat, drop=FALSE).
Here is what I have done so far. How can I display the first category, which has zero observations?
# Example data: 15 observations from two event locations (study 1 and 2).
# Age category 1 never occurs in age_cat.
df.s_c_subset <- data.frame(
  study = c(rep(1, 10), rep(2, 5)),
  age_cat = c(3, 3, 4, 2, 6, 3, 2, 5, 2, 4, 5, 6, 5, 4, 4)
)

# Cross-tabulate event location against age category.
table(df.s_c_subset)

# Display labels for the six age categories, in category order (1 to 6).
vec.age_cat <- c(
  "< 18 years", "18-23 years", "24-27 years",
  "28-35 years", "36-45 years", "> 45 years"
)
# Dodged bar chart of the share of each age category within event location.
# This is the attempt described in the question: age category 1 has no
# observations and is still not displayed, even with drop = FALSE on the
# x scale.
ggplot(
  data = df.s_c_subset,
  aes(x = as.factor(age_cat), fill = as.factor(study))
) +
  geom_bar(
    # Bar height: count divided by the total count of its fill group.
    aes(y = ..count.. / tapply(..count.., ..fill.., sum)[..fill..]),
    position = "dodge"
  ) +
  scale_fill_manual(
    values = c("grey40", "grey60"),
    name = "event location",
    labels = c("university", "outside")
  ) +
  geom_text(
    # Same within-group share as the bars, printed as a whole percent.
    aes(
      y = ..count.. / tapply(..count.., ..fill.., sum)[..fill..],
      label = scales::percent(
        ..count.. / tapply(..count.., ..fill.., sum)[..fill..],
        accuracy = 1
      )
    ),
    stat = "count", position = position_dodge(0.9), vjust = -0.5
  ) +
  ylab("percent of audience relative to location") +
  xlab("age groups") +
  theme(axis.text.x = element_text(angle = 45, hjust = .8)) +
  theme(axis.ticks.x = element_blank()) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.45)) +
  scale_x_discrete(
    breaks = c("1", "2", "3", "4", "5", "6"),
    labels = vec.age_cat,
    drop = FALSE
  ) +
  theme(
    panel.border = element_rect(
      linetype = "solid", colour = "black", linewidth = .5, fill = NA
    ),
    panel.grid.minor = element_line(colour = "grey80", linewidth = .3),
    panel.grid.major.y = element_line(colour = "grey80", linewidth = .3),
    panel.background = element_rect(fill = "grey97")
  ) +
  theme(
    axis.title.x.bottom = element_text(margin = margin(t = .2, unit = "in"))
  )
It would be great if you could share some ideas. Best, Hilke
As it stands, `drop = FALSE` will have no effect, because your factor does not include the missing age category. Setting the `breaks` will have no effect either, as only breaks that fall inside the limits are displayed.
Instead, to fix your issue, set the appropriate levels when converting to a factor; I would also suggest setting the labels at this stage. Afterwards you should be fine with `drop = FALSE`.
Additionally, note that I dropped the `..` notation, which is deprecated, and switched to `after_stat()` instead. I also dropped `tapply()` (which I also used in the past) and use `ave()` instead.
# Display labels for the six age categories, in category order (1 to 6).
vec.age_cat <- c(
  "< 18 years", "18-23 years", "24-27 years",
  "28-35 years", "36-45 years", "> 45 years"
)

# Example data: ten observations for study 1 followed by five for study 2.
# Age category 1 does not occur in age_cat.
df.s_c_subset <- data.frame(
  study = rep(c(1, 2), times = c(10, 5)),
  age_cat = c(3, 3, 4, 2, 6, 3, 2, 5, 2, 4, 5, 6, 5, 4, 4)
)
library(ggplot2)
# Dodged bar chart of the share of each age category within event location.
# The x factor is built with all six levels (and their labels) up front, so
# that drop = FALSE on the x scale keeps the category with no observations.
ggplot(
  data = df.s_c_subset,
  mapping = aes(
    x = factor(age_cat, levels = 1:6, labels = vec.age_cat),
    fill = factor(study)
  )
) +
  # Bars: count divided by the total count of the same fill group.
  geom_bar(
    mapping = aes(y = after_stat(count / ave(count, fill, FUN = sum))),
    position = position_dodge()
  ) +
  # Labels: the same share, printed as a whole percent above each bar.
  geom_text(
    mapping = aes(
      y = after_stat(count / ave(count, fill, FUN = sum)),
      label = after_stat(
        scales::percent(count / ave(count, fill, FUN = sum), accuracy = 1)
      )
    ),
    stat = "count",
    position = position_dodge(width = 0.9),
    vjust = -0.5
  ) +
  scale_x_discrete(drop = FALSE) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.45)) +
  scale_fill_manual(
    name = "event location",
    values = c("grey40", "grey60"),
    labels = c("university", "outside")
  ) +
  labs(
    x = "age groups",
    y = "percent of audience relative to location"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = .8),
    axis.ticks.x = element_blank(),
    axis.title.x.bottom = element_text(margin = margin(t = .2, unit = "in")),
    panel.border = element_rect(
      linetype = "solid", colour = "black", linewidth = .5, fill = NA
    ),
    panel.grid.minor = element_line(colour = "grey80", linewidth = .3),
    panel.grid.major.y = element_line(colour = "grey80", linewidth = .3),
    panel.background = element_rect(fill = "grey97")
  )