ggplot2dplyr

Dplyr complete is not filling empty combinations with zeros, but instead with 1s


I have an empty combination in my dataset that I want to fill with 0, so that when I use geom_bar I will have equal width bars and spacing.

Here is my data and code and the plot I create, except that it is creating false data.

Code and data (figure is below):

# Fill empty case (e.g., no small sized individuals from high urban sites)
my.data <- data_ordered %>% 
           arrange(factor(Urban.Intensity,levels=c("Low","Medium","High"))) %>% 
           complete(Urban.Intensity = unique(Urban.Intensity), 
                    Body.Size=unique(Body.Size), 
                    fill = list(n=0))

urban <- my.data %>%
         select(Body.Size, Urban.Intensity) %>%
         count() %>% 
         arrange(Urban.Intensity, desc(freq)); urban

ggplot(urban, aes(x=fct_inorder(Urban.Intensity), y=freq, fill=Body.Size, width=1)) +
      theme_bw() +
      geom_bar(stat = "identity", color="black", size=0.7, position='dodge') +
      theme(axis.text.x = element_text(size=12, face="bold"),
            axis.text.y = element_text(size=12, face="bold"),
            axis.title = element_text(size=13, face="bold"),
            legend.text = element_text(size=13),
            legend.title = element_text(size=13, face="bold")) +
      ylab("Frequency\n") + xlab("\nUrban Intensity") +
      guides(fill=guide_legend("Body Size")) +
      scale_y_continuous(expand=c(0,0), limits=c(0,125), breaks=seq(0,125,25)) +
      scale_fill_manual(values=col)


> dput(data_ordered)
structure(list(Urban.Intensity = structure(c(3L, 3L, 2L, 2L, 
2L, 2L, 3L, 3L, 2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 3L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 
1L, 1L, 1L, 3L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 
3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 3L, 1L, 2L, 3L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 
2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Low", "Medium", 
"High"), class = "factor"), Body.Size = structure(c(3L, 3L, 2L, 
2L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 
2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 1L, 2L, 2L, 2L, 
2L, 2L, 1L, 2L, 1L, 1L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 
2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 
2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 2L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 
2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 
2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 
3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("Small", 
"Medium", "Large"), class = "factor")), row.names = c(NA, -510L
), class = "data.frame")

enter image description here

As you can see, in my data there should be 0 for the Small (body size) - High (urban intensity) combo but no matter what I do, complete isn't adding 0s and instead is filling that combination with a 1. So the figure is also erroneous because it is showing a value for that combination which should be empty or at least blank, but should keep the bar spacing and alignment with the x-axis labels.


Solution

  • By using complete(), you are forcing the creation of a row with Urban.intensity == High and Body.Size == Small. I think all you have to do is use the count() function with .drop = FALSE.

    data_ordered <- structure(list(Urban.Intensity = structure(c(3L, 3L, 2L, 2L, 
                                                                 2L, 2L, 3L, 3L, 2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
                                                                 1L, 2L, 1L, 1L, 1L, 1L, 3L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 
                                                                 1L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 
                                                                 1L, 1L, 1L, 3L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 
                                                                 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
                                                                 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
                                                                 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
                                                                 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 
                                                                 3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 3L, 1L, 2L, 3L, 1L, 1L, 
                                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 
                                                                 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 
                                                                 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
                                                                 3L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
                                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
                                                                 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
                                                                 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 
                                                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Low", "Medium","High"), class = "factor"), 
                                   Body.Size = structure(c(3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 
                                                           2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 1L, 2L, 2L, 2L, 
                                                           2L, 2L, 1L, 2L, 1L, 1L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 
                                                           2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 
                                                           2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 
                                                           1L, 1L, 1L, 2L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                           2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 2L, 2L, 
                                                           2L, 2L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
                                                           3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
                                                           3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 
                                                           2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 
                                                           2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
                                                           3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 
                                                           2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                           1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 
                                                           2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 
                                                           3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
                                                           3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 
                                                           3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
                                                           2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 3L, 
                                                           3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                           2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
                                                           3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                           2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                           2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
                                                           3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                           2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 
                                                           1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                           2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
                                                           3L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
                                                           3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 
                                                           3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("Small", 
                                                                                                                   "Medium", "Large"), class = "factor")), row.names = c(NA, -510L
                                                                                                                   ), class = "data.frame")
    
    library(tidyverse)
    
    urban <- data_ordered |>  
      count(Urban.Intensity, Body.Size, .drop = FALSE)
    
    ggplot(urban, aes(x=Urban.Intensity, y=n, fill=Body.Size, width=1)) +
      theme_bw() +
      geom_bar(stat = "identity", color="black", size=0.7, position='dodge') +
      theme(axis.text.x = element_text(size=12, face="bold"),
            axis.text.y = element_text(size=12, face="bold"),
            axis.title = element_text(size=13, face="bold"),
            legend.text = element_text(size=13),
            legend.title = element_text(size=13, face="bold")) +
      ylab("Frequency\n") + xlab("\nUrban Intensity") +
      guides(fill=guide_legend("Body Size")) +
      scale_y_continuous(expand=c(0,0), limits=c(0,125), breaks=seq(0,125,25))
    #> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
    #> ℹ Please use `linewidth` instead.
    #> This warning is displayed once every 8 hours.
    #> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
    #> generated.
    

    Bar plot

    Created on 2024-11-16 with reprex v2.1.1